# robots.txt — crawler access policy.
#
# Format reminder: one record per line. A "#" comments out the remainder of
# its line, so directives must never share a line with each other or follow
# a comment on the same line.
#
# Inside the "User-agent: *" group, bare "#" lines are used instead of blank
# lines so that legacy parsers which end a record at the first blank line
# still read every rule below as part of the same group.

User-agent: *
Disallow: /a/downloads/-/*
Disallow: /admin
Disallow: /cart
Disallow: /orders
Disallow: /checkouts/
Disallow: /checkout
Disallow: /25881542733/checkouts
Disallow: /25881542733/orders
Disallow: /carts
Disallow: /account
Disallow: /collections/*sort_by*
Disallow: /*/collections/*sort_by*
Disallow: /collections/*+*
Disallow: /collections/*%2B*
Disallow: /collections/*%2b*
Disallow: /*/collections/*+*
Disallow: /*/collections/*%2B*
Disallow: /*/collections/*%2b*
Disallow: */collections/*filter*&*filter*
Disallow: /blogs/*+*
Disallow: /blogs/*%2B*
Disallow: /blogs/*%2b*
Disallow: /*/blogs/*+*
Disallow: /*/blogs/*%2B*
Disallow: /*/blogs/*%2b*
Disallow: /*?*oseid=*
Disallow: /*preview_theme_id*
Disallow: /*preview_script_id*
Disallow: /policies/
Disallow: /*/policies/
Disallow: /*/*?*ls=*&ls=*
Disallow: /*/*?*ls%3D*%3Fls%3D*
Disallow: /*/*?*ls%3d*%3fls%3d*
Disallow: /search
Disallow: /apple-app-site-association
Disallow: /.well-known/shopify/monorail
Disallow: /cdn/wpm/*.js
Disallow: /recommendations/products
Disallow: /*/recommendations/products
Disallow: /products/*-[a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9]-remote
Disallow: /*/products/*-[a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9]-remote
Disallow: /collections/*/products/*-[a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9]-remote
Disallow: /*/collections/*/products/*-[a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9]-remote
#
# ═══════════════════════════════════════════════════════════════════════════════
# CUSTOM CRAWL RULES — applied to all standard crawlers (including Googlebot)
# ═══════════════════════════════════════════════════════════════════════════════
#
# ── Faceted / thin collection pages (root domain) ────────────────────────────
# Prevents crawl of filter-state, constraint, and alternate-view URLs
# that produce thin duplicate content and drain crawl budget.
Disallow: /collections/all*
Disallow: /collections/vendors?q=*
Disallow: /collections/types?q=*
Disallow: /collections/*?*constraint*
Disallow: /collections/*?*filter*
Disallow: /collections/*?*pf_*
Disallow: /collections/*?*grid_list*
Disallow: /collections/*?*view=*
#
# ── Faceted / thin collection pages (Shopify Market subfolders) ──────────────
# Mirrors every root rule for /en-gb/, /fr/, /de/, /ja/ etc.
# The /* prefix matches any leading path: "*" stands for zero or more
# characters, "/" included, so it covers single-segment market subfolders
# as well as deeper prefixes.
Disallow: /*/collections/all*
Disallow: /*/collections/vendors?q=*
Disallow: /*/collections/types?q=*
Disallow: /*/collections/*?*constraint*
Disallow: /*/collections/*?*filter*
Disallow: /*/collections/*?*pf_*
Disallow: /*/collections/*?*grid_list*
Disallow: /*/collections/*?*view=*
#
# ── Sorting parameters (all paths including subfolders) ──────────────────────
Disallow: /*?*sort_by=*
#
# ── Pagination parameters ────────────────────────────────────────────────────
# Collection/blog pagination creates thin index-bloat pages.
# Product discovery is handled via sitemap — no crawl-path loss.
Disallow: /*?*page=*
#
# ── Site search results (root + market subfolders) ───────────────────────────
Disallow: /search
Disallow: /*/search
#
# ── Tracking & noise query parameters ────────────────────────────────────────
# Prevents crawl of URLs polluted by ad click IDs, UTM tags, and
# Google's own auto-appended parameters (srsltid, gbraid, wbraid).
Disallow: /*?*utm_*
Disallow: /*?*fbclid=*
Disallow: /*?*gclid=*
Disallow: /*?*gbraid=*
Disallow: /*?*wbraid=*
Disallow: /*?*srsltid=*
Disallow: /*?*preview_theme_id=*
#
# ── Shopify section rendering endpoints ──────────────────────────────────────
# ?section_id= URLs are AJAX fragment endpoints used by Shopify's
# Section Rendering API — zero content value for indexing.
Disallow: /*?*section_id=*
Sitemap: https://sendegaro.com/sitemap.xml

User-agent: adsbot-google
Disallow: /checkouts/
Disallow: /checkout
Disallow: /carts
Disallow: /orders
Disallow: /25881542733/checkouts
Disallow: /25881542733/orders
Disallow: /*?*oseid=*
Disallow: /*preview_theme_id*
Disallow: /*preview_script_id*
Disallow: /cdn/wpm/*.js
Disallow: /products/*-[a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9]-remote
Disallow: /*/products/*-[a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9]-remote
Disallow: /collections/*/products/*-[a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9]-remote
Disallow: /*/collections/*/products/*-[a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9]-remote

User-agent: Nutch
Disallow: /

User-agent: AhrefsBot
Crawl-delay: 10
Disallow: /a/downloads/-/*
Disallow: /admin
Disallow: /cart
Disallow: /orders
Disallow: /checkouts/
Disallow: /checkout
Disallow: /25881542733/checkouts
Disallow: /25881542733/orders
Disallow: /carts
Disallow: /account
Disallow: /collections/*sort_by*
Disallow: /*/collections/*sort_by*
Disallow: /collections/*+*
Disallow: /collections/*%2B*
Disallow: /collections/*%2b*
Disallow: /*/collections/*+*
Disallow: /*/collections/*%2B*
Disallow: /*/collections/*%2b*
Disallow: */collections/*filter*&*filter*
Disallow: /blogs/*+*
Disallow: /blogs/*%2B*
Disallow: /blogs/*%2b*
Disallow: /*/blogs/*+*
Disallow: /*/blogs/*%2B*
Disallow: /*/blogs/*%2b*
Disallow: /*?*oseid=*
Disallow: /*preview_theme_id*
Disallow: /*preview_script_id*
Disallow: /policies/
Disallow: /*/policies/
Disallow: /*/*?*ls=*&ls=*
Disallow: /*/*?*ls%3D*%3Fls%3D*
Disallow: /*/*?*ls%3d*%3fls%3d*
Disallow: /search
Disallow: /apple-app-site-association
Disallow: /.well-known/shopify/monorail
Disallow: /cdn/wpm/*.js
Disallow: /products/*-[a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9]-remote
Disallow: /*/products/*-[a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9]-remote
Disallow: /collections/*/products/*-[a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9]-remote
Disallow: /*/collections/*/products/*-[a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9]-remote
Sitemap: https://sendegaro.com/sitemap.xml

User-agent: AhrefsSiteAudit
Crawl-delay: 10
Disallow: /a/downloads/-/*
Disallow: /admin
Disallow: /cart
Disallow: /orders
Disallow: /checkouts/
Disallow: /checkout
Disallow: /25881542733/checkouts
Disallow: /25881542733/orders
Disallow: /carts
Disallow: /account
Disallow: /collections/*sort_by*
Disallow: /*/collections/*sort_by*
Disallow: /collections/*+*
Disallow: /collections/*%2B*
Disallow: /collections/*%2b*
Disallow: /*/collections/*+*
Disallow: /*/collections/*%2B*
Disallow: /*/collections/*%2b*
Disallow: */collections/*filter*&*filter*
Disallow: /blogs/*+*
Disallow: /blogs/*%2B*
Disallow: /blogs/*%2b*
Disallow: /*/blogs/*+*
Disallow: /*/blogs/*%2B*
Disallow: /*/blogs/*%2b*
Disallow: /*?*oseid=*
Disallow: /*preview_theme_id*
Disallow: /*preview_script_id*
Disallow: /policies/
Disallow: /*/policies/
Disallow: /*/*?*ls=*&ls=*
Disallow: /*/*?*ls%3D*%3Fls%3D*
Disallow: /*/*?*ls%3d*%3fls%3d*
Disallow: /search
Disallow: /apple-app-site-association
Disallow: /.well-known/shopify/monorail
Disallow: /cdn/wpm/*.js
Disallow: /products/*-[a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9]-remote
Disallow: /*/products/*-[a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9]-remote
Disallow: /collections/*/products/*-[a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9]-remote
Disallow: /*/collections/*/products/*-[a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9][a-f0-9]-remote
Sitemap: https://sendegaro.com/sitemap.xml

User-agent: MJ12bot
Crawl-delay: 10

User-agent: Pinterest
Crawl-delay: 1

# ═══════════════════════════════════════════════════════════════════════════════
# GOOGLE ADS & MERCHANT CENTER CRAWLERS
# ═══════════════════════════════════════════════════════════════════════════════
#
# AdsBot crawlers do NOT inherit from User-agent: * — they must be named
# explicitly or they default to "no rules" (crawl everything).
# Being explicit here ensures GMC quality/policy checks always pass,
# even if the wildcard group changes in future.
#
#   AdsBot-Google        → Desktop landing page quality checks (GMC + Google Ads)
#   AdsBot-Google-Mobile → Mobile landing page quality checks
#   Mediapartners-Google → AdSense content matching (future-proofing)
#
# NOTE: an "adsbot-google" group already exists earlier in this file.
# User-agent tokens are matched case-insensitively, and parsers following
# RFC 9309 combine every group that names the same token. The earlier
# checkout/cart/orders Disallow rules therefore still apply to AdsBot-Google
# alongside the Allow below (the most specific matching path wins).

User-agent: AdsBot-Google
Allow: /

User-agent: AdsBot-Google-Mobile
Allow: /

User-agent: Mediapartners-Google
Allow: /

# ═══════════════════════════════════════════════════════════════════════════════
# AI / LLM CRAWLERS — full access for maximum AI search & training visibility
# ═══════════════════════════════════════════════════════════════════════════════
#
# Strategy: Sendegaro benefits from being in AI training data and AI search
# results. Every product page, collection page, and brand page that AI models
# can read = higher chance of being surfaced when users ask AI assistants
# "where to buy Dolce & Gabbana cheap" or "best luxury outlet online".
#
# LLMs.txt (llmstxt.org specification):
#   /llms.txt      → concise AI-readable store index
#   /llms-full.txt → complete product catalog with variants, SKUs, and GTINs
#
# These crawlers are declared explicitly because some do not reliably
# fall back to User-agent: * depending on implementation.
#
# NOTE(review): a crawler that matches one of the named groups below uses
# that group INSTEAD of User-agent: *. With only "Allow: /" here, these bots
# are not bound by the /admin, /cart, /checkout, /account, /search or
# faceted-URL exclusions defined for "*". Confirm this is intended.

# ── OpenAI ───────────────────────────────────────────────────────────────────
User-agent: GPTBot
Allow: /

User-agent: OAI-SearchBot
Allow: /

User-agent: ChatGPT-User
Allow: /

# ── Anthropic ────────────────────────────────────────────────────────────────
User-agent: ClaudeBot
Allow: /

# ── Google AI (Gemini training/grounding — does NOT affect Search ranking) ───
User-agent: Google-Extended
Allow: /

# ── Perplexity AI Search ─────────────────────────────────────────────────────
User-agent: PerplexityBot
Allow: /

# ── Apple (Siri, Apple Intelligence, Spotlight) ──────────────────────────────
User-agent: Applebot-Extended
Allow: /

# ── Common Crawl (feeds many AI training datasets) ───────────────────────────
User-agent: CCBot
Allow: /

# ── Meta AI ──────────────────────────────────────────────────────────────────
User-agent: FacebookBot
Allow: /

# ── Cohere AI ────────────────────────────────────────────────────────────────
User-agent: cohere-ai
Allow: /

# ═══════════════════════════════════════════════════════════════════════════════
# BLOCKED CHINESE CRAWLERS — Sendegaro does not sell to China
# ═══════════════════════════════════════════════════════════════════════════════

User-agent: Baiduspider
Disallow: /

User-agent: Sogou web spider
Disallow: /

User-agent: YisouSpider
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: 360Spider
Disallow: /

User-agent: Toutiaospider
Disallow: /

User-agent: PetalBot
Disallow: /

# ═══════════════════════════════════════════════════════════════════════════════
# BLOCKED SEO AUDIT CRAWLERS
# ═══════════════════════════════════════════════════════════════════════════════
#
# These bots consume server resources without benefiting Sendegaro directly.
# NOTE: To run a site audit via Ahrefs/Semrush, temporarily comment out
# the relevant block, run the crawl, then re-enable.
#
# NOTE: AhrefsBot, AhrefsSiteAudit and MJ12bot each have an earlier group in
# this file (Crawl-delay, plus path rules for the two Ahrefs bots). Parsers
# that combine same-named groups per RFC 9309 see "Disallow: /" from the
# blocks below and are fully blocked. Commenting out a block here makes the
# bot fall back to its earlier group, not to User-agent: *.
# NOTE(review): a parser that honours only the first matching group would
# never see these "Disallow: /" rules — verify with each vendor's checker.

User-agent: AhrefsBot
Disallow: /

User-agent: AhrefsSiteAudit
Disallow: /

User-agent: SemrushBot
Disallow: /

User-agent: MJ12bot
Disallow: /

User-agent: DotBot
Disallow: /

User-agent: Rogerbot
Disallow: /

User-agent: MauiBot
Disallow: /