# robots.txt
#
# Layout:
#   1. AI / LLM / data-scraping bots      -> fully blocked
#   2. SEO / marketing / analytics bots   -> fully blocked
#   3. Archive / backup crawlers          -> fully blocked
#   4. Everyone else (User-agent: *)      -> allowed, except the paths listed
#
# Format notes:
#   - The format is line-oriented: every directive must be on its own line.
#   - User-agent matching is case-insensitive, so one spelling per bot suffices.
#   - A crawler obeys only the most specific group that matches it; the bots
#     blocked by name below do not additionally consult the "*" group.

########################
# Block AI / LLM / data-scraping bots
########################

# OpenAI / ChatGPT
User-agent: GPTBot
Disallow: /

User-agent: ChatGPT-User
Disallow: /

User-agent: GPT-User
Disallow: /

User-agent: OAI-SearchBot
Disallow: /

# Anthropic / Claude
User-agent: ClaudeBot
Disallow: /

User-agent: Claude-Web
Disallow: /

User-agent: Claude-User
Disallow: /

User-agent: Claude-SearchBot
Disallow: /

# Perplexity
User-agent: PerplexityBot
Disallow: /

User-agent: Perplexity-User
Disallow: /

# Common Crawl (often used for AI training)
User-agent: CCBot
Disallow: /

# Other AI-related crawlers
User-agent: anthropic-ai
Disallow: /

User-agent: cohere-ai
Disallow: /

User-agent: Diffbot
Disallow: /

# webz.io crawler (token is "omgili", not "omigili")
User-agent: Omgili
Disallow: /

User-agent: OmgiliBot
Disallow: /

User-agent: ImagesiftBot
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: TikTokSpider
Disallow: /

User-agent: FacebookBot
Disallow: /

User-agent: Meta-ExternalAgent
Disallow: /

# Google / Apple AI extensions
# (their normal search crawlers stay governed by the default "*" rules)
User-agent: Google-Extended
Disallow: /

User-agent: GoogleOther-Batch
Disallow: /

User-agent: Google-CloudVertexBot
Disallow: /

User-agent: Applebot-Extended
Disallow: /

########################
# Block SEO / marketing / analytics crawlers
########################
# NOTE(review): several tokens in this section contain spaces. Strict parsers
# may only accept letters, "-" and "_" in a user-agent token, so these entries
# might not match -- confirm each against the vendor's documented token.

# Semrush
User-agent: SemrushBot
Disallow: /

User-agent: SemrushBot-SA
Disallow: /

# Majestic
User-agent: MJ12bot
Disallow: /

# Moz
User-agent: Rogerbot
Disallow: /

# Sitebulb
User-agent: Sitebulb Crawler
Disallow: /

# DeepCrawl / Lumar
User-agent: Lumar
Disallow: /

User-agent: DeepCrawl
Disallow: /

# Other SEO platforms
User-agent: CognitiveSEO Site Explorer
Disallow: /

User-agent: OnCrawl
Disallow: /

User-agent: Botify
Disallow: /

User-agent: JetOctopus
Disallow: /

User-agent: Netpeak Spider
Disallow: /

User-agent: ContentKing
Disallow: /

# Generic SEO / link crawlers
User-agent: BLEXBot
Disallow: /

User-agent: DotBot
Disallow: /

# Matching is case-insensitive, so this single entry also covers "Spbot".
User-agent: spbot
Disallow: /

User-agent: SEOkicks-Robot
Disallow: /

User-agent: XoviBot
Disallow: /

User-agent: MegaIndex.ru
Disallow: /

User-agent: PetalBot
Disallow: /

User-agent: HaosouSpider
Disallow: /

User-agent: 360Spider
Disallow: /

User-agent: Exabot
Disallow: /

User-agent: Swiftbot
Disallow: /

User-agent: seoscanners.net bot
Disallow: /

User-agent: SEO Scanner
Disallow: /

User-agent: SearchAtlas.com SEO Crawler
Disallow: /

User-agent: wotbox
Disallow: /

User-agent: Barkrowler
Disallow: /

########################
# Block archive / backup crawlers
########################

User-agent: archive.org_bot
Disallow: /

########################
# Default rules for all remaining user-agents
########################
# No blank lines inside this group: some parsers treat a blank line as the
# end of the group and would ignore the rules that follow it.

User-agent: *
# Pagination, sorting and filter query parameters.
# The "?" form matches the parameter when it is first in the query string,
# the "&" form when it follows another parameter.
Disallow: /*?page=
Disallow: /*&page=
Disallow: /*?sort=
Disallow: /*&sort=
Disallow: /*?order=
Disallow: /*&order=
Disallow: /*?limit=
Disallow: /*&limit=
Disallow: /*?filter_
Disallow: /*&filter_
# Technical directories (not meant for indexing)
Disallow: /admin/
Disallow: /system/
Disallow: /storage/
Disallow: /catalog/controller/
Disallow: /catalog/model/
Disallow: /catalog/view/
# Checkout and account routes
Disallow: /index.php?route=checkout/
Disallow: /index.php?route=account/