From 99ec01de0ad5b367818492e933b64002129eaa88 Mon Sep 17 00:00:00 2001
From: Jake Jarvis
Date: Sat, 15 Mar 2025 21:39:52 -0400
Subject: [PATCH] let vercel firewall block AI crawler bots

---
 app/robots.ts | 84 ++++++---------------------------------------------
 1 file changed, 10 insertions(+), 74 deletions(-)

diff --git a/app/robots.ts b/app/robots.ts
index d9c6c134..4834d00c 100644
--- a/app/robots.ts
+++ b/app/robots.ts
@@ -3,79 +3,15 @@ import type { MetadataRoute } from "next";
 
 export const dynamic = "force-static";
 
-const robots = (): MetadataRoute.Robots => {
-  // I'm already _so_ over this shit...
-  // https://github.com/ai-robots-txt/ai.robots.txt/blob/main/robots.txt
-  // TODO: dynamically fetch this list from the above repo.
-  const naughtySpiders = [
-    "AI2Bot",
-    "Ai2Bot-Dolma",
-    "Amazonbot",
-    "anthropic-ai",
-    "Applebot",
-    "Applebot-Extended",
-    "Brightbot 1.0",
-    "Bytespider",
-    "CCBot",
-    "ChatGPT-User",
-    "Claude-Web",
-    "ClaudeBot",
-    "cohere-ai",
-    "cohere-training-data-crawler",
-    "Crawlspace",
-    "Diffbot",
-    "DuckAssistBot",
-    "FacebookBot",
-    "FriendlyCrawler",
-    "Google-Extended",
-    "GoogleOther",
-    "GoogleOther-Image",
-    "GoogleOther-Video",
-    "GPTBot",
-    "iaskspider/2.0",
-    "ICC-Crawler",
-    "ImagesiftBot",
-    "img2dataset",
-    "ISSCyberRiskCrawler",
-    "Kangaroo Bot",
-    "Meta-ExternalAgent",
-    "Meta-ExternalFetcher",
-    "OAI-SearchBot",
-    "omgili",
-    "omgilibot",
-    "PanguBot",
-    "PerplexityBot",
-    "PetalBot",
-    "Scrapy",
-    "SemrushBot-OCOB",
-    "SemrushBot-SWA",
-    "Sidetrade indexer bot",
-    "Timpibot",
-    "VelenPublicWebCrawler",
-    "Webzio-Extended",
-    "YouBot",
-    "AhrefsBot",
-    "BLEXBot",
-    "DataForSeoBot",
-    "magpie-crawler",
-    "MJ12bot",
-    "TurnitinBot",
-  ];
-
-  return {
-    rules: [
-      {
-        userAgent: "*",
-        // block access to staging sites
-        [process.env.NEXT_PUBLIC_VERCEL_ENV === "production" ? "allow" : "disallow"]: "/",
-      },
-      {
-        userAgent: naughtySpiders,
-        disallow: "/",
-      },
-    ],
-    sitemap: `${BASE_URL}/sitemap.xml`,
-  };
-};
+const robots = (): MetadataRoute.Robots => ({
+  rules: [
+    {
+      userAgent: "*",
+      // block access to staging sites
+      [process.env.NEXT_PUBLIC_VERCEL_ENV === "production" ? "allow" : "disallow"]: "/",
+    },
+  ],
+  sitemap: `${BASE_URL}/sitemap.xml`,
+});
 
 export default robots;