1
mirror of https://github.com/jakejarvis/jarv.is.git synced 2025-04-26 17:48:30 -04:00

let vercel firewall block AI crawler bots

This commit is contained in:
Jake Jarvis 2025-03-15 21:39:52 -04:00
parent 7c4144a1e7
commit 99ec01de0a
Signed by: jake
SSH Key Fingerprint: SHA256:nCkvAjYA6XaSPUqc4TfbBQTpzr8Xj7ritg/sGInCdkc

View File

@@ -3,79 +3,15 @@ import type { MetadataRoute } from "next";
export const dynamic = "force-static"; export const dynamic = "force-static";
const robots = (): MetadataRoute.Robots => { const robots = (): MetadataRoute.Robots => ({
// I'm already _so_ over this shit... rules: [
// https://github.com/ai-robots-txt/ai.robots.txt/blob/main/robots.txt {
// TODO: dynamically fetch this list from the above repo. userAgent: "*",
const naughtySpiders = [ // block access to staging sites
"AI2Bot", [process.env.NEXT_PUBLIC_VERCEL_ENV === "production" ? "allow" : "disallow"]: "/",
"Ai2Bot-Dolma", },
"Amazonbot", ],
"anthropic-ai", sitemap: `${BASE_URL}/sitemap.xml`,
"Applebot", });
"Applebot-Extended",
"Brightbot 1.0",
"Bytespider",
"CCBot",
"ChatGPT-User",
"Claude-Web",
"ClaudeBot",
"cohere-ai",
"cohere-training-data-crawler",
"Crawlspace",
"Diffbot",
"DuckAssistBot",
"FacebookBot",
"FriendlyCrawler",
"Google-Extended",
"GoogleOther",
"GoogleOther-Image",
"GoogleOther-Video",
"GPTBot",
"iaskspider/2.0",
"ICC-Crawler",
"ImagesiftBot",
"img2dataset",
"ISSCyberRiskCrawler",
"Kangaroo Bot",
"Meta-ExternalAgent",
"Meta-ExternalFetcher",
"OAI-SearchBot",
"omgili",
"omgilibot",
"PanguBot",
"PerplexityBot",
"PetalBot",
"Scrapy",
"SemrushBot-OCOB",
"SemrushBot-SWA",
"Sidetrade indexer bot",
"Timpibot",
"VelenPublicWebCrawler",
"Webzio-Extended",
"YouBot",
"AhrefsBot",
"BLEXBot",
"DataForSeoBot",
"magpie-crawler",
"MJ12bot",
"TurnitinBot",
];
return {
rules: [
{
userAgent: "*",
// block access to staging sites
[process.env.NEXT_PUBLIC_VERCEL_ENV === "production" ? "allow" : "disallow"]: "/",
},
{
userAgent: naughtySpiders,
disallow: "/",
},
],
sitemap: `${BASE_URL}/sitemap.xml`,
};
};
export default robots; export default robots;