1
mirror of https://github.com/jakejarvis/jarv.is.git synced 2025-04-26 09:05:22 -04:00

block even more AI bullsh*t

This commit is contained in:
Jake Jarvis 2024-06-23 13:23:15 -04:00
parent 19fb5da2d9
commit 00165a5871
Signed by: jake
SSH Key Fingerprint: SHA256:nCkvAjYA6XaSPUqc4TfbBQTpzr8Xj7ritg/sGInCdkc

View File

@@ -6,31 +6,43 @@ export const getServerSideProps: GetServerSideProps<Record<string, never>> = asy
// this production check should be unnecessary because "noindex" and "nofollow" are also set in a meta tag (see
// DefaultSeo's props in pages/_app.tsx), but it doesn't hurt...
const robots = `User-agent: *
${process.env.NEXT_PUBLIC_VERCEL_ENV !== "production" ? `Disallow: /` : `Allow: /`}
${
process.env.NEXT_PUBLIC_VERCEL_ENV !== "production"
? `Disallow: /`
: `Allow: /
# Block CommonCrawl
User-agent: CCBot
Disallow: /
# Block Google Bard
User-agent: Google-Extended
Disallow: /
# Block OpenAI & ChatGPT
User-agent: GPTBot
Disallow: /
User-agent: ChatGPT-User
Disallow: /
# Block Anthropic AI
# I'm already _so_ over this shit...
# https://github.com/ai-robots-txt/ai.robots.txt/blob/main/robots.txt
User-agent: AdsBot-Google
User-agent: Amazonbot
User-agent: anthropic-ai
Disallow: /
User-agent: ClaudeBot
Disallow: /
# Block ByteDance
User-agent: Applebot-Extended
User-agent: Bytespider
Disallow: /
User-agent: CCBot
User-agent: ChatGPT-User
User-agent: Claude-Web
User-agent: ClaudeBot
User-agent: cohere-ai
User-agent: Diffbot
User-agent: FacebookBot
User-agent: FriendlyCrawler
User-agent: Google-Extended
User-agent: GPTBot
User-agent: img2dataset
User-agent: omgili
User-agent: omgilibot
User-agent: peer39_crawler
User-agent: peer39_crawler/1.0
User-agent: PerplexityBot
User-agent: YouBot
User-agent: AhrefsBot
User-agent: BLEXBot
User-agent: DataForSeoBot
User-agent: magpie-crawler
User-agent: MJ12bot
User-agent: TurnitinBot
Disallow: /`
}
Sitemap: ${process.env.NEXT_PUBLIC_BASE_URL || ""}/sitemap.xml
`;