From fd34c6c281d29a20b7b84dbb70ddec2b45e8ed9e Mon Sep 17 00:00:00 2001
From: Even Rouault
Date: Mon, 27 Jan 2025 22:22:04 +0100
Subject: [PATCH] Doc: robots.txt: lame attempt at preventing AI robots from scraping us

---
 doc/source/extra_path/robots.txt | 81 ++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)

diff --git a/doc/source/extra_path/robots.txt b/doc/source/extra_path/robots.txt
index 7eba37d690b6..93484b2f1691 100644
--- a/doc/source/extra_path/robots.txt
+++ b/doc/source/extra_path/robots.txt
@@ -2,3 +2,84 @@ User-agent: *
 Allow: /en/stable/
 Disallow: /en/
 Sitemap: https://gdal.org/sitemap.xml
+
+# Prevent AI scraping
+# Source: https://neil-clarke.com/block-the-bots-that-feed-ai-models-by-scraping-your-website/
+
+User-agent: CCBot
+Disallow: /
+
+User-agent: ChatGPT-User
+Disallow: /
+
+User-agent: GPTBot
+Disallow: /
+
+User-agent: Google-Extended
+Disallow: /
+
+User-agent: Google-CloudVertexBot
+Disallow: /
+
+User-agent: Applebot-Extended
+Disallow: /
+
+User-agent: anthropic-ai
+Disallow: /
+
+User-agent: ClaudeBot
+Disallow: /
+
+User-agent: Omgilibot
+Disallow: /
+
+User-agent: Omgili
+Disallow: /
+
+User-agent: FacebookBot
+Disallow: /
+
+User-agent: Diffbot
+Disallow: /
+
+User-agent: DuckAssistBot
+Disallow: /
+
+User-agent: AI2Bot
+Disallow: /
+
+User-agent: Bytespider
+Disallow: /
+
+User-agent: Kangaroo Bot
+Disallow: /
+
+User-agent: PanguBot
+Disallow: /
+
+User-agent: ImagesiftBot
+Disallow: /
+
+User-agent: PerplexityBot
+Disallow: /
+
+User-agent: cohere-ai
+Disallow: /
+
+User-agent: cohere-training-data-crawler
+Disallow: /
+
+User-agent: Meta-ExternalAgent
+Disallow: /
+
+User-agent: Meta-ExternalFetcher
+Disallow: /
+
+User-agent: Timpibot
+Disallow: /
+
+User-agent: Webzio-Extended
+Disallow: /
+
+User-agent: YouBot
+Disallow: /