From 6cd0d49da793b6e835d22c7a198da50e13d5f9b2 Mon Sep 17 00:00:00 2001 From: Naresh Kumar Babu Date: Fri, 2 Aug 2024 22:26:19 +0530 Subject: [PATCH] Update robots.txt, add ROBOTS_DISALLOWED and extra-robots.txt (#426) * Update robots.txt, add ROBOTS_DISALLOWED and extra-robots.txt * Update robots.txt, add ROBOTS_DISALLOWED and extra-robots.txt * Update robots.txt, add ROBOTS_DISALLOWED and extra-robots.txt * Update robots.txt, add ROBOTS_DISALLOWED and extra-robots.txt * Update robots.txt, add ROBOTS_DISALLOWED and extra-robots.txt --- _sources/configs/robots.php | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/_sources/configs/robots.php b/_sources/configs/robots.php index 7c653120..3efdb9df 100644 --- a/_sources/configs/robots.php +++ b/_sources/configs/robots.php @@ -5,8 +5,15 @@ ini_set( 'display_errors', 0 ); error_reporting( 0 ); +echo "# The following lines are generated by robots.php\n"; + header( 'Content-Type: text/plain' ); +$robotsDisallowed = getenv( 'ROBOTS_DISALLOWED' ); +if ( !empty( $robotsDisallowed ) && in_array( strtolower($robotsDisallowed), [ 'true', '1' ] ) ) { + die( "User-agent: *\nDisallow: /\n" ); +} + $enableSitemapEnv = getenv( 'MW_ENABLE_SITEMAP_GENERATOR'); // match the value check to the isTrue function at _sources/scripts/functions.sh if ( !empty( $enableSitemapEnv ) && in_array( $enableSitemapEnv, [ 'true', 'True', 'TRUE', '1' ] ) ) { @@ -17,11 +24,17 @@ $siteMapUrl = "$server$script/sitemap$subdir/sitemap-index-$identifier.xml"; - echo "# It generated by the robots.php file\n"; - echo "# Add the sitemap url:\n"; echo "Sitemap: $siteMapUrl\n"; - - echo "\n# Content of the robots.txt file:\n"; + echo "\n# Content of the robots.txt file:\n"; } readfile( 'robots-main.txt' ); + +// If the file `extra-robots.txt` is created under the name +// `/var/www/mediawiki/extra-robots.txt` then its contents get appended to the +// default `robots.txt` +if ( is_readable( 'extra-robots.txt' ) ) { + // Extra line to separate the files so that rules don't combine + echo "\n"; + readfile( 'extra-robots.txt' ); +}