matrix-org · anoadragon453 · Dec 8, 2020 · Dec 5, 2020 · Dec 6, 2020 · Dec 8, 2020
@@ -0,0 +1 @@
+Add `X-Robots-Tag` header to stop web crawlers from indexing media.
@@ -155,6 +155,11 @@ def _quote(x):
     request.setHeader(b"Cache-Control", b"public,max-age=86400,s-maxage=86400")
     request.setHeader(b"Content-Length", b"%d" % (file_size,))
 
+    # Tell web crawlers to not index, archive, or follow links in media. This
+    # should help to prevent things in the media repo from showing up in web
+    # search results.
+    request.setHeader(b"X-Robots-Tag", "noindex, nofollow, noarchive, noimageindex")
+
 
 # separators as defined in RFC2616. SP and HT are handled separately.
 # see _can_encode_filename_as_token.

@@ -362,3 +362,16 @@ def _test_thumbnail(self, method, expected_body, expected_found):
                     "error": "Not found [b'example.com', b'12345']",
                 },
             )
+
+    def test_x_robots_tag_header(self):
+        """
+        Tests that the `X-Robots-Tag` header is present, which informs web crawlers
+        to not index, archive, or follow links in media.
+        """
+        channel = self._req(b"inline; filename=out" + self.test_image.extension)
+
+        headers = channel.headers
+        self.assertEqual(
+            headers.getRawHeaders(b"X-Robots-Tag"),
+            [b"noindex, nofollow, noarchive, noimageindex"],
+        )
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Add `X-Robots-Tag` header to stop web crawlers from indexing media.