Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Add X-Robots-Tag header to stop crawlers from indexing media #8887

Merged
merged 9 commits into from
Dec 8, 2020
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/8887.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add `X-Robots-Tag` header to stop web crawlers from indexing media.
aaronraimist marked this conversation as resolved.
Show resolved Hide resolved
5 changes: 5 additions & 0 deletions synapse/rest/media/v1/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,11 @@ def _quote(x):
request.setHeader(b"Cache-Control", b"public,max-age=86400,s-maxage=86400")
request.setHeader(b"Content-Length", b"%d" % (file_size,))

# Tell web crawlers not to index, archive, or follow links in media. This
# should help prevent content in the media repo from showing up in web
# search results.
request.setHeader(b"X-Robots-Tag", "noindex, nofollow, noarchive, noimageindex")
aaronraimist marked this conversation as resolved.
Show resolved Hide resolved


# separators as defined in RFC2616. SP and HT are handled separately.
# see _can_encode_filename_as_token.
Expand Down
9 changes: 9 additions & 0 deletions tests/rest/media/v1/test_media_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,3 +362,12 @@ def _test_thumbnail(self, method, expected_body, expected_found):
"error": "Not found [b'example.com', b'12345']",
},
)

def test_x_robots_tag_header(self):
    """
    Check that media responses carry the `X-Robots-Tag` header, which tells
    web crawlers not to index, archive, or follow links in media.
    """
    channel = self._req(b"inline; filename=out" + self.test_image.extension)

    # The expected value is a one-element list holding the header as bytes.
    headers = channel.headers
    self.assertEqual(
        headers.getRawHeaders(b"X-Robots-Tag"),
        [b"noindex, nofollow, noarchive, noimageindex"],
    )