From 0f0011ac0151dd4dc28ef39b0e0e3ded8dda918e Mon Sep 17 00:00:00 2001 From: David Robertson Date: Wed, 26 Oct 2022 14:06:53 +0100 Subject: [PATCH 1/2] Improve type hinting of `RawHeaders` to detect the problem that #14301 fixed. --- synapse/app/generic_worker.py | 8 ++++---- synapse/http/client.py | 24 +++++++++++++++++++----- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py index 2a9f039367b9..cb5892f041e9 100644 --- a/synapse/app/generic_worker.py +++ b/synapse/app/generic_worker.py @@ -178,13 +178,13 @@ async def on_POST( # Proxy headers from the original request, such as the auth headers # (in case the access token is there) and the original IP / # User-Agent of the request. - headers = { - header: request.requestHeaders.getRawHeaders(header, []) + headers: Dict[bytes, List[bytes]] = { + header: list(request.requestHeaders.getRawHeaders(header, [])) for header in (b"Authorization", b"User-Agent") } # Add the previous hop to the X-Forwarded-For header. - x_forwarded_for = request.requestHeaders.getRawHeaders( - b"X-Forwarded-For", [] + x_forwarded_for = list( + request.requestHeaders.getRawHeaders(b"X-Forwarded-For", []) ) # we use request.client here, since we want the previous hop, not the # original client (as returned by request.getClientAddress()). diff --git a/synapse/http/client.py b/synapse/http/client.py index 084d0a5b84e9..4eb740c04020 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -25,7 +25,6 @@ List, Mapping, Optional, - Sequence, Tuple, Union, ) @@ -90,14 +89,29 @@ "synapse_http_client_responses", "", ["method", "code"] ) -# the type of the headers list, to be passed to the t.w.h.Headers. -# Actually we can mix str and bytes keys, but Mapping treats 'key' as invariant so -# we simplify. +# the type of the headers map, to be passed to the t.w.h.Headers. +# +# The actual type accepted by Twisted is +# Mapping[Union[str, bytes], Sequence[Union[str, bytes]] , +# allowing us to mix and match str and bytes freely. However: any str is also a +# Sequence[str]; passing a header string value which is a +# standalone str is interpreted as a sequence of 1-codepoint strings. This is a disastrous footgun. +# We use a narrower value type (RawHeaderValue) to avoid this footgun. +# +# We also simplify the keys to be either all str or all bytes. This helps because +# Dict[K, V] is invariant in K (and indeed V). RawHeaders = Union[Mapping[str, "RawHeaderValue"], Mapping[bytes, "RawHeaderValue"]] # the value actually has to be a List, but List is invariant so we can't specify that # the entries can either be Lists or bytes. -RawHeaderValue = Sequence[Union[str, bytes]] +RawHeaderValue = Union[ + List[str], + List[bytes], + List[Union[str, bytes]], + Tuple[str, ...], + Tuple[bytes, ...], + Tuple[Union[str, bytes], ...], +] def check_against_blacklist( From f5f1df899f7957208aad317a5ba07f8b28bc280f Mon Sep 17 00:00:00 2001 From: David Robertson Date: Wed, 26 Oct 2022 14:13:21 +0100 Subject: [PATCH 2/2] Changelog --- changelog.d/14303.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/14303.misc diff --git a/changelog.d/14303.misc b/changelog.d/14303.misc new file mode 100644 index 000000000000..24ce238223dc --- /dev/null +++ b/changelog.d/14303.misc @@ -0,0 +1 @@ +Improve type hinting of `RawHeaders`.