-
-
Notifications
You must be signed in to change notification settings - Fork 1.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Forwarded headers and otherwise improved proxy handling #1638
Changes from 30 commits
f69383d
b896454
1838f43
1dd854f
6aaec12
60c8e7a
45225b7
dacbf1b
9d70054
a04617f
296ceec
e3bbd2c
8da60b0
8181450
6c612e4
c166f76
63cc2d0
bb339fa
c05db14
6971e65
a1d0ecc
ea9f520
a34fbcf
fdf4c3d
9302047
030422d
a638758
7c66e5f
d971549
95b7bc2
7eab1dc
d9a1fa2
2a5683d
7aff38d
8f54aff
b983cf4
c7bbd58
502af27
2a81f66
8d81318
3548d6a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,9 +4,9 @@ | |
|
||
Options = typing.Dict[str, str] # key=value fields in various headers | ||
|
||
token, quoted = r"([\w!#$%&'*+\-.^_`|~]+)", r'"([^"]*)"' | ||
parameter = re.compile(fr";\s*{token}=(?:{token}|{quoted})", re.ASCII) | ||
firefox_quote_escape = re.compile(r'\\"(?!; |\s*$)') | ||
_token, _quoted = r"([\w!#$%&'*+\-.^_`|~]+)", r'"([^"]*)"' | ||
_param = re.compile(fr";\s*{_token}=(?:{_token}|{_quoted})", re.ASCII) | ||
_firefox_quote_escape = re.compile(r'\\"(?!; |\s*$)') | ||
|
||
# RFC's quoted-pair escapes are mostly ignored by browsers. Chrome, Firefox and | ||
# curl all have different escaping, that we try to handle as well as possible, | ||
|
@@ -24,14 +24,132 @@ def parse_content_header(value: str) -> typing.Tuple[str, Options]: | |
Mostly identical to cgi.parse_header and werkzeug.parse_options_header | ||
but runs faster and handles special characters better. Unescapes quotes. | ||
""" | ||
value = firefox_quote_escape.sub("%22", value) | ||
value = _firefox_quote_escape.sub("%22", value) | ||
pos = value.find(";") | ||
if pos == -1: | ||
options = {} | ||
else: | ||
options = { | ||
m.group(1).lower(): m.group(2) or m.group(3).replace("%22", '"') | ||
for m in parameter.finditer(value[pos:]) | ||
for m in _param.finditer(value[pos:]) | ||
} | ||
value = value[:pos] | ||
return value.strip().lower(), options | ||
|
||
|
||
# https://tools.ietf.org/html/rfc7230#section-3.2.6 and | ||
# https://tools.ietf.org/html/rfc7239#section-4 | ||
# This regex is for *reversed* strings because that works much faster for | ||
# right-to-left matching than the other way around. Be wary that all things are | ||
# a bit backwards! _rparam matches forwarded pairs alike ";key=value" | ||
_rparam = re.compile(f"(?:{_token}|{_quoted})={_token}\\s*($|[;,])", re.ASCII) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 I like this one. |
||
|
||
|
||
def parse_forwarded(headers, config): | ||
"""Parse HTTP Forwarded headers. | ||
Accepts only the last element with secret=`config.FORWARDED_SECRET` | ||
:return: dict with matching keys (lowercase) and values, or None. | ||
""" | ||
header = headers.getall("forwarded", None) | ||
secret = config.FORWARDED_SECRET | ||
if header is None or not secret: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not super familiar with the RFC but why can we only accept forwaded headers with matching secrets? Seems like a decision that could be left to the application? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The alternative is to use PROXIES_COUNT, as is done with X-Forwarded headers in the existing as well as the new implementation. If this same setting was applied to Forwarded as well, any existing installed applications using X-Forwarded would instantly become exploitable (send a request with fake Forwarded header), so if we wanted to use that, we'd have to add a new config option for that. Using a secret key, as suggested by Nginx folks, solves the entire issue in a smart and secure manner. This key is not part of RFC but such extensions are explicitly allowed by https://tools.ietf.org/html/rfc7239#section-5.5 |
||
return None | ||
header = ",".join(header) # Join multiple header lines | ||
if secret not in header: | ||
return None | ||
# Loop over <separator><key>=<value> elements from right to left | ||
ret = sep = pos = None | ||
found = False | ||
for m in _rparam.finditer(header[::-1]): | ||
# Start of new element? (on parser skips and non-semicolon right sep) | ||
if m.start() != pos or sep != ";": | ||
if found: | ||
return normalize(ret) | ||
ret = {} | ||
pos = m.end() | ||
val_token, val_quoted, key, sep = m.groups() | ||
key = key.lower()[::-1] | ||
val = (val_token or val_quoted.replace('"\\', '"'))[::-1] | ||
ret[key] = val | ||
if key == "secret" and val == secret: | ||
found = True | ||
if found and sep != ";": | ||
return normalize(ret) | ||
return normalize(ret) if found else None | ||
|
||
|
||
def parse_xforwarded(headers, config): | ||
"""Parse X-Real-IP, X-Scheme and X-Forwarded-* headers.""" | ||
proxies_count = config.PROXIES_COUNT | ||
if not proxies_count: | ||
return None | ||
h1, h2 = config.REAL_IP_HEADER, config.FORWARDED_FOR_HEADER | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we please use a better name for the variables instead of |
||
addr = h1 and headers.get(h1) | ||
forwarded_for = h2 and headers.getall(h2, None) | ||
if not addr and forwarded_for: | ||
assert proxies_count == -1 or proxies_count > 0, config.PROXIES_COUNT | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would it be a good idea to have a bit of a clearly defined error message with the assert ? Or would it be easier to handle it the way the |
||
# Combine, split and filter multiple headers' entries | ||
proxies = (p.strip() for h in forwarded_for for p in h.split(",")) | ||
proxies = [p for p in proxies if p] | ||
try: | ||
addr = proxies[-proxies_count] if proxies_count > 0 else proxies[0] | ||
except IndexError: | ||
return None | ||
if not addr: | ||
return None | ||
other = ( | ||
(key, headers.get(header)) | ||
for key, header in ( | ||
("proto", "x-scheme"), | ||
("proto", "x-forwarded-proto"), # Overrides X-Scheme if present | ||
("host", "x-forwarded-host"), | ||
("port", "x-forwarded-port"), | ||
("path", "x-forwarded-path"), | ||
) | ||
) | ||
return normalize({"for": addr, **{k: v for k, v in other if v}}) | ||
|
||
|
||
_ipv6 = "(?:[0-9A-Fa-f]{0,4}:){2,7}[0-9A-Fa-f]{0,4}" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We might need to make this regex a bit stronger if we are to rely on the regex. (I am sure you have thought of this already) This would be a good to have. Otherwise, we might as well use the xref: http://vernon.mauery.com/content/projects/linux/ipv6_regex There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would use ipaddress but its implementation is awfully slow. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @Tronic Really? I wasn't aware of that. Internally at work, I have a custom class called There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These are not entirely comparable as they do different things, but for reference:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @Tronic That's interesting. Well, if we are using the RegEx, can we please have a bit more strict version of this RegEx? This regex can easily return false positives There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't see any practical way to make it more strict than it already is. Also, this is only meant to tell IPv4 and IPv6 addresses and hostnames apart, and as far as I can tell, there cannot be any false positives there. Neither IPv4 or hostnames can have 2-7 colons in them. The closest that I can think of is a hexadecimal-looking hostname in a host header like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A somewhat bigger concern is that IPv6 are not normalized to compressed form. For this it might be justified to use ipaddress module even though running an address through it takes more time than parsing the entire headers. |
||
_ipv6_re = re.compile(_ipv6) | ||
_host_re = re.compile( | ||
r"((?:\[" + _ipv6 + r"\])|[a-zA-Z0-9.\-]{1,253})(?::(\d{1,5}))?" | ||
) | ||
|
||
|
||
def parse_host(host): | ||
m = _host_re.fullmatch(host) | ||
if not m: | ||
return None, None | ||
host, port = m.groups() | ||
return host.lower(), port and int(port) | ||
|
||
|
||
def bracketv6(addr): | ||
return f"[{addr}]" if _ipv6_re.fullmatch(addr) else addr | ||
|
||
|
||
def normalize(fwd: dict) -> dict: | ||
"""Normalize and convert values extracted from forwarded headers. | ||
Modifies fwd in place and returns the same object. | ||
""" | ||
if "proto" in fwd: | ||
sjsadowski marked this conversation as resolved.
Show resolved
Hide resolved
|
||
fwd["proto"] = fwd["proto"].lower() | ||
if "by" in fwd: | ||
fwd["by"] = bracketv6(fwd["by"]).lower() | ||
if "for" in fwd: | ||
fwd["for"] = bracketv6(fwd["for"]).lower() | ||
if "port" in fwd: | ||
try: | ||
fwd["port"] = int(fwd["port"]) | ||
except ValueError: | ||
fwd.pop("port", None) | ||
if "host" in fwd: | ||
host, port = parse_host(fwd["host"]) | ||
if host: | ||
fwd["host"] = host | ||
if port: | ||
fwd["port"] = port | ||
else: | ||
del fwd["host"] | ||
return fwd |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,7 +11,12 @@ | |
from httptools import parse_url | ||
|
||
from sanic.exceptions import InvalidUsage | ||
from sanic.headers import parse_content_header | ||
from sanic.headers import ( | ||
parse_content_header, | ||
parse_forwarded, | ||
parse_host, | ||
parse_xforwarded, | ||
) | ||
from sanic.log import error_logger, logger | ||
|
||
|
||
|
@@ -87,6 +92,7 @@ class Request(dict): | |
"parsed_files", | ||
"parsed_form", | ||
"parsed_json", | ||
"parsed_forwarded", | ||
"raw_url", | ||
"stream", | ||
"transport", | ||
|
@@ -107,6 +113,7 @@ def __init__(self, url_bytes, headers, version, method, transport, app): | |
|
||
# Init but do not inhale | ||
self.body_init() | ||
self.parsed_forwarded = None | ||
self.parsed_json = None | ||
self.parsed_form = None | ||
self.parsed_files = None | ||
|
@@ -372,86 +379,70 @@ def _get_address(self): | |
def server_name(self): | ||
""" | ||
Attempt to get the server's hostname in this order: | ||
`config.SERVER_NAME`, `x-forwarded-host` header, :func:`Request.host` | ||
`config.SERVER_NAME`, `forwarded` header, `x-forwarded-host` header, | ||
:func:`Request.host` | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
|
||
:return: the server name without port number | ||
:rtype: str | ||
""" | ||
return ( | ||
self.app.config.get("SERVER_NAME") | ||
or self.headers.get("x-forwarded-host") | ||
or self.host.split(":")[0] | ||
) | ||
server_name = self.app.config.get("SERVER_NAME") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. not a blocker here because this is existing logic but general question, does this make sense? SERVER_NAME seems like its relevant to the application server but is being used to override request specific headers which seems weird. @harshanarayana I noticed you were on the review for #1465 is there something I'm missing here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I gather that SERVER_NAME is supposed to be a rarely specified override, not a fallback. Not sure how useful that actually is. One use would be to actively prevent any headers being used for URL construction, but this is also limited in that it cannot handle the same Sanic instance serving multiple hosts. Also worth noting is that this is supposed to be in the same format as Host, and thus includes any non-default port. Another consideration is what to do if none of these match. One could possibly still match against SSL SNI but if that is missing, I would not go further. In particular, sockname (ip/port or unix socket path) should not be exposed in external URLs. OTOH, all reasonable clients include Host or its HTTP/2 equivalent :authority header. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @abuckenheimer I think As for the SSL SNI goes, do we really need to do that in the core of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe that getting an SNI is practically certain at this point, whenever SSL is used, as vhosts would not work without. Getting the value is quite simple with a callback in SSL context. How that would be used in Sanic is another question, especially since it doesn't contain port (which is present in host/:authority, and is presumed to exist in SERVER_NAME as well, whenever a non-standard port is being used). What ever is going to be done with |
||
if server_name: | ||
host = server_name.split("//", 1)[-1].split("/", 1)[0] | ||
return parse_host(host)[0] | ||
return self.forwarded.get("host") or parse_host(self.host)[0] | ||
|
||
@property | ||
def forwarded(self): | ||
if self.parsed_forwarded is None: | ||
self.parsed_forwarded = ( | ||
parse_forwarded(self.headers, self.app.config) | ||
or parse_xforwarded(self.headers, self.app.config) | ||
or {} | ||
) | ||
return self.parsed_forwarded | ||
|
||
@property | ||
def server_port(self): | ||
""" | ||
Attempt to get the server's port in this order: | ||
`x-forwarded-port` header, :func:`Request.host`, actual port used by | ||
the transport layer socket. | ||
`forwarded` header, `x-forwarded-port` header, :func:`Request.host`, | ||
actual port used by the transport layer socket. | ||
:return: server port | ||
:rtype: int | ||
""" | ||
forwarded_port = self.headers.get("x-forwarded-port") or ( | ||
self.host.split(":")[1] if ":" in self.host else None | ||
if self.forwarded: | ||
return self.forwarded.get("port") or ( | ||
80 if self.scheme in ("http", "ws") else 443 | ||
) | ||
return ( | ||
parse_host(self.host)[1] | ||
or self.transport.get_extra_info("sockname")[1] | ||
) | ||
if forwarded_port: | ||
return int(forwarded_port) | ||
else: | ||
port = self.transport.get_extra_info("sockname")[1] | ||
return port | ||
|
||
@property | ||
def remote_addr(self): | ||
"""Attempt to return the original client ip based on X-Forwarded-For | ||
or X-Real-IP. If HTTP headers are unavailable or untrusted, returns | ||
an empty string. | ||
"""Attempt to return the original client ip based on `forwarded`, | ||
`x-forwarded-for` or `x-real-ip`. If HTTP headers are unavailable or | ||
untrusted, returns an empty string. | ||
|
||
:return: original client ip. | ||
""" | ||
if not hasattr(self, "_remote_addr"): | ||
if self.app.config.PROXIES_COUNT == 0: | ||
self._remote_addr = "" | ||
elif self.app.config.REAL_IP_HEADER and self.headers.get( | ||
self.app.config.REAL_IP_HEADER | ||
): | ||
self._remote_addr = self.headers[ | ||
self.app.config.REAL_IP_HEADER | ||
] | ||
elif self.app.config.FORWARDED_FOR_HEADER: | ||
forwarded_for = self.headers.get( | ||
self.app.config.FORWARDED_FOR_HEADER, "" | ||
).split(",") | ||
remote_addrs = [ | ||
addr | ||
for addr in [addr.strip() for addr in forwarded_for] | ||
if addr | ||
] | ||
if self.app.config.PROXIES_COUNT == -1: | ||
self._remote_addr = remote_addrs[0] | ||
elif len(remote_addrs) >= self.app.config.PROXIES_COUNT: | ||
self._remote_addr = remote_addrs[ | ||
-self.app.config.PROXIES_COUNT | ||
] | ||
else: | ||
self._remote_addr = "" | ||
else: | ||
self._remote_addr = "" | ||
self._remote_addr = self.forwarded.get("for") or "" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. first of all 👏 much cleaner moving this logic out into headers.py, second I have a slight preference for the more straight forward
sjsadowski marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return self._remote_addr | ||
|
||
@property | ||
def scheme(self): | ||
""" | ||
Attempt to get the request scheme. | ||
Seeking the value in this order: | ||
`x-forwarded-proto` header, `x-scheme` header, the sanic app itself. | ||
`forwarded` header, `x-forwarded-proto` header, | ||
`x-scheme` header, the sanic app itself. | ||
|
||
:return: http|https|ws|wss or arbitrary value given by the headers. | ||
:rtype: str | ||
""" | ||
forwarded_proto = self.headers.get( | ||
"x-forwarded-proto" | ||
) or self.headers.get("x-scheme") | ||
forwarded_proto = self.forwarded.get("proto") | ||
if forwarded_proto: | ||
return forwarded_proto | ||
|
||
|
@@ -471,7 +462,7 @@ def scheme(self): | |
@property | ||
def host(self): | ||
""" | ||
:return: the Host specified in the header, may contains port number. | ||
:return: the Host specified in the header, may contain a port number. | ||
""" | ||
# it appears that httptools doesn't return the host | ||
# so pull it from the headers | ||
|
@@ -514,6 +505,10 @@ def url_for(self, view_name, **kwargs): | |
:return: an absolute url to the given view | ||
:rtype: str | ||
""" | ||
# Full URL SERVER_NAME can only be handled in app.url_for | ||
if "//" in self.app.config.SERVER_NAME: | ||
return self.app.url_for(view_name, _external=True, **kwargs) | ||
|
||
scheme = self.scheme | ||
host = self.server_name | ||
port = self.server_port | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
add documentation to https://github.com/huge-success/sanic/blob/master/docs/sanic/config.md