Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use Forwarded, X-Forwarded-Scheme and X-Forwarded-Host for better scheme and host resolution #1881

Merged
merged 26 commits into from
May 22, 2017
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ Changes

- Do not unquote `+` in match_info values #1816

- Use Forwarded, X-Forwarded-Scheme and X-Forwarded-Host for better scheme and
host resolution. #1134

- Fix sub-application middlewares resolution order #1853

- Fix applications comparison #1866
Expand Down
1 change: 1 addition & 0 deletions CONTRIBUTORS.txt
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ Enrique Saez
Erich Healy
Eugene Chernyshov
Eugene Naydenov
Evert Lammerts
Frederik Gladhorn
Frederik Peter Aalund
Gabriel Tremblay
Expand Down
3 changes: 3 additions & 0 deletions aiohttp/hdrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
ETAG = istr('ETAG')
EXPECT = istr('EXPECT')
EXPIRES = istr('EXPIRES')
FORWARDED = istr('FORWARDED')
FROM = istr('FROM')
HOST = istr('HOST')
IF_MATCH = istr('IF-MATCH')
Expand Down Expand Up @@ -89,3 +90,5 @@
WANT_DIGEST = istr('WANT-DIGEST')
WARNING = istr('WARNING')
WWW_AUTHENTICATE = istr('WWW-AUTHENTICATE')
X_FORWARDED_HOST = istr('X-FORWARDED-HOST')
X_FORWARDED_PROTO = istr('X-FORWARDED-PROTO')
4 changes: 4 additions & 0 deletions aiohttp/web.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ def __init__(self, *,
if loop is not None:
warnings.warn("loop argument is deprecated", ResourceWarning)

if secure_proxy_ssl_header is not None:
warnings.warn(
"secure_proxy_ssl_header is deprecated", ResourceWarning)

self._debug = debug
self._router = router
self._secure_proxy_ssl_header = secure_proxy_ssl_header
Expand Down
108 changes: 98 additions & 10 deletions aiohttp/web_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import datetime
import json
import re
import string
import tempfile
import warnings
from email.utils import parsedate
Expand All @@ -21,6 +22,35 @@
FileField = collections.namedtuple(
'Field', 'name filename file content_type headers')

_TCHAR = string.digits + string.ascii_letters + r"!#$%&'*+\-.^_`|~"
# notice the escape of '-' to prevent interpretation as range

_TOKEN = r'[{tchar}]*'.format(tchar=_TCHAR)

_QDTEXT = r'[{}]'.format(
r''.join(chr(c) for c in (0x09, 0x20, 0x21, *range(0x23, 0x7F))))
# qdtext includes 0x5C to escape 0x5D ('\]')
# qdtext excludes obs-text (because obsoleted, and encoding not specified)

_QUOTED_PAIR = r'\\[\t {tchar}]'.format(tchar=_TCHAR)

_QUOTED_STRING = r'"(?:{quoted_pair}|{qdtext})*"'.format(
qdtext=_QDTEXT, quoted_pair=_QUOTED_PAIR)

_FORWARDED_PARAMS = (
r'[bB][yY]|[fF][oO][rR]|[hH][oO][sS][tT]|[pP][rR][oO][tT][oO]')

_FORWARDED_PAIR = (
r'^ *({forwarded_params})=({token}|{quoted_string}) *$'.format(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIRC spaces around = are allowed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure but as far as I can see that's not the case. Section 4 only says forwarded-pair = token "=" value and section 7 recalls Note that an HTTP list allows white spaces to occur between the identifiers, and the list may be split over multiple header fields.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMHO it's rare case but still allowed

forwarded_params=_FORWARDED_PARAMS,
token=_TOKEN,
quoted_string=_QUOTED_STRING))
# allow whitespace as specified in RFC 7239 section 7.1

_QUOTED_PAIR_REPLACE_RE = re.compile(r'\\([\t {tchar}])'.format(tchar=_TCHAR))
# same pattern as _QUOTED_PAIR but contains a capture group

_FORWARDED_PAIR_RE = re.compile(_FORWARDED_PAIR)

############################################################
# HTTP Request
Expand Down Expand Up @@ -150,16 +180,61 @@ def secure(self):
"""
return self.url.scheme == 'https'

@reify
def forwarded(self):
""" A frozendict containing parsed Forwarded header(s).

Makes an effort to parse Forwarded headers as specified by RFC 7239:

- It adds all parameters (by, for, host, proto) in the order it finds
them; starting at the topmost / first added 'Forwarded' header, at
the leftmost / first-added parwameter.
- It checks that the value has valid syntax in general as specified in
section 4: either a 'token' or a 'quoted-string'.
- It un-escapes found escape sequences.
- It does NOT validate 'by' and 'for' contents as specified in section
6.
- It does NOT validate 'host' contents (Host ABNF).
- It does NOT validate 'proto' contents for valid URI scheme names.

Returns a dict(by=tuple(...), for=tuple(...), host=tuple(...),
proto=tuple(...), )
"""
params = {'by': [], 'for': [], 'host': [], 'proto': []}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe the property's type should be MultyDict instead of dict of lists.
It preserves all information about proxies in right order but original host still could be checked as req.forwarded.get('host')

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use MultiDict here instead of dict of pairs. It preserves all information about proxies.

if hdrs.FORWARDED in self._message.headers:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Drop the check, in is not free.
Use for forwarded_elm in self._message.headers.getall(hdrs.FORWARDED, []) instead

for forwarded_elm in self._message.headers.getall(hdrs.FORWARDED):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Drop previous line, in is not free but getall(hdrs.FORWARDED, ()) does the check using single call to data structure.

if len(forwarded_elm):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Drop this check, iteration over empty forwarded_elm is pretty fine.

forwarded_pairs = tuple(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't create a tuple but make a generator comprehension. It saves both time and memory.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please support Forwarded: by=first, by=second form, it's equal to

Forwarded: by=first
Forwarded: by=second

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is supported by the regexes and I've included tests for it

_FORWARDED_PAIR_RE.findall(pair)
for pair in forwarded_elm.split(';'))
for forwarded_pair in forwarded_pairs:
if len(forwarded_pair) != 1:
# non-compliant syntax, ignore
continue
param = forwarded_pair[0][0].lower()
value = forwarded_pair[0][1]
if len(value) and value[0] == '"':
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a check for value[-1] == '"'

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If empty value allowed? I mean len(value) == 0.
Also if value: gives the same result as if len(value) but works a bit faster.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's no need to check for a closing quote: the regex validates quoted-string so any value starting with " must also have a closing ".

A token may be empty (but note again that I don't check the syntax in section 6)

# quoted string: replace quotes and escape
# sequences
value = _QUOTED_PAIR_REPLACE_RE.sub(
r'\1', value[1:-1])
params[param].append(value)
return params
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

return MultiDictProxy(params) -- it's an immutable view.


@reify
def _scheme(self):
proto = 'http'
if self._transport.get_extra_info('sslcontext'):
return 'https'
secure_proxy_ssl_header = self._secure_proxy_ssl_header
if secure_proxy_ssl_header is not None:
header, value = secure_proxy_ssl_header
proto = 'https'
elif self._secure_proxy_ssl_header is not None:
header, value = self._secure_proxy_ssl_header
if self.headers.get(header) == value:
return 'https'
return 'http'
proto = 'https'
elif self.forwarded['proto']:
proto = self.forwarded['proto'][0]
elif hdrs.X_FORWARDED_PROTO in self._message.headers:
proto = self._message.headers[hdrs.X_FORWARDED_PROTO]
return proto

@property
def method(self):
Expand All @@ -179,16 +254,29 @@ def version(self):

@reify
def host(self):
"""Read only property for getting *HOST* header of request.
""" Hostname of the request.

Hostname is resolved through the following headers, in this order:

- Forwarded
- X-Forwarded-Host
- Host

Returns str or None if HTTP request has no HOST header.
Returns str, or None if no hostname is found in the headers.
"""
return self._message.headers.get(hdrs.HOST)
host = None
if self.forwarded['host']:
host = self.forwarded['host'][0]
elif hdrs.X_FORWARDED_HOST in self._message.headers:
host = self._message.headers[hdrs.X_FORWARDED_HOST]
elif hdrs.HOST in self._message.headers:
host = self._message.headers[hdrs.HOST]
return host

@reify
def url(self):
return URL('{}://{}{}'.format(self._scheme,
self._message.headers.get(hdrs.HOST),
self.host,
str(self._rel_url)))

@property
Expand Down
22 changes: 12 additions & 10 deletions docs/web_reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,10 @@ and :ref:`aiohttp-web-signals` handlers.

A string representing the scheme of the request.

The scheme is ``'https'`` if transport for request handling is
*SSL* or ``secure_proxy_ssl_header`` is matching.
The scheme is ``'https'`` if transport for request handling is *SSL*, if
``secure_proxy_ssl_header`` is matching (deprecated), if the ``proto``
portion of a ``Forward`` header is present and contains ``https``, or if
an ``X-Forwarded-Proto`` header is present and contains ``https``.

``'http'`` otherwise.

Expand All @@ -81,9 +83,7 @@ and :ref:`aiohttp-web-signals` handlers.

.. attribute:: secure

A boolean indicating if transport for request handling is
*SSL* or ``secure_proxy_ssl_header`` is matching,
e.g. if ``request.url.scheme == 'https'``
Shorthand for ``request.url.scheme == 'https'``

Read-only :class:`bool` property.

Expand Down Expand Up @@ -1049,7 +1049,7 @@ WebSocketResponse


.. seealso:: :ref:`WebSockets handling<aiohttp-web-websockets>`


WebSocketReady
^^^^^^^^^^^^^^
Expand Down Expand Up @@ -1233,11 +1233,13 @@ duplicated like one using :meth:`Application.copy`.
If param is ``None`` :func:`asyncio.get_event_loop`
used for getting default event loop.

:param tuple secure_proxy_ssl_header: Secure proxy SSL header. Can
be used to detect request scheme,
e.g. ``secure_proxy_ssl_header=('X-Forwarded-Proto', 'https')``.
:param tuple secure_proxy_ssl_header: Default: ``None``.

.. deprecated:: 2.1

See ``request.url.scheme`` for built-in resolution of the current
scheme using the standard and de-facto standard headers.

Default: ``None``.
:param bool tcp_keepalive: Enable TCP Keep-Alive. Default: ``True``.
:param int keepalive_timeout: Number of seconds before closing Keep-Alive
connection. Default: ``75`` seconds (NGINX's default value).
Expand Down
97 changes: 97 additions & 0 deletions tests/test_web_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,103 @@ def test_https_scheme_by_secure_proxy_ssl_header_false_test(make_request):
assert req.secure is False


def test_single_forwarded_header(make_request):
header = 'by=identifier; for=identifier; host=identifier; proto=identifier'
req = make_request('GET', '/', headers=CIMultiDict({'Forwarded': header}))
assert req.forwarded['by'] == ['identifier',]
assert req.forwarded['for'] == ['identifier',]
assert req.forwarded['host'] == ['identifier',]
assert req.forwarded['proto'] == ['identifier',]

def test_single_forwarded_header_camelcase(make_request):
header = 'bY=identifier; fOr=identifier; HOst=identifier; pRoTO=identifier'
req = make_request('GET', '/', headers=CIMultiDict({'Forwarded': header}))
assert req.forwarded['by'] == ['identifier',]
assert req.forwarded['for'] == ['identifier',]
assert req.forwarded['host'] == ['identifier',]
assert req.forwarded['proto'] == ['identifier',]

def test_single_forwarded_header_single_param(make_request):
header = 'BY=identifier'
req = make_request('GET', '/', headers=CIMultiDict({'Forwarded': header}))
assert req.forwarded['by'] == ['identifier',]

def test_single_forwarded_header_multiple_param(make_request):
header = 'By=identifier1;BY=identifier2; By=identifier3; BY=identifier4'
req = make_request('GET', '/', headers=CIMultiDict({'Forwarded': header}))
assert req.forwarded['by'] == ['identifier1', 'identifier2', 'identifier3',
'identifier4']

def test_single_forwarded_header_quoted_escaped(make_request):
header = 'Proto=identifier; pROTO="\lala lan\d\~ 123\!&"'
req = make_request('GET', '/', headers=CIMultiDict({'Forwarded': header}))
assert req.forwarded['proto'] == ['identifier', 'lala land~ 123!&']

def test_multiple_forwarded_headers(make_request):
headers = CIMultiDict()
headers.add('Forwarded', 'By=identifier1;BY=identifier2')
headers.add('Forwarded', 'By=identifier3; BY=identifier4')
req = make_request('GET', '/', headers=headers)
assert req.forwarded['by'] == ['identifier1', 'identifier2', 'identifier3',
'identifier4']


def test_https_scheme_by_forwarded_header(make_request):
req = make_request('GET', '/',
headers=CIMultiDict(
{'Forwarded': 'by=; for=; host=; proto=https'}))
assert "https" == req.scheme
assert req.secure is True


def test_https_scheme_by_malformed_forwarded_header(make_request):
req = make_request('GET', '/',
headers=CIMultiDict({'Forwarded': 'malformed value'}))
assert "http" == req.scheme
assert req.secure is False


def test_https_scheme_by_x_forwarded_proto_header(make_request):
req = make_request('GET', '/',
headers=CIMultiDict({'X-Forwarded-Proto': 'https'}))
assert "https" == req.scheme
assert req.secure is True


def test_https_scheme_by_x_forwarded_proto_header_no_tls(make_request):
req = make_request('GET', '/',
headers=CIMultiDict({'X-Forwarded-Proto': 'http'}))
assert "http" == req.scheme
assert req.secure is False


def test_host_by_forwarded_header(make_request):
req = make_request('GET', '/',
headers=CIMultiDict(
{'Forwarded': 'by=; for=; host'
'=example.com; proto=https'}))
assert req.host == 'example.com'


def test_host_by_forwarded_header_malformed(make_request):
req = make_request('GET', '/',
headers=CIMultiDict({'Forwarded': 'malformed value'}))
assert req.host is None


def test_host_by_x_forwarded_host_header(make_request):
req = make_request('GET', '/',
headers=CIMultiDict(
{'X-Forwarded-Host': 'example.com'}))
assert req.host == 'example.com'


def test_host_by_host_header(make_request):
req = make_request('GET', '/',
headers=CIMultiDict({'Host': 'example.com'}))
assert req.host == 'example.com'


def test_raw_headers(make_request):
req = make_request('GET', '/',
headers=CIMultiDict({'X-HEADER': 'aaa'}))
Expand Down