From 2e46f2426e2845e6a088e21451d3d0031b804cea Mon Sep 17 00:00:00 2001 From: Bert JW Regeer Date: Mon, 23 Dec 2019 14:38:15 +0100 Subject: [PATCH] Validate HTTP header-field more completely This was brought about by certain whitespace characters being allowed that are not allowed in the HTTP standard. Waitress would dutifully strip those whitespace characters and continue on as if nothing mattered, however whitespace in HTTP messages does matter and could allow for HTTP request smuggling if the front-end proxy server does not agree with the back-end server on how to parse a HTTP message. This disallows things like this: Content-Length: 10 Transfer-Encoding:[0x0b]chunked Which would get parsed by a front-end server as a request with Content-Length 10, and an invalid Transfer-Encoding header, but would get parsed as a chunked request by Waitress. --- waitress/parser.py | 46 ++++++++++--------- waitress/tests/test_parser.py | 85 ++++++++++++++++++++++++++++++++++- 2 files changed, 109 insertions(+), 22 deletions(-) diff --git a/waitress/parser.py b/waitress/parser.py index dd591f2d..8b07dd6a 100644 --- a/waitress/parser.py +++ b/waitress/parser.py @@ -29,6 +29,7 @@ ServerNotImplemented, find_double_newline, ) +from .rfc7230 import HEADER_FIELD class ParsingError(Exception): @@ -38,7 +39,6 @@ class ParsingError(Exception): class TransferEncodingNotImplemented(Exception): pass - class HTTPRequestParser(object): """A structure that collects the HTTP request. @@ -208,26 +208,27 @@ def parse_header(self, header_plus): headers = self.headers for line in lines: - index = line.find(b":") - if index > 0: - key = line[:index] - - if key != key.strip(): - raise ParsingError("Invalid whitespace after field-name") - - if b"_" in key: - continue - value = line[index + 1 :].strip() - key1 = tostr(key.upper().replace(b"-", b"_")) - # If a header already exists, we append subsequent values - # seperated by a comma. Applications already need to handle - # the comma seperated values, as HTTP front ends might do - # the concatenation for you (behavior specified in RFC2616). - try: - headers[key1] += tostr(b", " + value) - except KeyError: - headers[key1] = tostr(value) - # else there's garbage in the headers? + header = HEADER_FIELD.match(line) + + if not header: + raise ParsingError("Invalid header") + + key, value = header.group('name', 'value') + + if b"_" in key: + # TODO(xistence): Should we drop this request instead? + continue + + value = value.strip() + key1 = tostr(key.upper().replace(b"-", b"_")) + # If a header already exists, we append subsequent values + # seperated by a comma. Applications already need to handle + # the comma seperated values, as HTTP front ends might do + # the concatenation for you (behavior specified in RFC2616). + try: + headers[key1] += tostr(b", " + value) + except KeyError: + headers[key1] = tostr(value) # command, uri, version will be bytes command, uri, version = crack_first_line(first_line) @@ -352,6 +353,9 @@ def get_header_lines(header): r = [] lines = header.split(b"\r\n") for line in lines: + if not line: + continue + if b"\r" in line or b"\n" in line: raise ParsingError('Bare CR or LF found in header line "%s"' % tostr(line)) diff --git a/waitress/tests/test_parser.py b/waitress/tests/test_parser.py index 1a95e23e..8d42600b 100644 --- a/waitress/tests/test_parser.py +++ b/waitress/tests/test_parser.py @@ -308,10 +308,93 @@ def test_parse_header_invalid_whitespace(self): try: self.parser.parse_header(data) except ParsingError as e: - self.assertIn("Invalid whitespace after field-name", e.args[0]) + self.assertIn("Invalid header", e.args[0]) else: # pragma: nocover self.assertTrue(False) + def test_parse_header_invalid_whitespace_vtab(self): + from waitress.parser import ParsingError + + data = b"GET /foobar HTTP/1.1\r\nfoo:\x0bbar\r\n" + try: + self.parser.parse_header(data) + except ParsingError as e: + self.assertIn("Invalid header", e.args[0]) + else: # pragma: nocover + self.assertTrue(False) + + def test_parse_header_invalid_no_colon(self): + from waitress.parser import ParsingError + + data = b"GET /foobar HTTP/1.1\r\nfoo: bar\r\nnotvalid\r\n" + try: + self.parser.parse_header(data) + except ParsingError as e: + self.assertIn("Invalid header", e.args[0]) + else: # pragma: nocover + self.assertTrue(False) + + def test_parse_header_invalid_folding_spacing(self): + from waitress.parser import ParsingError + + data = b"GET /foobar HTTP/1.1\r\nfoo: bar\r\n\t\x0bbaz\r\n" + try: + self.parser.parse_header(data) + except ParsingError as e: + self.assertIn("Invalid header", e.args[0]) + else: # pragma: nocover + self.assertTrue(False) + + def test_parse_header_invalid_chars(self): + from waitress.parser import ParsingError + + data = b"GET /foobar HTTP/1.1\r\nfoo: bar\r\n\foo: \x0bbaz\r\n" + try: + self.parser.parse_header(data) + except ParsingError as e: + self.assertIn("Invalid header", e.args[0]) + else: # pragma: nocover + self.assertTrue(False) + + def test_parse_header_empty(self): + from waitress.parser import ParsingError + + data = b"GET /foobar HTTP/1.1\r\nfoo: bar\r\nempty:\r\n" + self.parser.parse_header(data) + + self.assertIn("EMPTY", self.parser.headers) + self.assertIn("FOO", self.parser.headers) + self.assertEqual(self.parser.headers["EMPTY"], "") + self.assertEqual(self.parser.headers["FOO"], "bar") + + def test_parse_header_multiple_values(self): + from waitress.parser import ParsingError + + data = b"GET /foobar HTTP/1.1\r\nfoo: bar, whatever, more, please, yes\r\n" + self.parser.parse_header(data) + + self.assertIn("FOO", self.parser.headers) + self.assertEqual(self.parser.headers["FOO"], "bar, whatever, more, please, yes") + + def test_parse_header_multiple_values_header_folded(self): + from waitress.parser import ParsingError + + data = b"GET /foobar HTTP/1.1\r\nfoo: bar, whatever,\r\n more, please, yes\r\n" + self.parser.parse_header(data) + + self.assertIn("FOO", self.parser.headers) + self.assertEqual(self.parser.headers["FOO"], "bar, whatever, more, please, yes") + + def test_parse_header_multiple_values_header_folded_multiple(self): + from waitress.parser import ParsingError + + data = b"GET /foobar HTTP/1.1\r\nfoo: bar, whatever,\r\n more\r\nfoo: please, yes\r\n" + self.parser.parse_header(data) + + self.assertIn("FOO", self.parser.headers) + self.assertEqual(self.parser.headers["FOO"], "bar, whatever, more, please, yes") + + class Test_split_uri(unittest.TestCase): def _callFUT(self, uri):