Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow digits and valid token chars in headers #134

Merged
merged 1 commit into from
Apr 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 15 additions & 22 deletions multipart/multipart.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,13 +136,14 @@ class MultipartState(IntEnum):
LOWER_Z = b"z"[0]
NULL = b"\x00"[0]


# Lower-casing a character is different, because of the difference between
# str on Py2, and bytes on Py3. Same with getting the ordinal value of a byte,
# and joining a list of bytes together.
# These functions abstract that.
def lower_char(c: int) -> int:
return c | 0x20
# Set of ASCII characters that are valid in HTTP tokens.
# Per RFC 7230, section 3.2.6, this is all alphanumeric characters
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add the link on the comment as well, please?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure. I added a commit with the link.

# and these: !#$%&'*+-.^_`|~
TOKEN_CHARS_SET = frozenset(
b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
b"abcdefghijklmnopqrstuvwxyz"
b"0123456789"
b"!#$%&'*+-.^_`|~")


def ord_char(c: int) -> int:
Expand Down Expand Up @@ -1175,12 +1176,8 @@ def data_callback(name: str, remaining: bool = False) -> None:
# Increment our index in the header.
index += 1

# Do nothing if we encounter a hyphen.
if c == HYPHEN:
pass

# If we've reached a colon, we're done with this header.
elif c == COLON:
if c == COLON:
# A 0-length header is an error.
if index == 1:
msg = "Found 0-length header at %d" % (i,)
Expand All @@ -1195,16 +1192,12 @@ def data_callback(name: str, remaining: bool = False) -> None:
# Move to parsing the header value.
state = MultipartState.HEADER_VALUE_START

else:
# Lower-case this character, and ensure that it is in fact
# a valid letter. If not, it's an error.
cl = lower_char(c)
if cl < LOWER_A or cl > LOWER_Z:
msg = "Found non-alphanumeric character %r in " "header at %d" % (c, i)
self.logger.warning(msg)
e = MultipartParseError(msg)
e.offset = i
raise e
elif c not in TOKEN_CHARS_SET:
msg = "Found invalid character %r in header at %d" % (c, i)
self.logger.warning(msg)
e = MultipartParseError(msg)
e.offset = i
raise e

elif state == MultipartState.HEADER_VALUE_START:
# Skip leading spaces.
Expand Down
2 changes: 1 addition & 1 deletion tests/test_data/http/bad_header_char.http
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
------WebKitFormBoundaryTkr3kCBQlBe1nrhc
Content-999position: form-data; name="field"
Content-<<<position: form-data; name="field"

This is a test.
------WebKitFormBoundaryTkr3kCBQlBe1nrhc--
11 changes: 11 additions & 0 deletions tests/test_data/http/header_with_number.http
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
--b8825ae386be4fdc9644d87e392caad3
Content-Disposition: form-data; filename="secret.txt"; name="files"
Content-Type: text/plain; charset=utf-8
X-Funky-Header-1: bar
abcdefghijklmnopqrstuvwxyz01234: foo
ABCDEFGHIJKLMNOPQRSTUVWXYZ56789: bar
other!#$%&'*+-.^_`|~: baz
Content-Length: 6

aaaaaa
--b8825ae386be4fdc9644d87e392caad3--
7 changes: 7 additions & 0 deletions tests/test_data/http/header_with_number.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
boundary: b8825ae386be4fdc9644d87e392caad3
expected:
- name: files
type: file
file_name: secret.txt
data: !!binary |
YWFhYWFh