From 073d9827706879e75b5191cf337ce7e17f3f5561 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Thu, 25 Mar 2021 15:20:36 +0000 Subject: [PATCH 1/4] Fix Content-Length for unicode file contents with multipart --- httpx/_multipart.py | 8 ++------ tests/test_multipart.py | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/httpx/_multipart.py b/httpx/_multipart.py index f690afc9ae..bf75a5663b 100644 --- a/httpx/_multipart.py +++ b/httpx/_multipart.py @@ -40,11 +40,7 @@ def render_headers(self) -> bytes: def render_data(self) -> bytes: if not hasattr(self, "_data"): - self._data = ( - self.value - if isinstance(self.value, bytes) - else self.value.encode("utf-8") - ) + self._data = to_bytes(self.value) return self._data @@ -88,7 +84,7 @@ def get_length(self) -> int: headers = self.render_headers() if isinstance(self.file, (str, bytes)): - return len(headers) + len(self.file) + return len(headers) + len(to_bytes(self.file)) # Let's do our best not to read `file` into memory. 
try: diff --git a/tests/test_multipart.py b/tests/test_multipart.py index 94813932a8..199af4b0a5 100644 --- a/tests/test_multipart.py +++ b/tests/test_multipart.py @@ -133,6 +133,29 @@ def test_multipart_encode(tmp_path: typing.Any) -> None: assert content == b"".join(stream) +def test_multipart_encode_unicode_file_contents() -> None: + files = {"file": ("name.txt", "<únicode string>")} + + with mock.patch("os.urandom", return_value=os.urandom(16)): + boundary = os.urandom(16).hex() + + headers, stream = encode_request(files=files) + assert isinstance(stream, typing.Iterable) + + content = ( + '--{0}\r\nContent-Disposition: form-data; name="file";' + ' filename="name.txt"\r\n' + "Content-Type: text/plain\r\n\r\n<únicode string>\r\n" + "--{0}--\r\n" + "".format(boundary).encode("utf-8") + ) + assert headers == { + "Content-Type": f"multipart/form-data; boundary={boundary}", + "Content-Length": str(len(content)), + } + assert content == b"".join(stream) + + def test_multipart_encode_files_allows_filenames_as_none() -> None: files = {"file": (None, io.BytesIO(b""))} with mock.patch("os.urandom", return_value=os.urandom(16)): From dc15b799d49b73b9dd6c08897f5271bfca9cf2c5 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Thu, 25 Mar 2021 16:36:24 +0000 Subject: [PATCH 2/4] Handle bool and None cases for URLEncoded data --- httpx/_content.py | 9 +++++++- httpx/_models.py | 6 ++--- httpx/_utils.py | 2 +- tests/test_content.py | 51 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 63 insertions(+), 5 deletions(-) diff --git a/httpx/_content.py b/httpx/_content.py index bf402c9e29..0b9672be3f 100644 --- a/httpx/_content.py +++ b/httpx/_content.py @@ -21,6 +21,7 @@ RequestFiles, ResponseContent, ) +from ._utils import primitive_value_to_str class PlainByteStream: @@ -106,7 +107,13 @@ def encode_content( def encode_urlencoded_data( data: dict, ) -> Tuple[Dict[str, str], ByteStream]: - body = urlencode(data, doseq=True).encode("utf-8") + plain_data = [] + for key, 
value in data.items(): + if isinstance(value, (list, tuple)): + plain_data.extend([(key, primitive_value_to_str(item)) for item in value]) + else: + plain_data.append((key, primitive_value_to_str(value))) + body = urlencode(plain_data, doseq=True).encode("utf-8") content_length = str(len(body)) content_type = "application/x-www-form-urlencoded" headers = {"Content-Length": content_length, "Content-Type": content_type} diff --git a/httpx/_models.py b/httpx/_models.py index 34fb2d388c..ade5a31925 100644 --- a/httpx/_models.py +++ b/httpx/_models.py @@ -54,7 +54,7 @@ normalize_header_value, obfuscate_sensitive_headers, parse_header_links, - str_query_param, + primitive_value_to_str, ) @@ -450,8 +450,8 @@ def __init__(self, *args: QueryParamTypes, **kwargs: typing.Any) -> None: else: items = flatten_queryparams(value) - self._list = [(str(k), str_query_param(v)) for k, v in items] - self._dict = {str(k): str_query_param(v) for k, v in items} + self._list = [(str(k), primitive_value_to_str(v)) for k, v in items] + self._dict = {str(k): primitive_value_to_str(v) for k, v in items} def keys(self) -> typing.KeysView: return self._dict.keys() diff --git a/httpx/_utils.py b/httpx/_utils.py index 072db3f1e8..149bea6fb7 100644 --- a/httpx/_utils.py +++ b/httpx/_utils.py @@ -56,7 +56,7 @@ def normalize_header_value( return value.encode(encoding or "ascii") -def str_query_param(value: "PrimitiveData") -> str: +def primitive_value_to_str(value: "PrimitiveData") -> str: """ Coerce a primitive data type into a string value for query params. 
diff --git a/tests/test_content.py b/tests/test_content.py index 384f9f2287..1dda028632 100644 --- a/tests/test_content.py +++ b/tests/test_content.py @@ -139,6 +139,57 @@ async def test_urlencoded_content(): assert async_content == b"Hello=world%21" +@pytest.mark.asyncio +async def test_urlencoded_boolean(): + headers, stream = encode_request(data={"example": True}) + assert isinstance(stream, typing.Iterable) + assert isinstance(stream, typing.AsyncIterable) + + sync_content = b"".join([part for part in stream]) + async_content = b"".join([part async for part in stream]) + + assert headers == { + "Content-Length": "12", + "Content-Type": "application/x-www-form-urlencoded", + } + assert sync_content == b"example=true" + assert async_content == b"example=true" + + +@pytest.mark.asyncio +async def test_urlencoded_none(): + headers, stream = encode_request(data={"example": None}) + assert isinstance(stream, typing.Iterable) + assert isinstance(stream, typing.AsyncIterable) + + sync_content = b"".join([part for part in stream]) + async_content = b"".join([part async for part in stream]) + + assert headers == { + "Content-Length": "8", + "Content-Type": "application/x-www-form-urlencoded", + } + assert sync_content == b"example=" + assert async_content == b"example=" + + +@pytest.mark.asyncio +async def test_urlencoded_list(): + headers, stream = encode_request(data={"example": ["a", 1, True]}) + assert isinstance(stream, typing.Iterable) + assert isinstance(stream, typing.AsyncIterable) + + sync_content = b"".join([part for part in stream]) + async_content = b"".join([part async for part in stream]) + + assert headers == { + "Content-Length": "32", + "Content-Type": "application/x-www-form-urlencoded", + } + assert sync_content == b"example=a&example=1&example=true" + assert async_content == b"example=a&example=1&example=true" + + @pytest.mark.asyncio async def test_multipart_files_content(): files = {"file": io.BytesIO(b"")} From 
30dd7d3c7c7023fc516e65c7a223006f1238bed8 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Thu, 25 Mar 2021 16:47:24 +0000 Subject: [PATCH 3/4] Handle int, float, bool, and None for multipart or urlencoded data --- httpx/_multipart.py | 13 +++++++++---- tests/test_multipart.py | 6 +++++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/httpx/_multipart.py b/httpx/_multipart.py index bf75a5663b..b5f8fb48f8 100644 --- a/httpx/_multipart.py +++ b/httpx/_multipart.py @@ -8,6 +8,7 @@ format_form_param, guess_content_type, peek_filelike_length, + primitive_value_to_str, to_bytes, ) @@ -17,17 +18,21 @@ class DataField: A single form field item, within a multipart form field. """ - def __init__(self, name: str, value: typing.Union[str, bytes]) -> None: + def __init__( + self, name: str, value: typing.Union[str, bytes, int, float, None] + ) -> None: if not isinstance(name, str): raise TypeError( f"Invalid type for name. Expected str, got {type(name)}: {name!r}" ) - if not isinstance(value, (str, bytes)): + if value is not None and not isinstance(value, (str, bytes, int, float)): raise TypeError( - f"Invalid type for value. Expected str or bytes, got {type(value)}: {value!r}" + f"Invalid type for value. 
Expected primitive type, got {type(value)}: {value!r}" ) self.name = name - self.value = value + self.value: typing.Union[str, bytes] = ( + value if isinstance(value, bytes) else primitive_value_to_str(value) + ) def render_headers(self) -> bytes: if not hasattr(self, "_headers"): diff --git a/tests/test_multipart.py b/tests/test_multipart.py index 199af4b0a5..9eb62f785b 100644 --- a/tests/test_multipart.py +++ b/tests/test_multipart.py @@ -57,7 +57,7 @@ def test_multipart_invalid_key(key): assert repr(key) in str(e.value) -@pytest.mark.parametrize(("value"), (1, 2.3, None, [None, "abc"], {None: "abc"})) +@pytest.mark.parametrize(("value"), (object(), {"key": "value"})) def test_multipart_invalid_value(value): client = httpx.Client(transport=httpx.MockTransport(echo_request_content)) @@ -104,6 +104,8 @@ def test_multipart_encode(tmp_path: typing.Any) -> None: "b": b"C", "c": ["11", "22", "33"], "d": "", + "e": True, + "f": "", } files = {"file": ("name.txt", open(path, "rb"))} @@ -120,6 +122,8 @@ def test_multipart_encode(tmp_path: typing.Any) -> None: '--{0}\r\nContent-Disposition: form-data; name="c"\r\n\r\n22\r\n' '--{0}\r\nContent-Disposition: form-data; name="c"\r\n\r\n33\r\n' '--{0}\r\nContent-Disposition: form-data; name="d"\r\n\r\n\r\n' + '--{0}\r\nContent-Disposition: form-data; name="e"\r\n\r\ntrue\r\n' + '--{0}\r\nContent-Disposition: form-data; name="f"\r\n\r\n\r\n' '--{0}\r\nContent-Disposition: form-data; name="file";' ' filename="name.txt"\r\n' "Content-Type: text/plain\r\n\r\n\r\n" From b8d3f081db358cbbd5917b9dd61c2f8625d27781 Mon Sep 17 00:00:00 2001 From: Tom Christie Date: Fri, 26 Mar 2021 12:42:32 +0000 Subject: [PATCH 4/4] Update httpx/_utils.py Co-authored-by: Florimond Manca --- httpx/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/httpx/_utils.py b/httpx/_utils.py index 149bea6fb7..cf136a3ba8 100644 --- a/httpx/_utils.py +++ b/httpx/_utils.py @@ -58,7 +58,7 @@ def normalize_header_value( def 
primitive_value_to_str(value: "PrimitiveData") -> str: """ - Coerce a primitive data type into a string value for query params. + Coerce a primitive data type into a string value. Note that we prefer JSON-style 'true'/'false' for boolean values here. """