diff --git a/CHANGES/3477.bugfix b/CHANGES/3477.bugfix new file mode 100644 index 00000000000..ae25f225d8d --- /dev/null +++ b/CHANGES/3477.bugfix @@ -0,0 +1 @@ +Fix handling of chunked+gzipped response when first chunk does not give uncompressed data diff --git a/aiohttp/streams.py b/aiohttp/streams.py index 9dd08794fa3..4bb96ab6e82 100644 --- a/aiohttp/streams.py +++ b/aiohttp/streams.py @@ -248,21 +248,38 @@ def feed_data(self, data: bytes, size: int=0) -> None: def begin_http_chunk_receiving(self) -> None: if self._http_chunk_splits is None: + if self.total_bytes: + raise RuntimeError("Called begin_http_chunk_receiving when" + "some data was already fed") self._http_chunk_splits = [] def end_http_chunk_receiving(self) -> None: if self._http_chunk_splits is None: raise RuntimeError("Called end_chunk_receiving without calling " "begin_chunk_receiving first") - if not self._http_chunk_splits or \ - self._http_chunk_splits[-1] != self.total_bytes: - self._http_chunk_splits.append(self.total_bytes) - # wake up readchunk when end of http chunk received - waiter = self._waiter - if waiter is not None: - self._waiter = None - set_result(waiter, False) + # self._http_chunk_splits contains logical byte offsets from start of + # the body transfer. Each offset is the offset of the end of a chunk. + # "Logical" means bytes, accessible for a user. + # If no chunks containig logical data were received, current position + # is difinitely zero. + pos = self._http_chunk_splits[-1] if self._http_chunk_splits else 0 + + if self.total_bytes == pos: + # We should not add empty chunks here. So we check for that. + # Note, when chunked + gzip is used, we can receive a chunk + # of compressed data, but that data may not be enough for gzip FSM + # to yield any uncompressed data. That's why current position may + # not change after receiving a chunk. + return + + self._http_chunk_splits.append(self.total_bytes) + + # wake up readchunk when end of http chunk received + waiter = self._waiter + if waiter is not None: + self._waiter = None + set_result(waiter, False) async def _wait(self, func_name: str) -> None: # StreamReader uses a future to link the protocol feed_data() method diff --git a/tests/test_streams.py b/tests/test_streams.py index 601efca083a..4d4b8357ce4 100644 --- a/tests/test_streams.py +++ b/tests/test_streams.py @@ -719,6 +719,35 @@ async def test_readchunk_with_other_read_calls(self) -> None: assert b'' == data assert not end_of_chunk + async def test_read_empty_chunks(self) -> None: + """Test that feeding empty chunks does not break stream""" + stream = self._make_one() + + # Simulate empty first chunk. This is significant special case + stream.begin_http_chunk_receiving() + stream.end_http_chunk_receiving() + + stream.begin_http_chunk_receiving() + stream.feed_data(b'ungzipped') + stream.end_http_chunk_receiving() + + # Possible when compression is enabled. + stream.begin_http_chunk_receiving() + stream.end_http_chunk_receiving() + + # is also possible + stream.begin_http_chunk_receiving() + stream.end_http_chunk_receiving() + + stream.begin_http_chunk_receiving() + stream.feed_data(b' data') + stream.end_http_chunk_receiving() + + stream.feed_eof() + + data = await stream.read() + assert data == b'ungzipped data' + async def test_readchunk_separate_http_chunk_tail(self) -> None: """Test that stream.readchunk returns (b'', True) when end of http chunk received after body