Fix handling of chunked+gzipped response when first chunk does not gi…

…ve uncompressed data (#3477) (#3485)
aio-libs · Jan 5, 2019 · a929c06 · a929c06
1 parent 10a9295
commit a929c06
Show file tree

Hide file tree

Showing 3 changed files with 55 additions and 8 deletions.
diff --git a/CHANGES/3477.bugfix b/CHANGES/3477.bugfix
@@ -0,0 +1 @@
+Fix handling of chunked+gzipped response when first chunk does not give uncompressed data
diff --git a/aiohttp/streams.py b/aiohttp/streams.py
@@ -248,21 +248,38 @@ def feed_data(self, data: bytes, size: int=0) -> None:
 
     def begin_http_chunk_receiving(self) -> None:
         if self._http_chunk_splits is None:
+            if self.total_bytes:
+                raise RuntimeError("Called begin_http_chunk_receiving when"
+                                   "some data was already fed")
             self._http_chunk_splits = []
 
     def end_http_chunk_receiving(self) -> None:
         if self._http_chunk_splits is None:
             raise RuntimeError("Called end_chunk_receiving without calling "
                                "begin_chunk_receiving first")
-        if not self._http_chunk_splits or \
-                self._http_chunk_splits[-1] != self.total_bytes:
-            self._http_chunk_splits.append(self.total_bytes)
 
-            # wake up readchunk when end of http chunk received
-            waiter = self._waiter
-            if waiter is not None:
-                self._waiter = None
-                set_result(waiter, False)
+        # self._http_chunk_splits contains logical byte offsets from start of
+        # the body transfer. Each offset is the offset of the end of a chunk.
+        # "Logical" means bytes, accessible for a user.
+        # If no chunks containig logical data were received, current position
+        # is difinitely zero.
+        pos = self._http_chunk_splits[-1] if self._http_chunk_splits else 0
+
+        if self.total_bytes == pos:
+            # We should not add empty chunks here. So we check for that.
+            # Note, when chunked + gzip is used, we can receive a chunk
+            # of compressed data, but that data may not be enough for gzip FSM
+            # to yield any uncompressed data. That's why current position may
+            # not change after receiving a chunk.
+            return
+
+        self._http_chunk_splits.append(self.total_bytes)
+
+        # wake up readchunk when end of http chunk received
+        waiter = self._waiter
+        if waiter is not None:
+            self._waiter = None
+            set_result(waiter, False)
 
     async def _wait(self, func_name: str) -> None:
         # StreamReader uses a future to link the protocol feed_data() method

diff --git a/tests/test_streams.py b/tests/test_streams.py
@@ -719,6 +719,35 @@ async def test_readchunk_with_other_read_calls(self) -> None:
         assert b'' == data
         assert not end_of_chunk
 
+    async def test_read_empty_chunks(self) -> None:
+        """Test that feeding empty chunks does not break stream"""
+        stream = self._make_one()
+
+        # Simulate empty first chunk. This is significant special case
+        stream.begin_http_chunk_receiving()
+        stream.end_http_chunk_receiving()
+
+        stream.begin_http_chunk_receiving()
+        stream.feed_data(b'ungzipped')
+        stream.end_http_chunk_receiving()
+
+        # Possible when compression is enabled.
+        stream.begin_http_chunk_receiving()
+        stream.end_http_chunk_receiving()
+
+        # is also possible
+        stream.begin_http_chunk_receiving()
+        stream.end_http_chunk_receiving()
+
+        stream.begin_http_chunk_receiving()
+        stream.feed_data(b' data')
+        stream.end_http_chunk_receiving()
+
+        stream.feed_eof()
+
+        data = await stream.read()
+        assert data == b'ungzipped data'
+
     async def test_readchunk_separate_http_chunk_tail(self) -> None:
         """Test that stream.readchunk returns (b'', True) when end of
         http chunk received after body
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Fix handling of chunked+gzipped response when first chunk does not give uncompressed data