Skip to content

Commit

Permalink
fix more boundary conditions
Browse files Browse the repository at this point in the history
don't miss boundary in small chunks; don't hang if boundary not found till eof

sometimes small multipart body not read at all

small fix for Stream mock
  • Loading branch information
tumb1er committed Feb 1, 2016
1 parent 0ef2382 commit 6edd825
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 2 deletions.
10 changes: 8 additions & 2 deletions aiohttp/multipart.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ def __init__(self, boundary, headers, content):
self._read_bytes = 0
self._unread = deque()
self._prev_chunk = None
self._content_eof = 0

@asyncio.coroutine
def __aiter__(self):
Expand Down Expand Up @@ -310,13 +311,14 @@ def _read_chunk_from_stream(self, size):
self._prev_chunk = yield from self._content.read(size)

chunk = yield from self._content.read(size)

self._content_eof += int(self._content.at_eof())
assert self._content_eof < 3, "Reading after EOF"

This comment has been minimized.

Copy link
@malthe

malthe Feb 6, 2019

I have seen this assertion fail in real practice.

This comment has been minimized.

Copy link
@kxepal

kxepal Feb 6, 2019

Member

That's fine if something went wrong. Do you have raw example for such response? And which version of aiohttp are you using?

This comment has been minimized.

Copy link
@malthe

malthe Feb 6, 2019

I'm not sure what went wrong but ideally this sort of connection problem should be something I can handle on a higher level by catching e.g. ConnectionError – not just an opaque assertion.

This comment has been minimized.

Copy link
@kxepal

kxepal Feb 6, 2019

Member

Yeah, if connection get broken in the middle and there is something in buffers, then multipart parsing may be incomplete which causes errors. Not sure how it will be easy to distinguish this case from malformed data without having full copy of response in memory (which breaks multipart streaming feature).

This comment has been minimized.

Copy link
@malthe

malthe Feb 6, 2019

To me an assertion is something that is always true. If it turns out it's sometimes false then it's an error mode and we should just raise ConnectionError("Unexpected data (%d bytes)" % num_bytes) (or whatever is reasonable).

This comment has been minimized.

Copy link
@kxepal

kxepal Feb 6, 2019

Member

That's right - it assumes to be so all the time. If it's not, then something when wrong and that's wrong most of times about parser logic.

window = self._prev_chunk + chunk
sub = b'\r\n' + self._boundary
if first_chunk:
idx = window.find(sub)
else:
idx = window.find(sub, len(self._prev_chunk) - len(sub))
idx = window.find(sub, max(0, len(self._prev_chunk) - len(sub)))
if idx >= 0:
# pushing boundary back to content
self._content.unread_data(window[idx:])
Expand All @@ -325,6 +327,10 @@ def _read_chunk_from_stream(self, size):
chunk = window[len(self._prev_chunk):idx]
if not chunk:
self._at_eof = True
if 0 < len(chunk) < len(sub) and not self._content_eof:
self._prev_chunk += chunk
self._at_eof = False
return b''
result = self._prev_chunk
self._prev_chunk = chunk
return result
Expand Down
44 changes: 44 additions & 0 deletions tests/test_multipart.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,9 @@ def __init__(self, content):
def read(self, size=None):
return self.content.read(size)

def at_eof(self):
return self.content.tell() == len(self.content.getbuffer())

@asyncio.coroutine
def readline(self):
return self.content.readline()
Expand Down Expand Up @@ -191,6 +194,47 @@ def prepare(data):
c3 = yield from obj.read_chunk(8)
self.assertEqual(c3, b'!')

def test_read_all_at_once(self):
stream = Stream(b'Hello, World!\r\n--:--\r\n')
obj = aiohttp.multipart.BodyPartReader(self.boundary, {}, stream)
result = yield from obj.read_chunk()
self.assertEqual(b'Hello, World!', result)
result = yield from obj.read_chunk()
self.assertEqual(b'', result)
self.assertTrue(obj.at_eof())

def test_read_incomplete_body_chunked(self):
stream = Stream(b'Hello, World!\r\n-')
obj = aiohttp.multipart.BodyPartReader(self.boundary, {}, stream)
result = b''
with self.assertRaises(AssertionError):
for _ in range(4):
result += yield from obj.read_chunk(7)
self.assertEqual(b'Hello, World!\r\n-', result)

def test_read_boundary_with_incomplete_chunk(self):
stream = Stream(b'')

def prepare(data):
f = asyncio.Future(loop=self.loop)
f.set_result(data)
return f

with mock.patch.object(stream, 'read', side_effect=[
prepare(b'Hello, World'),
prepare(b'!\r\n'),
prepare(b'--:'),
prepare(b'')
]):
obj = aiohttp.multipart.BodyPartReader(
self.boundary, {}, stream)
c1 = yield from obj.read_chunk(12)
self.assertEqual(c1, b'Hello, World')
c2 = yield from obj.read_chunk(8)
self.assertEqual(c2, b'!')
c3 = yield from obj.read_chunk(8)
self.assertEqual(c3, b'')

def test_multi_read_chunk(self):
stream = Stream(b'Hello,\r\n--:\r\n\r\nworld!\r\n--:--')
obj = aiohttp.multipart.BodyPartReader(self.boundary, {}, stream)
Expand Down

0 comments on commit 6edd825

Please sign in to comment.