Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GRMP: Fix/download forces decompression response #85

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 39 additions & 6 deletions google/resumable_media/requests/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,12 +117,24 @@ def _write_to_stream(self, response):
with response:
# NOTE: This might "donate" ``md5_hash`` to the decoder and replace
# it with a ``_DoNothingHash``.
local_hash = _add_decoder(response.raw, md5_hash)
body_iter = response.iter_content(
chunk_size=_SINGLE_GET_CHUNK_SIZE, decode_unicode=False)
for chunk in body_iter:
self._stream.write(chunk)
local_hash.update(chunk)
encoding = response.headers.get(u'content-encoding', u'').lower()
accept_encoding = response.request.headers.get('accept-encoding',
u'').lower()
if encoding == 'gzip' and accept_encoding == 'gzip':
local_hash = md5_hash
response.raw.decode_content = False
body_iter = self._read_chunk_raw_response(
response, chunk_size=_SINGLE_GET_CHUNK_SIZE)
for chunk in body_iter:
self._stream.write(chunk)
local_hash.update(chunk)
else:
local_hash = _add_decoder(response.raw, md5_hash)
body_iter = response.iter_content(
chunk_size=_SINGLE_GET_CHUNK_SIZE, decode_unicode=False)
for chunk in body_iter:
self._stream.write(chunk)
local_hash.update(chunk)

if expected_md5_hash is None:
return
Expand Down Expand Up @@ -175,6 +187,27 @@ def consume(self, transport):

return result

@staticmethod
def _read_chunk_raw_response(response, chunk_size):
    """Yield the raw response body piece by piece.

    Reading through ``response.raw`` in bounded pieces avoids pulling
    the whole content into memory at once for large responses.

    Args:
        response (~requests.Response): The HTTP response object.
        chunk_size (int): The number of bytes to request on each read.

    Yields:
        Each chunk returned by ``response.raw.read(chunk_size)``, in
        order. Iteration ends at the first empty (falsy) chunk, which
        is not yielded.
    """
    # Prime the loop with a first read; an empty result ends iteration
    # immediately without yielding anything.
    piece = response.raw.read(chunk_size)
    while piece:
        yield piece
        piece = response.raw.read(chunk_size)


class ChunkedDownload(_helpers.RequestsMixin, _download.ChunkedDownload):
"""Download a resource in chunks from a Google API.
Expand Down
2 changes: 1 addition & 1 deletion noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
GOOGLE_AUTH = 'google-auth >= 0.10.0'


@nox.session(python=['2,7', '3.4', '3.5', '3.6', '3.7'])
@nox.session(python=['2.7', '3.4', '3.5', '3.6', '3.7'])
def unit_tests(session):
"""Run the unit test suite."""

Expand Down
83 changes: 83 additions & 0 deletions tests/unit/requests/test_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,38 @@ def test__write_to_stream_with_hash_check_success(self):
chunk_size=download_mod._SINGLE_GET_CHUNK_SIZE,
decode_unicode=False)

def test__write_to_stream_with_raw_response(self):
    """Chunks of a gzip-encoded response reach the stream unmodified.

    The mocked response advertises ``content-encoding: gzip`` and
    ``Download._read_chunk_raw_response`` is patched to hand back the
    three chunks, so the stream must end up holding exactly their
    concatenation.
    """
    sink = io.BytesIO()
    download = download_mod.Download(EXAMPLE_URL, stream=sink)

    # The first chunk begins with the gzip magic bytes (1f 8b 08).
    chunk1 = (
        b'\x1f\x8b\x08\x08?"\xeaY\x02\xfftmpBazYVY\x00KLJNIMK\xcf'
        b'\xc8\xcc\xca\xce\xc9\xcd\xcb/(,*.)-+\xaf\xa8'
    )
    chunk2 = (
        b'\xac20426153\xb7\xb0\xe4J\x1cU4\xaahT\xd1\xa8\xa2QE\xa3'
        b'\x8aF\x15\x8d*'
    )
    chunk3 = b'\x1aU\x04S\x04\x00\x04P\xf8\xea@\t\x00\x00'
    pieces = [chunk1, chunk2, chunk3]

    response_headers = {
        download_mod._HASH_HEADER:
            u'crc32c=qmNCyg==,md5=KHRs/+ZSrc/FuuR4qz/PZQ==',
        'content-encoding': 'gzip',
    }
    response = _mock_response(chunks=pieces, headers=response_headers)

    # Replace the raw reader so it simply replays the chunks.
    patcher = mock.patch(
        u'google.resumable_media.requests.download.'
        u'Download._read_chunk_raw_response',
        return_value=iter(pieces))

    with patcher as raw_reader:
        assert download._write_to_stream(response) is None

    assert sink.getvalue() == chunk1 + chunk2 + chunk3

    # The response must have been used as a context manager exactly once.
    response.__enter__.assert_called_once_with()
    response.__exit__.assert_called_once_with(None, None, None)
    raw_reader.assert_called_once_with(
        response, chunk_size=download_mod._SINGLE_GET_CHUNK_SIZE)

def test__write_to_stream_with_hash_check_fail(self):
stream = io.BytesIO()
download = download_mod.Download(EXAMPLE_URL, stream=stream)
Expand Down Expand Up @@ -131,6 +163,28 @@ def test__write_to_stream_with_hash_check_fail(self):
chunk_size=download_mod._SINGLE_GET_CHUNK_SIZE,
decode_unicode=False)

def test__read_chunk_raw_response(self):
    """``_read_chunk_raw_response`` is a generator yielding raw chunks."""
    import types

    payload = b'\x1aU\x04S\x04\x00\x04P\xf8\xea@\t\x00\x00'
    download = download_mod.Download(EXAMPLE_URL, stream=io.BytesIO())
    fake_response = _mock_raw_response(chunk=payload)

    chunk_gen = download._read_chunk_raw_response(
        fake_response, chunk_size=download_mod._SINGLE_GET_CHUNK_SIZE)

    assert isinstance(chunk_gen, types.GeneratorType)
    # Only the first item is pulled: the mocked ``read`` returns the
    # same payload on every call.
    assert next(chunk_gen) == payload

def test__read_chunk_raw_none_response(self):
    """An empty raw body makes the generator finish without yielding."""
    download = download_mod.Download(EXAMPLE_URL, stream=io.BytesIO())
    # With no ``chunk`` argument the mocked ``read`` returns ``b''``.
    empty_response = _mock_raw_response()

    chunk_gen = download._read_chunk_raw_response(
        empty_response, chunk_size=download_mod._SINGLE_GET_CHUNK_SIZE)

    assert list(chunk_gen) == []

def _consume_helper(
self, stream=None, end=65536, headers=None, chunks=(),
response_headers=None):
Expand Down Expand Up @@ -418,12 +472,14 @@ def _mock_response(status_code=http_client.OK, chunks=(), headers=None):
u'status_code',
u'headers',
u'raw',
u'request',
],
)
# i.e. context manager returns ``self``.
response.__enter__.return_value = response
response.__exit__.return_value = None
response.iter_content.return_value = iter(chunks)
response.request.headers = {'accept-encoding': 'gzip'}
return response
else:
return mock.Mock(
Expand All @@ -434,3 +490,30 @@ def _mock_response(status_code=http_client.OK, chunks=(), headers=None):
u'headers',
],
)


def read_row(chunk):
    """Build a mock raw body whose ``read`` always returns ``chunk``.

    Args:
        chunk: The value every call to ``read`` should return.

    Returns:
        mock.Mock: An object exposing a mocked ``read`` method.
    """
    return mock.Mock(read=mock.Mock(return_value=chunk))


def _mock_raw_response(status_code=http_client.OK, chunk=b''):
    """Create a mocked response whose ``raw.read`` returns ``chunk``.

    Args:
        status_code: HTTP status for the response; stored as an ``int``.
        chunk: The value the mocked ``raw.read`` returns on every call.

    Returns:
        mock.MagicMock: A response mock limited to the attributes named
        in its spec, usable as a context manager that returns itself,
        and whose request headers carry ``accept-encoding: gzip``.
    """
    allowed_attributes = [
        u'__enter__',
        u'__exit__',
        u'iter_content',
        u'status_code',
        u'headers',
        u'raw',
        u'request',
    ]
    raw_body = read_row(chunk)
    fake_response = mock.MagicMock(
        spec=allowed_attributes,
        raw=raw_body,
        status_code=int(status_code),
    )

    # i.e. context manager returns ``self``.
    fake_response.__enter__.return_value = fake_response
    fake_response.__exit__.return_value = None

    fake_response.request.headers = {'accept-encoding': 'gzip'}
    return fake_response