Skip to content

Commit

Permalink
deflate-encoded content will fail md5 checksum googleapis#37
Browse files Browse the repository at this point in the history
  • Loading branch information
IlyaFaer committed Jul 8, 2019
1 parent 34a36be commit 5154431
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 3 deletions.
37 changes: 34 additions & 3 deletions google/resumable_media/requests/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ def _add_decoder(response_raw, md5_hash):
This is so that we can intercept the compressed bytes before they are
decoded.
Only patches if the content encoding is ``gzip``.
Only patches if the content encoding is ``gzip`` or ``deflate``.
Args:
response_raw (urllib3.response.HTTPResponse): The raw response for
Expand All @@ -313,10 +313,13 @@ def _add_decoder(response_raw, md5_hash):
since the caller will no longer need to hash to decoded bytes.
"""
encoding = response_raw.headers.get(u'content-encoding', u'').lower()
if encoding != u'gzip':
if encoding == u'gzip':
response_raw._decoder = _GzipDecoder(md5_hash)
elif encoding == u'deflate':
response_raw._decoder = _DeflateDecoder(md5_hash)
else:
return md5_hash

response_raw._decoder = _GzipDecoder(md5_hash)
return _DoNothingHash()


Expand Down Expand Up @@ -346,3 +349,31 @@ def decompress(self, data):
"""
self._md5_hash.update(data)
return super(_GzipDecoder, self).decompress(data)


class _DeflateDecoder(urllib3.response.DeflateDecoder):
    """Custom subclass of ``urllib3`` decoder for ``deflate``-ed bytes.

    Allows an MD5 hash function to see the compressed bytes before they are
    decoded. This way the hash of the compressed value can be computed.

    Args:
        md5_hash (Union[_DoNothingHash, hashlib.md5]): A hash function which
            will get updated when it encounters compressed bytes.
    """

    def __init__(self, md5_hash):
        super(_DeflateDecoder, self).__init__()
        self._md5_hash = md5_hash
        # True only while the parent ``decompress`` is executing, so that a
        # re-entrant call coming from the parent can be recognised.
        self._decompressing = False

    def decompress(self, data):
        """Decompress the bytes.

        Every chunk handed in by the caller is fed to the MD5 hash exactly
        once, before it is decoded.

        The ``urllib3`` base decoder may call ``decompress`` on itself again
        with the bytes it has buffered so far when it falls back from
        zlib-wrapped to raw ``deflate`` data. Those bytes have already been
        hashed, so such re-entrant calls bypass the hash; otherwise the
        resulting checksum would count them twice.

        Args:
            data (bytes): The compressed bytes to be decompressed.

        Returns:
            bytes: The decompressed bytes from ``data``.
        """
        if self._decompressing:
            # Re-entrant call made by the parent class: ``data`` is a replay
            # of already-hashed bytes, so only decode it.
            return super(_DeflateDecoder, self).decompress(data)

        self._md5_hash.update(data)
        self._decompressing = True
        try:
            return super(_DeflateDecoder, self).decompress(data)
        finally:
            # Always reset, even if decoding raised, so that the next chunk
            # supplied by the caller is hashed again.
            self._decompressing = False
29 changes: 29 additions & 0 deletions tests/unit/requests/test_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,18 @@ def test_gzipped(self):
assert isinstance(response_raw._decoder, download_mod._GzipDecoder)
assert response_raw._decoder._md5_hash is mock.sentinel.md5_hash

def test_deflated(self):
    # A ``deflate`` content encoding must install the hashing decoder on
    # the raw response and hand a do-nothing hash back to the caller.
    raw = mock.Mock(
        headers={u'content-encoding': u'deflate'},
        spec=[u'headers', u'_decoder'])

    returned_hash = download_mod._add_decoder(raw, mock.sentinel.md5_hash)

    # The caller gets a replacement hash, not the one it passed in.
    assert returned_hash is not mock.sentinel.md5_hash
    assert isinstance(returned_hash, download_mod._DoNothingHash)

    # The original hash now lives on the decoder patched onto the response.
    decoder = raw._decoder
    assert isinstance(decoder, download_mod._DeflateDecoder)
    assert decoder._md5_hash is mock.sentinel.md5_hash


class Test_GzipDecoder(object):

Expand All @@ -401,6 +413,23 @@ def test_decompress(self):
md5_hash.update.assert_called_once_with(data)


class Test_DeflateDecoder(object):
    """Unit tests for ``download_mod._DeflateDecoder``."""

    def test_constructor(self):
        # The hash passed to the constructor is stored as-is.
        expected_hash = mock.sentinel.md5_hash
        decoder = download_mod._DeflateDecoder(expected_hash)
        assert decoder._md5_hash is expected_hash

    def test_decompress(self):
        compressed = b'\xc5\xee\xf7\xff\x00'
        hash_mock = mock.Mock(spec=['update'])

        decoder = download_mod._DeflateDecoder(hash_mock)
        decoded = decoder.decompress(compressed)

        assert decoded == b''
        # ``assert_called_with`` checks only the most recent ``update``
        # call, not how many calls were made.
        hash_mock.update.assert_called_with(compressed)


def _mock_response(status_code=http_client.OK, chunks=(), headers=None):
if headers is None:
headers = {}
Expand Down

0 comments on commit 5154431

Please sign in to comment.