From 51544310b22af486a608c1a6f33bb5d73ee449a7 Mon Sep 17 00:00:00 2001
From: IlyaFaer
Date: Mon, 8 Jul 2019 13:46:39 +0300
Subject: [PATCH] deflate-encoded content will fail md5 checksum #37

---
 google/resumable_media/requests/download.py | 49 +++++++++++++++++++--
 tests/unit/requests/test_download.py        | 31 +++++++++++++
 2 files changed, 77 insertions(+), 3 deletions(-)

diff --git a/google/resumable_media/requests/download.py b/google/resumable_media/requests/download.py
index d0bcd990..73e99fd7 100644
--- a/google/resumable_media/requests/download.py
+++ b/google/resumable_media/requests/download.py
@@ -299,7 +299,7 @@ def _add_decoder(response_raw, md5_hash):
     This is so that we can intercept the compressed bytes before they are
     decoded.
 
-    Only patches if the content encoding is ``gzip``.
+    Only patches if the content encoding is ``gzip`` or ``deflate``.
 
     Args:
         response_raw (urllib3.response.HTTPResponse): The raw response for
@@ -313,10 +313,13 @@ def _add_decoder(response_raw, md5_hash):
         since the caller will no longer need to hash to decoded bytes.
     """
     encoding = response_raw.headers.get(u'content-encoding', u'').lower()
-    if encoding != u'gzip':
+    if encoding == u'gzip':
+        response_raw._decoder = _GzipDecoder(md5_hash)
+    elif encoding == u'deflate':
+        response_raw._decoder = _DeflateDecoder(md5_hash)
+    else:
         return md5_hash
 
-    response_raw._decoder = _GzipDecoder(md5_hash)
     return _DoNothingHash()
 
 
@@ -346,3 +349,43 @@ def decompress(self, data):
         """
         self._md5_hash.update(data)
         return super(_GzipDecoder, self).decompress(data)
+
+
+class _DeflateDecoder(urllib3.response.DeflateDecoder):
+    """Custom subclass of ``urllib3`` decoder for ``deflate``-ed bytes.
+
+    Allows an MD5 hash function to see the compressed bytes before they are
+    decoded. This way the hash of the compressed value can be computed.
+
+    Args:
+        md5_hash (Union[_DoNothingHash, hashlib.md5]): A hash function which
+            will get updated when it encounters compressed bytes.
+    """
+
+    def __init__(self, md5_hash):
+        super(_DeflateDecoder, self).__init__()
+        self._md5_hash = md5_hash
+        # Set while a ``decompress()`` call is in progress; see below.
+        self._decompressing = False
+
+    def decompress(self, data):
+        """Decompress the bytes.
+
+        Args:
+            data (bytes): The compressed bytes to be decompressed.
+
+        Returns:
+            bytes: The decompressed bytes from ``data``.
+        """
+        # ``urllib3`` falls back to raw deflate by calling
+        # ``self.decompress()`` again with the bytes it has buffered. Those
+        # bytes were already hashed, so skip the hash on the nested call.
+        if self._decompressing:
+            return super(_DeflateDecoder, self).decompress(data)
+
+        self._md5_hash.update(data)
+        self._decompressing = True
+        try:
+            return super(_DeflateDecoder, self).decompress(data)
+        finally:
+            self._decompressing = False
diff --git a/tests/unit/requests/test_download.py b/tests/unit/requests/test_download.py
index 4e1715cd..5103adad 100644
--- a/tests/unit/requests/test_download.py
+++ b/tests/unit/requests/test_download.py
@@ -383,6 +383,18 @@ def test_gzipped(self):
         assert isinstance(response_raw._decoder, download_mod._GzipDecoder)
         assert response_raw._decoder._md5_hash is mock.sentinel.md5_hash
 
+    def test_deflated(self):
+        headers = {u'content-encoding': u'deflate'}
+        response_raw = mock.Mock(
+            headers=headers, spec=[u'headers', u'_decoder'])
+        md5_hash = download_mod._add_decoder(
+            response_raw, mock.sentinel.md5_hash)
+
+        assert md5_hash is not mock.sentinel.md5_hash
+        assert isinstance(md5_hash, download_mod._DoNothingHash)
+        assert isinstance(response_raw._decoder, download_mod._DeflateDecoder)
+        assert response_raw._decoder._md5_hash is mock.sentinel.md5_hash
+
 
 class Test_GzipDecoder(object):
 
@@ -401,6 +413,25 @@ def test_decompress(self):
         md5_hash.update.assert_called_once_with(data)
 
 
+class Test_DeflateDecoder(object):
+
+    def test_constructor(self):
+        decoder = download_mod._DeflateDecoder(mock.sentinel.md5_hash)
+        assert decoder._md5_hash is mock.sentinel.md5_hash
+
+    def test_decompress(self):
+        # Not a valid zlib stream, so ``urllib3`` retries as raw deflate;
+        # the bytes must still be hashed exactly once.
+        data = b'\xc5\xee\xf7\xff\x00'
+
+        md5_hash = mock.Mock(spec=['update'])
+        decoder = download_mod._DeflateDecoder(md5_hash)
+        result = decoder.decompress(data)
+
+        assert result == b''
+        md5_hash.update.assert_called_once_with(data)
+
+
 def _mock_response(status_code=http_client.OK, chunks=(), headers=None):
     if headers is None:
         headers = {}