From 6aba616923855d64f68d17b8424e10dd48e1dbec Mon Sep 17 00:00:00 2001 From: William Woodruff Date: Thu, 1 Jun 2023 00:32:47 -0400 Subject: [PATCH] serialize: remove v1 and v2 formats (#310) * serialize: remove v1 and v2 formats Closes #309. * release_notes: document * serialize: remove old imports and helpers --- cachecontrol/serialize.py | 51 +++++++------------------------------ docs/release_notes.rst | 5 ++++ tests/test_serialization.py | 42 ++++++------------------------ 3 files changed, 22 insertions(+), 76 deletions(-) diff --git a/cachecontrol/serialize.py b/cachecontrol/serialize.py index eb95faa1..55ea3fe2 100644 --- a/cachecontrol/serialize.py +++ b/cachecontrol/serialize.py @@ -2,11 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 -import base64 import io -import json -import pickle -import zlib from typing import IO, TYPE_CHECKING, Any, Mapping, Optional import msgpack @@ -17,17 +13,6 @@ from requests import PreparedRequest, Request -def _b64_decode_bytes(b: str) -> bytes: - return base64.b64decode(b.encode("ascii")) - - -def _b64_decode_str(s: str) -> str: - return _b64_decode_bytes(s).decode("utf8") - - -_default_body_read = object() - - class Serializer(object): def dumps( self, @@ -167,7 +152,7 @@ def _loads_v0( # The original legacy cache data. This doesn't contain enough # information to construct everything we need, so we'll treat this as # a miss. - return + return None def _loads_v1( self, @@ -175,12 +160,9 @@ def _loads_v1( data: bytes, body_file: Optional["IO[bytes]"] = None, ) -> Optional[HTTPResponse]: - try: - cached = pickle.loads(data) - except ValueError: - return None - - return self.prepare_response(request, cached, body_file) + # The "v1" pickled cache format. This is no longer supported + # for security reasons, so we treat it as a miss. + return None def _loads_v2( self, @@ -188,25 +170,10 @@ def _loads_v2( data: bytes, body_file: Optional["IO[bytes]"] = None, ) -> Optional[HTTPResponse]: - assert body_file is None - try: - cached = json.loads(zlib.decompress(data).decode("utf8")) - except (ValueError, zlib.error): - return None - - # We need to decode the items that we've base64 encoded - cached["response"]["body"] = _b64_decode_bytes(cached["response"]["body"]) - cached["response"]["headers"] = dict( - (_b64_decode_str(k), _b64_decode_str(v)) - for k, v in cached["response"]["headers"].items() - ) - cached["response"]["reason"] = _b64_decode_str(cached["response"]["reason"]) - cached["vary"] = dict( - (_b64_decode_str(k), _b64_decode_str(v) if v is not None else v) - for k, v in cached["vary"].items() - ) - - return self.prepare_response(request, cached, body_file) + # The "v2" compressed base64 cache format. + # This has been removed due to age and poor size/performance + # characteristics, so we treat it as a miss. + return None def _loads_v3( self, @@ -217,7 +184,7 @@ def _loads_v3( # Due to Python 2 encoding issues, it's impossible to know for sure # exactly how to load v3 entries, thus we'll treat these as a miss so # that they get rewritten out as v4 entries. - return + return None def _loads_v4( self, diff --git a/docs/release_notes.rst b/docs/release_notes.rst index 480852e3..a1dbb36a 100644 --- a/docs/release_notes.rst +++ b/docs/release_notes.rst @@ -7,6 +7,11 @@ Release Notes =============== +Unreleased +========== + +* Support for old serialization formats has been removed. + 0.13.0 ====== diff --git a/tests/test_serialization.py b/tests/test_serialization.py index b22b249c..cc2b0b20 100644 --- a/tests/test_serialization.py +++ b/tests/test_serialization.py @@ -38,18 +38,17 @@ def test_load_by_version_v0(self): resp = self.serializer.loads(req, data) assert resp is None - def test_read_version_v1(self): + def test_load_by_version_v1(self): + data = b"cc=1,somedata" req = Mock() - resp = self.serializer._loads_v1(req, pickle.dumps(self.response_data)) - # We have to decode our urllib3 data back into a unicode string. - assert resp.data == "Hello World".encode("utf-8") + resp = self.serializer.loads(req, data) + assert resp is None - def test_read_version_v2(self): + def test_load_by_version_v2(self): + data = b"cc=2,somedata" req = Mock() - compressed_base64_json = b"x\x9c%O\xb9\n\x83@\x10\xfd\x97\xa9-\x92%E\x14R\xe4 +\x16\t\xe6\x10\xbb\xb0\xc7\xe0\x81\xb8\xb2\xbb*A\xfc\xf7\x8c\xa6|\xe7\xbc\x99\xc0\xa2\xebL\xeb\x10\xa2\t\xa4\xd1_\x88\xe0\xc93'\xf9\xbe\xc8X\xf8\x95<=@\x00\x1a\x95\xd1\xf8Q\xa6\xf5\xd8z\x88\xbc\xed1\x80\x12\x85F\xeb\x96h\xca\xc2^\xf3\xac\xd7\xe7\xed\x1b\xf3SC5\x04w\xfa\x1c\x8e\x92_;Y\x1c\x96\x9a\x94]k\xc1\xdf~u\xc7\xc9 \x8fDG\xa0\xe2\xac\x92\xbc\xa9\xc9\xf1\xc8\xcbQ\xe4I\xa3\xc6U\xb9_\x14\xbb\xbdh\xc2\x1c\xd0R\xe1LK$\xd9\x9c\x17\xbe\xa7\xc3l\xb3Y\x80\xad\x94\xff\x0b\x03\xed\xa9V\x17[2\x83\xb0\xf4\xd14\xcf?E\x03Im" - resp = self.serializer._loads_v2(req, compressed_base64_json) - # We have to decode our urllib3 data back into a unicode string. - assert resp.data == "Hello World".encode("utf-8") + resp = self.serializer.loads(req, data) + assert resp is None def test_load_by_version_v3(self): data = b"cc=3,somedata" @@ -63,31 +62,6 @@ def test_read_version_v4(self): # We have to decode our urllib3 data back into a unicode string. assert resp.data == "Hello World".encode("utf-8") - def test_read_v1_serialized_with_py2_TypeError(self): - # This tests how the code handles in reading data that was pickled - # with an old version of cachecontrol running under Python 2 - req = Mock() - py2_pickled_data = b"".join( - [ - b"(dp1\nS'response'\np2\n(dp3\nS'body'\np4\nS'Hello World'\n", - b"p5\nsS'version'\np6\nS'2'\nsS'status'\np7\nI200\n", - b"sS'reason'\np8\nS''\nsS'decode_content'\np9\nI01\n", - b"sS'strict'\np10\nS''\nsS'headers'\np11\n(dp12\n", - b"S'Content-Type'\np13\nS'text/plain'\np14\n", - b"sS'Cache-Control'\np15\nS'public'\np16\n", - b"sS'Expires'\np17\nS'87654'\np18\nsss.", - ] - ) - resp = self.serializer._loads_v1(req, py2_pickled_data) - # We have to decode our urllib3 data back into a unicode - # string. - assert resp.data == "Hello World".encode("utf-8") - - def test_read_v2_corrupted_cache(self): - # This should prevent a regression of bug #134 - req = Mock() - assert self.serializer._loads_v2(req, b"") is None - def test_read_latest_version_streamable(self, url): original_resp = requests.get(url, stream=True) req = original_resp.request