Skip to content

Commit

Permalink
serialize: remove v1 and v2 formats (#310)
Browse files Browse the repository at this point in the history
* serialize: remove v1 and v2 formats

Closes #309.

* release_notes: document

* serialize: remove old imports and helpers
  • Loading branch information
woodruffw authored Jun 1, 2023
1 parent 71e071c commit 6aba616
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 76 deletions.
51 changes: 9 additions & 42 deletions cachecontrol/serialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,7 @@
#
# SPDX-License-Identifier: Apache-2.0

import base64
import io
import json
import pickle
import zlib
from typing import IO, TYPE_CHECKING, Any, Mapping, Optional

import msgpack
Expand All @@ -17,17 +13,6 @@
from requests import PreparedRequest, Request


def _b64_decode_bytes(b: str) -> bytes:
    """Decode a base64-encoded ASCII string back into raw bytes."""
    encoded = b.encode("ascii")
    return base64.b64decode(encoded)


def _b64_decode_str(s: str) -> str:
    """Decode a base64-encoded string and interpret the payload as UTF-8 text."""
    raw = _b64_decode_bytes(s)
    return raw.decode("utf8")


_default_body_read = object()


class Serializer(object):
def dumps(
self,
Expand Down Expand Up @@ -167,46 +152,28 @@ def _loads_v0(
# The original legacy cache data. This doesn't contain enough
# information to construct everything we need, so we'll treat this as
# a miss.
return
return None

def _loads_v1(
    self,
    request: "Request",
    data: bytes,
    body_file: Optional["IO[bytes]"] = None,
) -> "Optional[HTTPResponse]":
    """Handle a cache entry written in the legacy "v1" pickle format.

    Loading v1 entries is no longer supported for security reasons:
    ``pickle.loads`` can execute arbitrary code when fed untrusted data.
    Any v1 entry is therefore treated as a cache miss; the caller will
    re-fetch and rewrite the entry in the current format.

    :param request: the request this cache entry was stored against (unused)
    :param data: the raw serialized cache payload (unused)
    :param body_file: optional stream holding the response body (unused)
    :return: always ``None`` (cache miss)
    """
    return None

def _loads_v2(
    self,
    request: "Request",
    data: bytes,
    body_file: Optional["IO[bytes]"] = None,
) -> "Optional[HTTPResponse]":
    """Handle a cache entry written in the legacy "v2" format
    (zlib-compressed JSON with base64-encoded fields).

    Support for this format has been removed due to its age and poor
    size/performance characteristics, so any v2 entry is treated as a
    cache miss; the caller will re-fetch and rewrite the entry in the
    current format.

    :param request: the request this cache entry was stored against (unused)
    :param data: the raw serialized cache payload (unused)
    :param body_file: optional stream holding the response body (unused)
    :return: always ``None`` (cache miss)
    """
    return None

def _loads_v3(
self,
Expand All @@ -217,7 +184,7 @@ def _loads_v3(
# Due to Python 2 encoding issues, it's impossible to know for sure
# exactly how to load v3 entries, thus we'll treat these as a miss so
# that they get rewritten out as v4 entries.
return
return None

def _loads_v4(
self,
Expand Down
5 changes: 5 additions & 0 deletions docs/release_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@
Release Notes
===============

Unreleased
==========

* Support for the old "v1" (pickle-based) and "v2" (compressed base64 JSON) cache serialization formats has been removed; cache entries in those formats are now treated as misses.

0.13.0
======

Expand Down
42 changes: 8 additions & 34 deletions tests/test_serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,18 +38,17 @@ def test_load_by_version_v0(self):
resp = self.serializer.loads(req, data)
assert resp is None

def test_load_by_version_v1(self):
    """A "v1" (pickle-format) cache entry must load as a miss.

    v1 support was removed for security reasons, so ``loads`` on a
    payload tagged ``cc=1`` returns ``None`` rather than a response.
    """
    data = b"cc=1,somedata"
    req = Mock()
    resp = self.serializer.loads(req, data)
    assert resp is None

def test_load_by_version_v2(self):
    """A "v2" (compressed base64/JSON) cache entry must load as a miss.

    v2 support was removed, so ``loads`` on a payload tagged ``cc=2``
    returns ``None`` rather than a response.
    """
    data = b"cc=2,somedata"
    req = Mock()
    resp = self.serializer.loads(req, data)
    assert resp is None

def test_load_by_version_v3(self):
data = b"cc=3,somedata"
Expand All @@ -63,31 +62,6 @@ def test_read_version_v4(self):
# We have to decode our urllib3 data back into a unicode string.
assert resp.data == "Hello World".encode("utf-8")

def test_read_v1_serialized_with_py2_TypeError(self):
    # Exercises reading cache data that was pickled by an old version of
    # cachecontrol running under Python 2 (a protocol-0 ASCII pickle
    # stream, built literally below so the exact bytes are under test).
    # NOTE(review): sibling tests in this file treat the v1 format as an
    # unconditional cache miss, so this test's expectation of a decoded
    # body may be obsolete — confirm against the current _loads_v1
    # behavior before relying on it.
    req = Mock()
    py2_pickled_data = b"".join(
        [
            b"(dp1\nS'response'\np2\n(dp3\nS'body'\np4\nS'Hello World'\n",
            b"p5\nsS'version'\np6\nS'2'\nsS'status'\np7\nI200\n",
            b"sS'reason'\np8\nS''\nsS'decode_content'\np9\nI01\n",
            b"sS'strict'\np10\nS''\nsS'headers'\np11\n(dp12\n",
            b"S'Content-Type'\np13\nS'text/plain'\np14\n",
            b"sS'Cache-Control'\np15\nS'public'\np16\n",
            b"sS'Expires'\np17\nS'87654'\np18\nsss.",
        ]
    )
    resp = self.serializer._loads_v1(req, py2_pickled_data)
    # We have to decode our urllib3 data back into a unicode
    # string.
    assert resp.data == "Hello World".encode("utf-8")

def test_read_v2_corrupted_cache(self):
    # Regression guard for bug #134: a corrupted/empty v2 payload must be
    # treated as a cache miss (None) instead of raising.
    request = Mock()
    result = self.serializer._loads_v2(request, b"")
    assert result is None

def test_read_latest_version_streamable(self, url):
original_resp = requests.get(url, stream=True)
req = original_resp.request
Expand Down

0 comments on commit 6aba616

Please sign in to comment.