Skip to content

Commit

Permalink
serialize: remove v1 and v2 formats (#310)
Browse files Browse the repository at this point in the history
* serialize: remove v1 and v2 formats

Closes #309.

* release_notes: document

* serialize: remove old imports and helpers
  • Loading branch information
woodruffw authored Jun 1, 2023
1 parent 71e071c commit 6aba616
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 76 deletions.
51 changes: 9 additions & 42 deletions cachecontrol/serialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,7 @@
#
# SPDX-License-Identifier: Apache-2.0

import base64
import io
import json
import pickle
import zlib
from typing import IO, TYPE_CHECKING, Any, Mapping, Optional

import msgpack
Expand All @@ -17,17 +13,6 @@
from requests import PreparedRequest, Request


def _b64_decode_bytes(b: str) -> bytes:
    """Decode a base64-encoded ASCII string back into raw bytes."""
    encoded = b.encode("ascii")
    return base64.b64decode(encoded)


def _b64_decode_str(s: str) -> str:
    """Decode a base64-encoded string and interpret the payload as UTF-8 text."""
    raw = _b64_decode_bytes(s)
    return raw.decode("utf8")


_default_body_read = object()


class Serializer(object):
def dumps(
self,
Expand Down Expand Up @@ -167,46 +152,28 @@ def _loads_v0(
# The original legacy cache data. This doesn't contain enough
# information to construct everything we need, so we'll treat this as
# a miss.
return
return None

def _loads_v1(
    self,
    request: "Request",
    data: bytes,
    body_file: Optional["IO[bytes]"] = None,
) -> "Optional[HTTPResponse]":
    """Handle a cache entry written in the legacy "v1" pickle format.

    Loading v1 entries is no longer supported for security reasons:
    ``pickle.loads`` can execute arbitrary code when fed untrusted data.
    Any v1 entry is therefore treated as a cache miss; the caller will
    re-fetch and rewrite the entry in the current format.

    :param request: the request this cache entry was stored against (unused)
    :param data: the raw serialized cache payload (unused)
    :param body_file: optional stream holding the response body (unused)
    :return: always ``None`` (cache miss)
    """
    return None

def _loads_v2(
    self,
    request: "Request",
    data: bytes,
    body_file: Optional["IO[bytes]"] = None,
) -> "Optional[HTTPResponse]":
    """Handle a cache entry written in the legacy "v2" format
    (zlib-compressed JSON with base64-encoded fields).

    Support for this format has been removed due to its age and poor
    size/performance characteristics, so any v2 entry is treated as a
    cache miss; the caller will re-fetch and rewrite the entry in the
    current format.

    :param request: the request this cache entry was stored against (unused)
    :param data: the raw serialized cache payload (unused)
    :param body_file: optional stream holding the response body (unused)
    :return: always ``None`` (cache miss)
    """
    return None

def _loads_v3(
self,
Expand All @@ -217,7 +184,7 @@ def _loads_v3(
# Due to Python 2 encoding issues, it's impossible to know for sure
# exactly how to load v3 entries, thus we'll treat these as a miss so
# that they get rewritten out as v4 entries.
return
return None

def _loads_v4(
self,
Expand Down
5 changes: 5 additions & 0 deletions docs/release_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@
Release Notes
===============

Unreleased
==========

* Support for the old "v1" (pickle-based) and "v2" (compressed base64 JSON) cache serialization formats has been removed; cache entries in those formats are now treated as misses.

0.13.0
======

Expand Down
42 changes: 8 additions & 34 deletions tests/test_serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,18 +38,17 @@ def test_load_by_version_v0(self):
resp = self.serializer.loads(req, data)
assert resp is None

def test_load_by_version_v1(self):
    """A "v1" (pickle-format) cache entry must load as a miss.

    v1 support was removed for security reasons, so ``loads`` on a
    payload tagged ``cc=1`` returns ``None`` rather than a response.
    """
    data = b"cc=1,somedata"
    req = Mock()
    resp = self.serializer.loads(req, data)
    assert resp is None

def test_load_by_version_v2(self):
    """A "v2" (compressed base64/JSON) cache entry must load as a miss.

    v2 support was removed, so ``loads`` on a payload tagged ``cc=2``
    returns ``None`` rather than a response.
    """
    data = b"cc=2,somedata"
    req = Mock()
    resp = self.serializer.loads(req, data)
    assert resp is None

def test_load_by_version_v3(self):
data = b"cc=3,somedata"
Expand All @@ -63,31 +62,6 @@ def test_read_version_v4(self):
# We have to decode our urllib3 data back into a unicode string.
assert resp.data == "Hello World".encode("utf-8")

def test_read_v1_serialized_with_py2_TypeError(self):
    # Exercises reading cache data that was pickled by an old version of
    # cachecontrol running under Python 2 (a protocol-0 ASCII pickle
    # stream, built literally below so the exact bytes are under test).
    # NOTE(review): sibling tests in this file treat the v1 format as an
    # unconditional cache miss, so this test's expectation of a decoded
    # body may be obsolete — confirm against the current _loads_v1
    # behavior before relying on it.
    req = Mock()
    py2_pickled_data = b"".join(
        [
            b"(dp1\nS'response'\np2\n(dp3\nS'body'\np4\nS'Hello World'\n",
            b"p5\nsS'version'\np6\nS'2'\nsS'status'\np7\nI200\n",
            b"sS'reason'\np8\nS''\nsS'decode_content'\np9\nI01\n",
            b"sS'strict'\np10\nS''\nsS'headers'\np11\n(dp12\n",
            b"S'Content-Type'\np13\nS'text/plain'\np14\n",
            b"sS'Cache-Control'\np15\nS'public'\np16\n",
            b"sS'Expires'\np17\nS'87654'\np18\nsss.",
        ]
    )
    resp = self.serializer._loads_v1(req, py2_pickled_data)
    # We have to decode our urllib3 data back into a unicode
    # string.
    assert resp.data == "Hello World".encode("utf-8")

def test_read_v2_corrupted_cache(self):
    # Regression guard for bug #134: a corrupted/empty v2 payload must be
    # treated as a cache miss (None) instead of raising.
    request = Mock()
    result = self.serializer._loads_v2(request, b"")
    assert result is None

def test_read_latest_version_streamable(self, url):
original_resp = requests.get(url, stream=True)
req = original_resp.request
Expand Down

0 comments on commit 6aba616

Please sign in to comment.