Configurable Serialization #63

Merged
8 changes: 8 additions & 0 deletions CHANGES.rst
@@ -1,3 +1,11 @@
Version 0.5.0
-------------

Unreleased

- Cache types now have configurable serializers. :pr:`63`


Version 0.4.1
-------------

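The changelog entry above is the user-facing summary of this pull request: every cache type now exposes a configurable serializer class attribute (added in the new src/cachelib/serializers.py further down). A minimal sketch of what that enables, assuming a cachelib install that includes this change; the key and value are made up for illustration:

from cachelib.serializers import BaseSerializer
from cachelib.simple import SimpleCache

cache = SimpleCache()
# Swap the default SimpleSerializer for the plain pickle-based base class.
cache.serializer = BaseSerializer()
cache.set("greeting", {"hello": "world"})
assert cache.get("greeting") == {"hello": "world"}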
1 change: 1 addition & 0 deletions setup.cfg
@@ -38,6 +38,7 @@ testpaths = tests
filterwarnings =
error
default::DeprecationWarning:cachelib.uwsgi
default::DeprecationWarning:cachelib.redis

[coverage:run]
branch = True
23 changes: 13 additions & 10 deletions src/cachelib/file.py
@@ -1,14 +1,14 @@
import errno
import logging
import os
import pickle
import tempfile
import typing as _t
from hashlib import md5
from pathlib import Path
from time import time

from cachelib.base import BaseCache
from cachelib.serializers import FileSystemSerializer


class FileSystemCache(BaseCache):
@@ -32,6 +32,8 @@ class FileSystemCache(BaseCache):
#: keep amount of files in a cache element
_fs_count_file = "__wz_cache_count"

serializer = FileSystemSerializer()

def __init__(
self,
cache_dir: str,
@@ -96,7 +98,8 @@ def _remove_expired(self, now: float) -> None:
for fname in self._list_dir():
try:
with open(fname, "rb") as f:
expires = pickle.load(f)
expires = self.serializer.load(f)
if expires != 0 and expires < now:
os.remove(fname)
self._update_count(delta=-1)
@@ -114,7 +117,7 @@ def _remove_older(self) -> bool:
for fname in self._list_dir():
try:
with open(fname, "rb") as f:
exp_fname_tuples.append((pickle.load(f), fname))
exp_fname_tuples.append((self.serializer.load(f), fname))
except FileNotFoundError:
pass
except (OSError, EOFError):
@@ -181,12 +184,12 @@ def get(self, key: str) -> _t.Any:
filename = self._get_filename(key)
try:
with open(filename, "rb") as f:
pickle_time = pickle.load(f)
pickle_time = self.serializer.load(f)
if pickle_time == 0 or pickle_time >= time():
return pickle.load(f)
return self.serializer.load(f)
except FileNotFoundError:
pass
except (OSError, EOFError, pickle.PickleError):
except (OSError, EOFError):
logging.warning(
"Exception raised while handling cache file '%s'",
filename,
@@ -223,8 +226,8 @@ def set(
suffix=self._fs_transaction_suffix, dir=self._path
)
with os.fdopen(fd, "wb") as f:
pickle.dump(timeout, f, 1)
pickle.dump(value, f, pickle.HIGHEST_PROTOCOL)
self.serializer.dump(timeout, f)
self.serializer.dump(value, f)
os.replace(tmp, filename)
os.chmod(filename, self._mode)
fsize = Path(filename).stat().st_size
@@ -259,14 +262,14 @@ def has(self, key: str) -> bool:
filename = self._get_filename(key)
try:
with open(filename, "rb") as f:
pickle_time = pickle.load(f)
pickle_time = self.serializer.load(f)
if pickle_time == 0 or pickle_time >= time():
return True
else:
return False
except FileNotFoundError: # if there is no file there is no key
return False
except (OSError, EOFError, pickle.PickleError):
except (OSError, EOFError):
logging.warning(
"Exception raised while handling cache file '%s'",
filename,
44 changes: 20 additions & 24 deletions src/cachelib/redis.py
@@ -1,7 +1,8 @@
import pickle
import typing as _t
import warnings

from cachelib.base import BaseCache
from cachelib.serializers import RedisSerializer


class RedisCache(BaseCache):
@@ -26,6 +27,8 @@ class RedisCache(BaseCache):
Any additional keyword arguments will be passed to ``redis.Redis``.
"""

serializer = RedisSerializer()

def __init__(
self,
host: _t.Any = "localhost",
@@ -60,29 +63,22 @@ def _normalize_timeout(self, timeout: _t.Optional[int]) -> int:
return timeout

def dump_object(self, value: _t.Any) -> bytes:
"""Dumps an object into a string for redis. By default it serializes
integers as regular string and pickle dumps everything else.
"""
if isinstance(type(value), int):
return str(value).encode("ascii")
return b"!" + pickle.dumps(value)

def load_object(self, value: _t.Optional[bytes]) -> _t.Any:
"""The reversal of :meth:`dump_object`. This might be called with
None.
"""
if value is None:
return None
if value.startswith(b"!"):
try:
return pickle.loads(value[1:])
except pickle.PickleError:
return None
try:
return int(value)
except ValueError:
# before 0.8 we did not have serialization. Still support that.
return value
warnings.warn(
"'dump_object' is deprecated and will be removed in the future."
"This is a proxy call to 'RedisCache.serializer.dumps'",
DeprecationWarning,
stacklevel=2,
)
return self.serializer.dumps(value)

def load_object(self, value: _t.Any) -> _t.Any:
warnings.warn(
"'load_object' is deprecated and will be removed in the future."
"This is a proxy call to 'RedisCache.serializer.loads'",
DeprecationWarning,
stacklevel=2,
)
return self.serializer.loads(value)

def get(self, key: str) -> _t.Any:
return self.load_object(self._client.get(self.key_prefix + key))
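For RedisCache, dump_object and load_object are kept as thin proxies that emit DeprecationWarning, which is why setup.cfg above now lists cachelib.redis under the DeprecationWarning filter. A hedged sketch of the migration path, assuming the redis client package is installed; no server round-trip is needed because only serialization is exercised:

import warnings

from cachelib.redis import RedisCache

cache = RedisCache()  # the underlying redis client connects lazily

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    payload = cache.dump_object({"a": 1})  # deprecated proxy to serializer.dumps
assert any(issubclass(w.category, DeprecationWarning) for w in caught)

# Preferred going forward: call the serializer directly.
assert cache.serializer.loads(payload) == {"a": 1}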
105 changes: 105 additions & 0 deletions src/cachelib/serializers.py
@@ -0,0 +1,105 @@
import logging
import pickle
import typing as _t


class BaseSerializer:
"""This is the base interface for all default serializers.

BaseSerializer.load and BaseSerializer.dump will
default to pickle.load and pickle.dump. This is currently
used only by FileSystemCache which dumps/loads to/from a file stream.
"""

def _warn(self, e: pickle.PickleError) -> None:
logging.warning(
f"An exception has been raised during a pickling operation: {e}"
)

def dump(
self, value: int, f: _t.IO, protocol: int = pickle.HIGHEST_PROTOCOL
) -> None:
try:
pickle.dump(value, f, protocol)
except (pickle.PickleError, pickle.PicklingError) as e:
self._warn(e)

def load(self, f: _t.BinaryIO) -> _t.Any:
try:
data = pickle.load(f)
except pickle.PickleError as e:
self._warn(e)
return None
else:
return data

"""BaseSerializer.loads and BaseSerializer.dumps
work on top of pickle.loads and pickle.dumps. Dumping/loading
strings and byte strings is the default for most cache types.
"""

def dumps(self, value: _t.Any, protocol: int = pickle.HIGHEST_PROTOCOL) -> bytes:
try:
serialized = pickle.dumps(value, protocol)
except (pickle.PickleError, pickle.PicklingError) as e:
self._warn(e)
return serialized

def loads(self, bvalue: bytes) -> _t.Any:
try:
data = pickle.loads(bvalue)
except pickle.PickleError as e:
self._warn(e)
return None
else:
return data


"""Default serializers for each cache type.

The following classes can be used to further customize
serialization behaviour. Alternatively, any serializer can be
overridden in order to use a custom serializer with a different
strategy altogether.
"""


class UWSGISerializer(BaseSerializer):
"""Default serializer for UWSGICache."""


class SimpleSerializer(BaseSerializer):
"""Default serializer for SimpleCache."""


class FileSystemSerializer(BaseSerializer):
"""Default serializer for FileSystemCache."""


class RedisSerializer(BaseSerializer):
"""Default serializer for RedisCache."""

def dumps(self, value: _t.Any, protocol: int = pickle.HIGHEST_PROTOCOL) -> bytes:
"""Dumps an object into a string for redis. By default it serializes
integers as regular string and pickle dumps everything else.
"""
if isinstance(type(value), int):
return str(value).encode("ascii")
return b"!" + pickle.dumps(value, protocol)

def loads(self, value: _t.Optional[bytes]) -> _t.Any:
"""The reversal of :meth:`dump_object`. This might be called with
None.
"""
if value is None:
return None
if value.startswith(b"!"):
try:
return pickle.loads(value[1:])
except pickle.PickleError:
return None
try:
return int(value)
except ValueError:
# before 0.8 we did not have serialization. Still support that.
return value
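The module docstring above notes that any serializer can be overridden to use a different strategy altogether. A sketch of that idea, assuming the serializers module added by this pull request; the JSON-based class and the cached record are hypothetical and not part of cachelib:

import json
import typing as _t

from cachelib.serializers import BaseSerializer
from cachelib.simple import SimpleCache


class JsonSerializer(BaseSerializer):
    """Hypothetical serializer that stores values as UTF-8 encoded JSON."""

    def dumps(self, value: _t.Any, protocol: int = 0) -> bytes:
        # protocol is accepted for interface compatibility but ignored here
        return json.dumps(value).encode("utf-8")

    def loads(self, bvalue: bytes) -> _t.Any:
        return json.loads(bvalue.decode("utf-8"))


class JsonSimpleCache(SimpleCache):
    serializer = JsonSerializer()


cache = JsonSimpleCache()
cache.set("user", {"id": 1, "name": "alice"})
assert cache.get("user") == {"id": 1, "name": "alice"}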
18 changes: 12 additions & 6 deletions src/cachelib/simple.py
@@ -1,8 +1,8 @@
import pickle
import typing as _t
from time import time

from cachelib.base import BaseCache
from cachelib.serializers import SimpleSerializer


class SimpleCache(BaseCache):
@@ -19,7 +19,13 @@ class SimpleCache(BaseCache):
0 indicates that the cache never expires.
"""

def __init__(self, threshold: int = 500, default_timeout: int = 300):
serializer = SimpleSerializer()

def __init__(
self,
threshold: int = 500,
default_timeout: int = 300,
):
BaseCache.__init__(self, default_timeout)
self._cache: _t.Dict[str, _t.Any] = {}
self._threshold = threshold or 500 # threshold = 0
@@ -62,22 +68,22 @@ def get(self, key: str) -> _t.Any:
try:
expires, value = self._cache[key]
if expires == 0 or expires > time():
return pickle.loads(value)
except (KeyError, pickle.PickleError):
return self.serializer.loads(value)
except KeyError:
return None

def set(
self, key: str, value: _t.Any, timeout: _t.Optional[int] = None
) -> _t.Optional[bool]:
expires = self._normalize_timeout(timeout)
self._prune()
self._cache[key] = (expires, pickle.dumps(value, pickle.HIGHEST_PROTOCOL))
self._cache[key] = (expires, self.serializer.dumps(value))
return True

def add(self, key: str, value: _t.Any, timeout: _t.Optional[int] = None) -> bool:
expires = self._normalize_timeout(timeout)
self._prune()
item = (expires, pickle.dumps(value, pickle.HIGHEST_PROTOCOL))
item = (expires, self.serializer.dumps(value))
if key in self._cache:
return False
self._cache.setdefault(key, item)
22 changes: 17 additions & 5 deletions src/cachelib/uwsgi.py
@@ -1,8 +1,8 @@
import pickle
import platform
import typing as _t

from cachelib.base import BaseCache
from cachelib.serializers import UWSGISerializer


class UWSGICache(BaseCache):
@@ -20,7 +20,13 @@ class UWSGICache(BaseCache):
the cache.
"""

def __init__(self, default_timeout: int = 300, cache: str = ""):
serializer = UWSGISerializer()

def __init__(
self,
default_timeout: int = 300,
cache: str = "",
):
BaseCache.__init__(self, default_timeout)

if platform.python_implementation() == "PyPy":
@@ -44,7 +50,7 @@ def get(self, key: str) -> _t.Any:
rv = self._uwsgi.cache_get(key, self.cache)
if rv is None:
return
return pickle.loads(rv)
return self.serializer.loads(rv)

def delete(self, key: str) -> bool:
return bool(self._uwsgi.cache_del(key, self.cache))
@@ -53,14 +59,20 @@ def set(
self, key: str, value: _t.Any, timeout: _t.Optional[int] = None
) -> _t.Optional[bool]:
result = self._uwsgi.cache_update(
key, pickle.dumps(value), self._normalize_timeout(timeout), self.cache
key,
self.serializer.dumps(value),
self._normalize_timeout(timeout),
self.cache,
) # type: bool
return result

def add(self, key: str, value: _t.Any, timeout: _t.Optional[int] = None) -> bool:
return bool(
self._uwsgi.cache_set(
key, pickle.dumps(value), self._normalize_timeout(timeout), self.cache
key,
self.serializer.dumps(value),
self._normalize_timeout(timeout),
self.cache,
)
)
