Skip to content

Commit

Permalink
Use a self-clearing subclass to store hash cache
Browse files Browse the repository at this point in the history
Rather than attempting to remove the hash cache from the object state on
deserialization or serialization, we store the hash cache in an object
that reduces to None, so that it clears itself whenever it is pickled or
copied.

This fixes GH python-attrs#494 and python-attrs#613.

Co-authored-by: Matt Wozniski <[email protected]>
  • Loading branch information
pganssle and godlygeek committed Feb 5, 2020
1 parent 8c00f75 commit 4e7315e
Showing 1 changed file with 47 additions and 31 deletions.
78 changes: 47 additions & 31 deletions src/attr/_make.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,30 @@ def __repr__(self):
"""


class _CacheHashWrapper(int):
    """
    An ``int`` subclass whose instances pickle and copy as ``None``.

    Non-slots classes created with ``cache_hash=True`` keep their cached hash
    in one of these, so that a potentially (even likely) invalid hash value is
    never serialized.  ``None`` is the marker for a hash that has not been
    calculated yet, so every copy starts out with its cache reset.

    See GH #613 for more details.
    """

    # ``__reduce__`` hands back a ``(callable, args)`` pair and the object is
    # rebuilt as ``callable(*args)``, so any call that yields ``None`` works.
    if not PY2:

        def __reduce__(self, _none_constructor=type(None), _args=()):
            return (_none_constructor, _args)

    else:
        # ``type(None)`` is not callable on Python 2.  A real constructor for
        # ``None`` is not required, though -- any available function that
        # returns ``None`` will do, and ``getattr(0, "", None)`` is one.

        def __reduce__(self, _none_constructor=getattr, _args=(0, "", None)):
            return (_none_constructor, _args)


def attrib(
default=NOTHING,
validator=None,
Expand Down Expand Up @@ -523,34 +547,6 @@ def _patch_original_class(self):
for name, value in self._cls_dict.items():
setattr(cls, name, value)

# Attach __setstate__. This is necessary to clear the hash code
# cache on deserialization. See issue
# https://github.com/python-attrs/attrs/issues/482 .
# Note that this code only handles setstate for dict classes.
# For slotted classes, see similar code in _create_slots_class .
if self._cache_hash:
existing_set_state_method = getattr(cls, "__setstate__", None)
if existing_set_state_method:
raise NotImplementedError(
"Currently you cannot use hash caching if "
"you specify your own __setstate__ method."
"See https://github.com/python-attrs/attrs/issues/494 ."
)

# Clears the cached hash state on serialization; for frozen
# classes we need to bypass the class's setattr method.
if self._frozen:

def cache_hash_set_state(chss_self, _):
object.__setattr__(chss_self, _hash_cache_field, None)

else:

def cache_hash_set_state(chss_self, _):
setattr(chss_self, _hash_cache_field, None)

cls.__setstate__ = cache_hash_set_state

return cls

def _create_slots_class(self):
Expand Down Expand Up @@ -1103,22 +1099,42 @@ def _make_hash(cls, attrs, frozen, cache_hash):
unique_filename = _generate_unique_filename(cls, "hash")
type_hash = hash(unique_filename)

method_lines = ["def __hash__(self):"]
hash_def = "def __hash__(self"
hash_func = "hash(("
closing_braces = "))"
if not cache_hash:
hash_def += "):"
else:
if not PY2:
hash_def += ", *"

hash_def += (
", _cache_wrapper="
+ "__import__('attr._make')._make._CacheHashWrapper):"
)
hash_func = "_cache_wrapper(" + hash_func
closing_braces += ")"

method_lines = [hash_def]

def append_hash_computation_lines(prefix, indent):
"""
Generate the code for actually computing the hash code.
Below this will either be returned directly or used to compute
a value which is then cached, depending on the value of cache_hash
"""

method_lines.extend(
[indent + prefix + "hash((", indent + " %d," % (type_hash,)]
[
indent + prefix + hash_func,
indent + " %d," % (type_hash,),
]
)

for a in attrs:
method_lines.append(indent + " self.%s," % a.name)

method_lines.append(indent + " ))")
method_lines.append(indent + " " + closing_braces)

if cache_hash:
method_lines.append(tab + "if self.%s is None:" % _hash_cache_field)
Expand Down

0 comments on commit 4e7315e

Please sign in to comment.