From 7cd8e1ce906e0ea02eca9ff54559f8f1caa4baa1 Mon Sep 17 00:00:00 2001 From: Guido Imperiale Date: Wed, 11 Sep 2019 10:55:17 +0100 Subject: [PATCH] Identical hashes do not guarantee equality --- pint/testsuite/test_issues.py | 20 +++++++++- pint/util.py | 74 ++++++++++++++++++++++------------- 2 files changed, 66 insertions(+), 28 deletions(-) diff --git a/pint/testsuite/test_issues.py b/pint/testsuite/test_issues.py index 6ca703586..275ed338b 100644 --- a/pint/testsuite/test_issues.py +++ b/pint/testsuite/test_issues.py @@ -11,7 +11,7 @@ from pint.unit import UnitsContainer from pint.util import ParserHelper -from pint.compat import np, long_type +from pint.compat import np from pint.errors import UndefinedUnitError, DimensionalityError from pint.testsuite import QuantityTestCase, helpers @@ -699,3 +699,21 @@ def test_issue856b(self): ureg2.define('test123 = 456 kg') assert ureg1('1 test123').to('kg').magnitude == 123 assert ureg2('1 test123').to('kg').magnitude == 456 + + def test_issue876(self): + # Same hash must not imply equality. + + # As an implementation detail of CPython, hash(-1) == hash(-2) This test is + # useless in potential alternative Python implementations where hash(-1) != + # hash(-2); one would need to find hash collisions specific for each + # implementation + + a = UnitsContainer({"[mass]": -1}) + b = UnitsContainer({"[mass]": -2}) + c = UnitsContainer({"[mass]": -3}) + + # Guarantee working on alternative Python implementations + assert (hash(-1) == hash(-2)) == (hash(a) == hash(b)) + assert (hash(-1) == hash(-3)) == (hash(a) == hash(c)) + assert a != b + assert a != c diff --git a/pint/util.py b/pint/util.py index f5282a300..d8ebcace0 100644 --- a/pint/util.py +++ b/pint/util.py @@ -286,16 +286,20 @@ def remove(self, keys): """ Create a new UnitsContainer purged from given keys. """ - d = udict(self._d) - return UnitsContainer(((key, d[key]) for key in d if key not in keys)) + new = self.copy() + for k in keys: + new._d.pop(k, None) + new._hash = None + return new def rename(self, oldkey, newkey): """ Create a new UnitsContainer in which an entry has been renamed. """ - d = udict(self._d) - d[newkey] = d.pop(oldkey) - return UnitsContainer(d) + new = self.copy() + new._d[newkey] = new._d.pop(oldkey) + new._hash = None + return new def __iter__(self): return iter(self._d) @@ -323,7 +327,14 @@ def __setstate__(self, state): def __eq__(self, other): if isinstance(other, UnitsContainer): + out = UnitsContainer.__hash__(self) == UnitsContainer.__hash__(other) + # Different hashes guarantee that the actual contents are different, but + # identical hashes give no guarantee of equality + # e.g. in CPython, hash(-1) == hash(-2) + if not out: + return False other = other._d + elif isinstance(other, string_types): other = ParserHelper.from_string(other) other = other._d @@ -345,20 +356,25 @@ def format_babel(self, spec, **kwspec): return format_unit(self, spec, **kwspec) def __copy__(self): - return UnitsContainer(self._d) + # Skip expensive health checks performed by __init__ + out = object.__new__(self.__class__) + out._d = self._d.copy() + out._hash = self._hash + return out def __mul__(self, other): - d = udict(self._d) if not isinstance(other, self.__class__): err = 'Cannot multiply UnitsContainer by {}' raise TypeError(err.format(type(other))) + + new = self.copy() for key, value in other.items(): - d[key] += value - keys = [key for key, value in d.items() if value == 0] - for key in keys: - del d[key] + new._d[key] += value + if new._d[key] == 0: + del new._d[key] - return UnitsContainer(d) + new._hash = None + return new __rmul__ = __mul__ @@ -366,26 +382,26 @@ def __pow__(self, other): if not isinstance(other, NUMERIC_TYPES): err = 'Cannot power UnitsContainer by {}' raise TypeError(err.format(type(other))) - d = udict(self._d) - for key, value in d.items(): - d[key] *= other - return UnitsContainer(d) + + new = self.copy() + for key, value in new._d.items(): + new._d[key] *= other + new._hash = None + return new def __truediv__(self, other): if not isinstance(other, self.__class__): err = 'Cannot divide UnitsContainer by {}' raise TypeError(err.format(type(other))) - d = udict(self._d) - + new = self.copy() for key, value in other.items(): - d[key] -= value - - keys = [key for key, value in d.items() if value == 0] - for key in keys: - del d[key] + new._d[key] -= value + if new._d[key] == 0: + del new._d[key] - return UnitsContainer(d) + new._hash = None + return new def __rtruediv__(self, other): if not isinstance(other, self.__class__) and other != 1: @@ -473,7 +489,9 @@ def _from_string(cls, input_string): for key, value in ret.items())) def __copy__(self): - return ParserHelper(scale=self.scale, **self) + new = super(ParserHelper, self).__copy__() + new.scale = self.scale + return new def copy(self): return self.__copy__() @@ -492,9 +510,11 @@ def __setstate__(self, state): self._d, self._hash, self.scale = state def __eq__(self, other): - if isinstance(other, self.__class__): - return self.scale == other.scale and\ + if isinstance(other, ParserHelper): + return ( + self.scale == other.scale and super(ParserHelper, self).__eq__(other) + ) elif isinstance(other, string_types): return self == ParserHelper.from_string(other) elif isinstance(other, Number):