From fd8db276f05bad4a4eee354e464ccbe46a10439a Mon Sep 17 00:00:00 2001 From: Daniel Hollas Date: Thu, 16 May 2024 10:10:54 +0100 Subject: [PATCH 1/4] WIP: typing aiida.common --- .pre-commit-config.yaml | 1 - src/aiida/__init__.py | 2 +- src/aiida/common/hashing.py | 49 ++++++++++++++++++++----------------- src/aiida/common/utils.py | 2 +- 4 files changed, 28 insertions(+), 26 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 42cfd9584d..cad92cb781 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -115,7 +115,6 @@ repos: src/aiida/cmdline/utils/common.py| src/aiida/cmdline/utils/echo.py| src/aiida/common/extendeddicts.py| - src/aiida/common/hashing.py| src/aiida/common/utils.py| src/aiida/engine/daemon/execmanager.py| src/aiida/engine/processes/calcjobs/manager.py| diff --git a/src/aiida/__init__.py b/src/aiida/__init__.py index e136eeafa7..75f2a96387 100644 --- a/src/aiida/__init__.py +++ b/src/aiida/__init__.py @@ -42,7 +42,7 @@ def get_strict_version(): :returns: StrictVersion instance with the current version :rtype: :class:`!distutils.version.StrictVersion` """ - from distutils.version import StrictVersion + from distutils.version import StrictVersion # type: ignore[import-not-found] from aiida.common.warnings import warn_deprecation diff --git a/src/aiida/common/hashing.py b/src/aiida/common/hashing.py index 9fed9f8a6d..3a99182274 100644 --- a/src/aiida/common/hashing.py +++ b/src/aiida/common/hashing.py @@ -41,7 +41,8 @@ def get_random_string(length: int = 12) -> str: return ''.join(secrets.choice(alphabet) for i in range(length)) -BLAKE2B_OPTIONS = { +# Relaxed typing needed due to https://github.com/python/mypy/issues/5382 +BLAKE2B_OPTIONS: dict[str, typing.Any] = { 'fanout': 0, # unlimited fanout/depth mode 'depth': 2, # has fixed depth of 2 'digest_size': 32, # we do not need a cryptographically relevant digest @@ -74,7 +75,7 @@ def chunked_file_hash( return hasher.hexdigest() -def make_hash(object_to_hash, **kwargs): +def make_hash(object_to_hash: typing.Any, **kwargs) -> str: """Makes a hash from a dictionary, list, tuple or set to any level, that contains only other hashable or nonhashable types (including lists, tuples, sets, and dictionaries). @@ -110,14 +111,14 @@ def make_hash(object_to_hash, **kwargs): @singledispatch -def _make_hash(object_to_hash, **_): +def _make_hash(object_to_hash: typing.Any, **_): """Implementation of the ``make_hash`` function. The hash is created as a 28 byte integer, and only later converted to a string. """ raise HashingError(f'Value of type {type(object_to_hash)} cannot be hashed') -def _single_digest(obj_type, obj_bytes=b''): +def _single_digest(obj_type: str, obj_bytes: bytes = b'') -> bytes: return hashlib.blake2b(obj_bytes, person=obj_type.encode('ascii'), node_depth=0, **BLAKE2B_OPTIONS).digest() @@ -125,19 +126,19 @@ def _single_digest(obj_type, obj_bytes=b''): @_make_hash.register(bytes) -def _(bytes_obj, **kwargs): +def _(bytes_obj: bytes, **kwargs) -> list[bytes]: """Hash arbitrary byte strings.""" return [_single_digest('str', bytes_obj)] @_make_hash.register(str) -def _(val, **kwargs): +def _(val: str, **kwargs) -> list[bytes]: """Convert strings explicitly to bytes.""" return [_single_digest('str', val.encode('utf-8'))] @_make_hash.register(abc.Sequence) -def _(sequence_obj, **kwargs): +def _(sequence_obj: abc.Sequence, **kwargs) -> list[bytes]: # unpack the list and use the elements return ( [_single_digest('list(')] @@ -147,7 +148,7 @@ def _(sequence_obj, **kwargs): @_make_hash.register(abc.Set) -def _(set_obj, **kwargs): +def _(set_obj: abc.Set, **kwargs) -> list[bytes]: # turn the set objects into a list of hashes which are always sortable, # then return a flattened list of the hashes return ( @@ -158,7 +159,7 @@ def _(set_obj, **kwargs): @_make_hash.register(abc.Mapping) -def _(mapping, **kwargs): +def _(mapping: abc.Mapping, **kwargs) -> list[bytes]: """Hashing arbitrary mapping containers (dict, OrderedDict) by first sorting by hashed keys""" def hashed_key_mapping(): @@ -178,7 +179,7 @@ def hashed_key_mapping(): @_make_hash.register(OrderedDict) -def _(mapping, **kwargs): +def _(mapping: OrderedDict, **kwargs) -> list[bytes]: """Hashing of OrderedDicts :param odict_as_unordered: hash OrderedDicts as normal dicts (mostly for testing) @@ -196,7 +197,7 @@ def _(mapping, **kwargs): @_make_hash.register(numbers.Real) -def _(val, **kwargs): +def _(val: numbers.Real, **kwargs) -> list[bytes]: """Before hashing a float, convert to a string (via rounding) and with a fixed number of digits after the comma. Note that the `_single_digest` requires a bytes object so we need to encode the utf-8 string first """ @@ -204,19 +205,21 @@ def _(val, **kwargs): @_make_hash.register(Decimal) -def _(val, **kwargs): +def _(val: Decimal, **kwargs) -> list[bytes]: """While a decimal can be converted exactly to a string which captures all characteristics of the underlying implementation, we also need compatibility with "equal" representations as int or float. Hence we are checking for the exponent (which is negative if there is a fractional component, 0 otherwise) and get the same hash as for a corresponding float or int. """ - if val.as_tuple().exponent < 0: + # TODO: This is a possible problem! + # Unsupported operand types for < ("Literal['n']" and "int") + if val.as_tuple().exponent < 0: # type: ignore[operator] return [_single_digest('float', float_to_text(val, sig=AIIDA_FLOAT_PRECISION).encode('utf-8'))] return [_single_digest('int', f'{val}'.encode('utf-8'))] @_make_hash.register(numbers.Complex) -def _(val, **kwargs): +def _(val: numbers.Complex, **kwargs) -> list[bytes]: """In case of a complex number, use the same encoding of two floats and join with a special symbol (a ! here).""" return [ _single_digest( @@ -229,23 +232,23 @@ def _(val, **kwargs): @_make_hash.register(numbers.Integral) -def _(val, **kwargs): +def _(val: numbers.Integral, **kwargs) -> list[bytes]: """Get the hash of the little-endian signed long long representation of the integer""" return [_single_digest('int', f'{val}'.encode('utf-8'))] @_make_hash.register(bool) -def _(val, **kwargs): +def _(val: bool, **kwargs) -> list[bytes]: return [_single_digest('bool', b'\x01' if val else b'\x00')] @_make_hash.register(type(None)) -def _(val, **kwargs): +def _(val: type[None], **kwargs) -> list[bytes]: return [_single_digest('none')] @_make_hash.register(datetime) -def _(val, **kwargs): +def _(val: datetime, **kwargs) -> list[bytes]: """Hashes the little-endian rep of the float .""" # see also https://stackoverflow.com/a/8778548 for an excellent elaboration if val.tzinfo is None or val.utcoffset() is None: @@ -256,18 +259,18 @@ def _(val, **kwargs): @_make_hash.register(date) -def _(val, **kwargs): +def _(val: date, **kwargs) -> list[bytes]: """Hashes the string representation in ISO format of the `datetime.date` object.""" return [_single_digest('date', val.isoformat().encode('utf-8'))] @_make_hash.register(uuid.UUID) -def _(val, **kwargs): +def _(val: uuid.UUID, **kwargs) -> list[bytes]: return [_single_digest('uuid', val.bytes)] @_make_hash.register(DatetimePrecision) -def _(datetime_precision, **kwargs): +def _(datetime_precision: DatetimePrecision, **kwargs) -> list[bytes]: """Hashes for DatetimePrecision object""" return ( [_single_digest('dt_prec')] @@ -281,7 +284,7 @@ def _(datetime_precision, **kwargs): @_make_hash.register(Folder) -def _(folder, **kwargs): +def _(folder: Folder, **kwargs) -> list[bytes]: """Hash the content of a Folder object. The name of the folder itself is actually ignored :param ignored_folder_content: list of filenames to be ignored for the hashing """ @@ -306,7 +309,7 @@ def folder_digests(subfolder): return [_single_digest('folder')] + list(folder_digests(folder)) -def float_to_text(value, sig): +def float_to_text(value: typing.SupportsFloat, sig: int) -> str: """Convert float to text string for computing hash. Preseve up to N significant number given by sig. diff --git a/src/aiida/common/utils.py b/src/aiida/common/utils.py index 54fe5453d5..f41cbef636 100644 --- a/src/aiida/common/utils.py +++ b/src/aiida/common/utils.py @@ -21,7 +21,7 @@ from .lang import classproperty -def get_new_uuid(): +def get_new_uuid() -> str: """Return a new UUID (typically to be used for new nodes).""" import uuid From f1674d9b38a4b65195979434d64cab049397bc5f Mon Sep 17 00:00:00 2001 From: Daniel Hollas Date: Tue, 21 May 2024 20:29:41 +0100 Subject: [PATCH 2/4] Hash Decimal['NaN'] --- src/aiida/common/hashing.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/aiida/common/hashing.py b/src/aiida/common/hashing.py index 3a99182274..9aeaf0a130 100644 --- a/src/aiida/common/hashing.py +++ b/src/aiida/common/hashing.py @@ -111,7 +111,7 @@ def make_hash(object_to_hash: typing.Any, **kwargs) -> str: @singledispatch -def _make_hash(object_to_hash: typing.Any, **_): +def _make_hash(object_to_hash: typing.Any, **_) -> list[bytes]: """Implementation of the ``make_hash`` function. The hash is created as a 28 byte integer, and only later converted to a string. """ @@ -211,9 +211,11 @@ def _(val: Decimal, **kwargs) -> list[bytes]: for the exponent (which is negative if there is a fractional component, 0 otherwise) and get the same hash as for a corresponding float or int. """ - # TODO: This is a possible problem! - # Unsupported operand types for < ("Literal['n']" and "int") - if val.as_tuple().exponent < 0: # type: ignore[operator] + exponent = val.as_tuple().exponent + # This is a fallback for Decimal('NaN') and similar + if isinstance(exponent, str): + return [_single_digest('str', f'{val}'.encode('utf-8'))] + if exponent < 0: return [_single_digest('float', float_to_text(val, sig=AIIDA_FLOAT_PRECISION).encode('utf-8'))] return [_single_digest('int', f'{val}'.encode('utf-8'))] From 78809b40ca17b0c201a328d979e20b3eda0f932d Mon Sep 17 00:00:00 2001 From: Daniel Hollas Date: Tue, 21 May 2024 20:40:28 +0100 Subject: [PATCH 3/4] Add tests --- tests/common/test_hashing.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/common/test_hashing.py b/tests/common/test_hashing.py index c39992127b..2ef50253f4 100644 --- a/tests/common/test_hashing.py +++ b/tests/common/test_hashing.py @@ -184,6 +184,10 @@ def test_decimal(self): assert make_hash(Decimal('3141')) == make_hash(3141) + assert make_hash(Decimal('NaN')) == make_hash('NaN') + assert make_hash(Decimal('Inf')) == make_hash('Infinity') + assert make_hash(Decimal('-Inf')) == make_hash('-Infinity') + def test_unhashable_type(self): class MadeupClass: pass From dd7551de2116552fe9eb1bc0e3590a957c1e3207 Mon Sep 17 00:00:00 2001 From: Daniel Hollas Date: Tue, 21 May 2024 22:11:37 +0100 Subject: [PATCH 4/4] appease precommit gods --- src/aiida/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aiida/__init__.py b/src/aiida/__init__.py index 75f2a96387..e136eeafa7 100644 --- a/src/aiida/__init__.py +++ b/src/aiida/__init__.py @@ -42,7 +42,7 @@ def get_strict_version(): :returns: StrictVersion instance with the current version :rtype: :class:`!distutils.version.StrictVersion` """ - from distutils.version import StrictVersion # type: ignore[import-not-found] + from distutils.version import StrictVersion from aiida.common.warnings import warn_deprecation