-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: more hash, sizeof and eq implementations (#609)
Summary of changes: adds specific hash implementations to the classical classification and regression models; uses more properties of the table transformer to calculate its hash; adds `__eq__` to the imputer strategies and SVM kernels; adds general, reusable structural hashing infrastructure for future use.
- Loading branch information
Showing
37 changed files
with
772 additions
and
52 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
"""Utilities for Safe-DS.""" | ||
|
||
from ._hashing import _structural_hash | ||
|
||
__all__ = [ | ||
"_structural_hash", | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
import functools | ||
import operator | ||
import struct | ||
from typing import Any | ||
|
||
import xxhash | ||
|
||
|
||
def _structural_hash(*value: Any) -> int:
    """
    Compute a deterministic hash over all provided values.

    The positional arguments are collected into a tuple, serialized with
    `_value_to_bytes`, and the resulting bytes are digested with xxhash's
    `xxh3_64`.

    Parameters
    ----------
    value
        Variable amount of values to hash

    Returns
    -------
    hash
        Deterministic hash value
    """
    # `value` is the tuple of all positional arguments, so it is serialized as a sequence.
    serialized = _value_to_bytes(value)
    digest = xxhash.xxh3_64(serialized)
    return digest.intdigest()
|
||
|
||
def _value_to_bytes(value: Any) -> bytes: | ||
""" | ||
Convert any value to a deterministically hashable representation. | ||
Parameters | ||
---------- | ||
value | ||
Object to convert to a byte representation for deterministic structural hashing | ||
Returns | ||
------- | ||
bytes | ||
Byte representation of the provided value | ||
""" | ||
if value is None: | ||
return b"\0" | ||
elif isinstance(value, bytes): | ||
return value | ||
elif isinstance(value, bool): | ||
return b"\1" if value else b"\0" | ||
elif isinstance(value, int) and value < 0: | ||
return value.to_bytes(8, signed=True) | ||
elif isinstance(value, int) and value >= 0: | ||
return value.to_bytes(8) | ||
elif isinstance(value, str): | ||
return value.encode("utf-8") | ||
elif isinstance(value, float): | ||
return struct.pack("d", value) | ||
elif isinstance(value, list | tuple): | ||
return functools.reduce(operator.add, [_value_to_bytes(entry) for entry in value], len(value).to_bytes(8)) | ||
elif isinstance(value, frozenset | set): | ||
return functools.reduce( | ||
operator.add, | ||
sorted([_value_to_bytes(entry) for entry in value]), | ||
len(value).to_bytes(8), | ||
) | ||
elif isinstance(value, dict): | ||
return functools.reduce( | ||
operator.add, | ||
sorted([_value_to_bytes(key) + _value_to_bytes(entry) for key, entry in value.items()]), | ||
len(value).to_bytes(8), | ||
) | ||
else: | ||
return _value_to_bytes(hash(value)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.