From 47cc1fce3bc1098e7b100a27239b41c267e2c45f Mon Sep 17 00:00:00 2001 From: szelenka Date: Tue, 22 Nov 2022 08:47:06 -0500 Subject: [PATCH 1/2] Changing default hash_algo to sha512 instead of md5 --- src/prefect/utilities/hashing.py | 6 ++--- tests/utilities/test_hashing.py | 43 ++++++++++++++++++++++++++------ 2 files changed, 38 insertions(+), 11 deletions(-) diff --git a/src/prefect/utilities/hashing.py b/src/prefect/utilities/hashing.py index 02732d69fab9..b486c7e13276 100644 --- a/src/prefect/utilities/hashing.py +++ b/src/prefect/utilities/hashing.py @@ -7,7 +7,7 @@ from prefect.serializers import JSONSerializer -def stable_hash(*args: Union[str, bytes], hash_algo=hashlib.md5) -> str: +def stable_hash(*args: Union[str, bytes], hash_algo=hashlib.sha512) -> str: """Given some arguments, produces a stable 64-bit hash of their contents. Supports bytes and strings. Strings will be UTF-8 encoded. @@ -27,7 +27,7 @@ def stable_hash(*args: Union[str, bytes], hash_algo=hashlib.md5) -> str: return h.hexdigest() -def file_hash(path: str, hash_algo=hashlib.md5) -> str: +def file_hash(path: str, hash_algo=hashlib.sha512) -> str: """Given a path to a file, produces a stable hash of the file contents. Args: @@ -41,7 +41,7 @@ def file_hash(path: str, hash_algo=hashlib.md5) -> str: return stable_hash(contents, hash_algo=hash_algo) -def hash_objects(*args, hash_algo=hashlib.md5, **kwargs) -> Optional[str]: +def hash_objects(*args, hash_algo=hashlib.sha512, **kwargs) -> Optional[str]: """ Attempt to hash objects by dumping to JSON or serializing with cloudpickle. On failure of both, `None` will be returned diff --git a/tests/utilities/test_hashing.py b/tests/utilities/test_hashing.py index 6b28ec1dd12c..4900db2a2427 100644 --- a/tests/utilities/test_hashing.py +++ b/tests/utilities/test_hashing.py @@ -8,12 +8,36 @@ @pytest.mark.parametrize( "inputs,hash_algo,expected", [ - (("hello",), None, "5d41402abc4b2a76b9719d911017c592"), - (("goodbye",), None, "69faab6268350295550de7d587bc323d"), - ((b"goodbye",), None, "69faab6268350295550de7d587bc323d"), - (("hello", "goodbye"), None, "441add4718519b71e42d329a834d6d5e"), - (("hello", b"goodbye"), None, "441add4718519b71e42d329a834d6d5e"), - (("goodbye", "hello"), None, "c04d8ccb6b9368703e62be93358094f9"), + ( + ("hello",), + None, + "9b71d224bd62f3785d96d46ad3ea3d73319bfbc2890caadae2dff72519673ca72323c3d99ba5c11d7c7acc6e14b8c5da0c4663475c2e5c3adef46f73bcdec043", + ), + ( + ("goodbye",), + None, + "de2c0320cdff37271049dfa8cb835ffd54200216253a1dfbad75a1ae51bd30bb499e14e37fe993ba2ea57b863fc56304de94073d880c9c18eb0a469cde211d02", + ), + ( + (b"goodbye",), + None, + "de2c0320cdff37271049dfa8cb835ffd54200216253a1dfbad75a1ae51bd30bb499e14e37fe993ba2ea57b863fc56304de94073d880c9c18eb0a469cde211d02", + ), + ( + ("hello", "goodbye"), + None, + "be68ed8f7c48c3c78af4ab2a1c8ba497f469a55c171a6d81e9386f0f2245ed4c8bc85a2135ef8d839151dad1361522cdfbb25d0f252395c29b81a9445b52ca83", + ), + ( + ("hello", b"goodbye"), + None, + "be68ed8f7c48c3c78af4ab2a1c8ba497f469a55c171a6d81e9386f0f2245ed4c8bc85a2135ef8d839151dad1361522cdfbb25d0f252395c29b81a9445b52ca83", + ), + ( + ("goodbye", "hello"), + None, + "b779b909718f0c4d9b85f0a1a71bb58cec74e6deece4eadb522261e0cee267a7eb7f4d57327bc369c1774c0cf7d5185db0d5e3f04cc88aff78d1899249bd26d5", + ), ( ("goodbye", "hello"), hashlib.sha256, @@ -52,6 +76,9 @@ def test_file_hash_hashes(self, tmp_path): f.write("0") val = file_hash(tmp_path.joinpath("test.py")) - assert val == hashlib.md5(b"0").hexdigest() + assert val == hashlib.sha512(b"0").hexdigest() # Check if the hash is stable - assert val == "cfcd208495d565ef66e7dff9f98764da" + assert ( + val + == "31bca02094eb78126a517b206a88c73cfa9ec6f704c7030d18212cace820f025f00bf0ea68dbf3f3a5436ca63b53bf7bf80ad8d5de7d8359d0b7fed9dbc3ab99" + ) From dd8c5c07bbfecd8712313bc31bc905c333acf67c Mon Sep 17 00:00:00 2001 From: szelenka Date: Tue, 22 Nov 2022 12:27:26 -0500 Subject: [PATCH 2/2] reverting sha512 to md5, adding usedforsecurity=False as default --- src/prefect/utilities/hashing.py | 13 +++++++--- tests/utilities/test_hashing.py | 43 ++++++-------------------------- 2 files changed, 18 insertions(+), 38 deletions(-) diff --git a/src/prefect/utilities/hashing.py b/src/prefect/utilities/hashing.py index b486c7e13276..204acf76dea7 100644 --- a/src/prefect/utilities/hashing.py +++ b/src/prefect/utilities/hashing.py @@ -1,4 +1,6 @@ import hashlib +import sys +from functools import partial from pathlib import Path from typing import Optional, Union @@ -6,8 +8,13 @@ from prefect.serializers import JSONSerializer +if sys.version_info[:2] >= (3, 9): + _md5 = partial(hashlib.md5, usedforsecurity=False) +else: + _md5 = hashlib.md5 -def stable_hash(*args: Union[str, bytes], hash_algo=hashlib.sha512) -> str: + +def stable_hash(*args: Union[str, bytes], hash_algo=_md5) -> str: """Given some arguments, produces a stable 64-bit hash of their contents. Supports bytes and strings. Strings will be UTF-8 encoded. @@ -27,7 +34,7 @@ def stable_hash(*args: Union[str, bytes], hash_algo=hashlib.sha512) -> str: return h.hexdigest() -def file_hash(path: str, hash_algo=hashlib.sha512) -> str: +def file_hash(path: str, hash_algo=_md5) -> str: """Given a path to a file, produces a stable hash of the file contents. Args: @@ -41,7 +48,7 @@ def file_hash(path: str, hash_algo=hashlib.sha512) -> str: return stable_hash(contents, hash_algo=hash_algo) -def hash_objects(*args, hash_algo=hashlib.sha512, **kwargs) -> Optional[str]: +def hash_objects(*args, hash_algo=_md5, **kwargs) -> Optional[str]: """ Attempt to hash objects by dumping to JSON or serializing with cloudpickle. On failure of both, `None` will be returned diff --git a/tests/utilities/test_hashing.py b/tests/utilities/test_hashing.py index 4900db2a2427..6b28ec1dd12c 100644 --- a/tests/utilities/test_hashing.py +++ b/tests/utilities/test_hashing.py @@ -8,36 +8,12 @@ @pytest.mark.parametrize( "inputs,hash_algo,expected", [ - ( - ("hello",), - None, - "9b71d224bd62f3785d96d46ad3ea3d73319bfbc2890caadae2dff72519673ca72323c3d99ba5c11d7c7acc6e14b8c5da0c4663475c2e5c3adef46f73bcdec043", - ), - ( - ("goodbye",), - None, - "de2c0320cdff37271049dfa8cb835ffd54200216253a1dfbad75a1ae51bd30bb499e14e37fe993ba2ea57b863fc56304de94073d880c9c18eb0a469cde211d02", - ), - ( - (b"goodbye",), - None, - "de2c0320cdff37271049dfa8cb835ffd54200216253a1dfbad75a1ae51bd30bb499e14e37fe993ba2ea57b863fc56304de94073d880c9c18eb0a469cde211d02", - ), - ( - ("hello", "goodbye"), - None, - "be68ed8f7c48c3c78af4ab2a1c8ba497f469a55c171a6d81e9386f0f2245ed4c8bc85a2135ef8d839151dad1361522cdfbb25d0f252395c29b81a9445b52ca83", - ), - ( - ("hello", b"goodbye"), - None, - "be68ed8f7c48c3c78af4ab2a1c8ba497f469a55c171a6d81e9386f0f2245ed4c8bc85a2135ef8d839151dad1361522cdfbb25d0f252395c29b81a9445b52ca83", - ), - ( - ("goodbye", "hello"), - None, - "b779b909718f0c4d9b85f0a1a71bb58cec74e6deece4eadb522261e0cee267a7eb7f4d57327bc369c1774c0cf7d5185db0d5e3f04cc88aff78d1899249bd26d5", - ), + (("hello",), None, "5d41402abc4b2a76b9719d911017c592"), + (("goodbye",), None, "69faab6268350295550de7d587bc323d"), + ((b"goodbye",), None, "69faab6268350295550de7d587bc323d"), + (("hello", "goodbye"), None, "441add4718519b71e42d329a834d6d5e"), + (("hello", b"goodbye"), None, "441add4718519b71e42d329a834d6d5e"), + (("goodbye", "hello"), None, "c04d8ccb6b9368703e62be93358094f9"), ( ("goodbye", "hello"), hashlib.sha256, @@ -76,9 +52,6 @@ def test_file_hash_hashes(self, tmp_path): f.write("0") val = file_hash(tmp_path.joinpath("test.py")) - assert val == hashlib.sha512(b"0").hexdigest() + assert val == hashlib.md5(b"0").hexdigest() # Check if the hash is stable - assert ( - val - == "31bca02094eb78126a517b206a88c73cfa9ec6f704c7030d18212cace820f025f00bf0ea68dbf3f3a5436ca63b53bf7bf80ad8d5de7d8359d0b7fed9dbc3ab99" - ) + assert val == "cfcd208495d565ef66e7dff9f98764da"