From 89effbfd1a6c0bb2b7dcf7f8be4955cbb588ddb5 Mon Sep 17 00:00:00 2001 From: Jiayu Liu Date: Sat, 16 Oct 2021 00:14:52 +0800 Subject: [PATCH 1/3] add digest python function --- python/src/functions.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/python/src/functions.rs b/python/src/functions.rs index 8611ca54b566..cecf28d2e778 100644 --- a/python/src/functions.rs +++ b/python/src/functions.rs @@ -93,6 +93,18 @@ fn random() -> expression::Expression { } } +/// Computes a binary hash of the given data. `method` is the algorithm to use. +/// Standard algorithms are md5, sha224, sha256, sha384, sha512, blake2s, blake2b, and blake3. +#[pyfunction(value, method)] +fn digest( + value: expression::Expression, + method: expression::Expression, +) -> expression::Expression { + expression::Expression { + expr: logical_plan::digest(value.expr, method.expr), + } +} + /// Concatenates the text representations of all the arguments. /// NULL arguments are ignored. #[pyfunction(args = "*")] @@ -340,6 +352,7 @@ pub fn init(module: &PyModule) -> PyResult<()> { module.add_function(wrap_pyfunction!(ltrim, module)?)?; module.add_function(wrap_pyfunction!(max, module)?)?; module.add_function(wrap_pyfunction!(md5, module)?)?; + module.add_function(wrap_pyfunction!(digest, module)?)?; module.add_function(wrap_pyfunction!(min, module)?)?; module.add_function(wrap_pyfunction!(now, module)?)?; module.add_function(wrap_pyfunction!(octet_length, module)?)?; From 2b66e9f33e14e7155a58af3b63fce8649c8da69e Mon Sep 17 00:00:00 2001 From: Jiayu Liu Date: Sat, 16 Oct 2021 17:51:06 +0800 Subject: [PATCH 2/3] add test result --- python/tests/test_string_functions.py | 72 +++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/python/tests/test_string_functions.py b/python/tests/test_string_functions.py index ea064a6b2e9f..965f08707285 100644 --- a/python/tests/test_string_functions.py +++ b/python/tests/test_string_functions.py @@ -47,3 +47,75 @@ def 
test_string_functions(df): ] ) assert result.column(1) == pa.array(["hello", "world", "!"]) + + +def test_hash_functions(df): + df = df.select( + *[ + f.digest(f.col("a"), f.lit(m)) + for m in ("md5", "sha256", "sha512", "blake2s", "blake3") + ] + ) + result = df.collect() + assert len(result) == 1 + result = result[0] + b = bytearray.fromhex + assert result.column(0) == pa.array( + [ + b("8B1A9953C4611296A827ABF8C47804D7"), + b("F5A7924E621E84C9280A9A27E1BCB7F6"), + b("9033E0E305F247C0C3C80D0C7848C8B3"), + ] + ) + assert result.column(1) == pa.array( + [ + b( + "185F8DB32271FE25F561A6FC938B2E264306EC304EDA518007D1764826381969" + ), + b( + "78AE647DC5544D227130A0682A51E30BC7777FBB6D8A8F17007463A3ECD1D524" + ), + b( + "BB7208BC9B5D7C04F1236A82A0093A5E33F40423D5BA8D4266F7092C3BA43B62" + ), + ] + ) + assert result.column(2) == pa.array( + [ + b( + "3615F80C9D293ED7402687F94B22D58E529B8CC7916F8FAC7FDDF7FBD5AF4CF777D3D795A7A00A16BF7E7F3FB9561EE9BAAE480DA9FE7A18769E71886B03F315" + ), + b( + "8EA77393A42AB8FA92500FB077A9509CC32BC95E72712EFA116EDAF2EDFAE34FBB682EFDD6C5DD13C117E08BD4AAEF71291D8AACE2F890273081D0677C16DF0F" + ), + b( + "3831A6A6155E509DEE59A7F451EB35324D8F8F2DF6E3708894740F98FDEE23889F4DE5ADB0C5010DFB555CDA77C8AB5DC902094C52DE3278F35A75EBC25F093A" + ), + ] + ) + assert result.column(3) == pa.array( + [ + b( + "F73A5FBF881F89B814871F46E26AD3FA37CB2921C5E8561618639015B3CCBB71" + ), + b( + "B792A0383FB9E7A189EC150686579532854E44B71AC394831DAED169BA85CCC5" + ), + b( + "27988A0E51812297C77A433F635233346AEE29A829DCF4F46E0F58F402C6CFCB" + ), + ] + ) + assert result.column(4) == pa.array( + [ + b( + "FBC2B0516EE8744D293B980779178A3508850FDCFE965985782C39601B65794F" + ), + b( + "BF73D18575A736E4037D45F9E316085B86C19BE6363DE6AA789E13DEAACC1C4E" + ), + b( + "C8D11B9F7237E4034ADBCD2005735F9BC4C597C75AD89F4492BEC8F77D15F7EB" + ), + ] + ) From 927ae16c7a61727e55421ed604a2a780bf5eecdb Mon Sep 17 00:00:00 2001 From: Jiayu Liu Date: Sat, 16 Oct 2021 18:17:26 +0800 Subject: 
[PATCH 3/3] ignore long lines --- .github/workflows/python_test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python_test.yaml b/.github/workflows/python_test.yaml index 5419adb38828..17e7abb8b91f 100644 --- a/.github/workflows/python_test.yaml +++ b/.github/workflows/python_test.yaml @@ -49,7 +49,7 @@ jobs: - name: Run Linters run: | source venv/bin/activate - flake8 python + flake8 python --ignore=E501 black --line-length 79 --check python - name: Run tests run: |