From 3cf5e1d11ff95b670c01f7cffa71412ad5609916 Mon Sep 17 00:00:00 2001 From: Jiayu Liu Date: Sat, 9 Oct 2021 09:50:07 +0800 Subject: [PATCH] add blake2s and blake2b functions --- datafusion/Cargo.toml | 3 ++- .../src/physical_plan/crypto_expressions.rs | 24 +++++++++++++++---- datafusion/tests/sql.rs | 12 ++++++++++ 3 files changed, 34 insertions(+), 5 deletions(-) diff --git a/datafusion/Cargo.toml b/datafusion/Cargo.toml index 327fcd33de95..ea9ca218b017 100644 --- a/datafusion/Cargo.toml +++ b/datafusion/Cargo.toml @@ -39,7 +39,7 @@ path = "src/lib.rs" [features] default = ["crypto_expressions", "regex_expressions", "unicode_expressions"] simd = ["arrow/simd"] -crypto_expressions = ["md-5", "sha2"] +crypto_expressions = ["md-5", "sha2", "blake2"] regex_expressions = ["regex", "lazy_static"] unicode_expressions = ["unicode-segmentation"] # Used for testing ONLY: causes all values to hash to the same value (test for collisions) @@ -64,6 +64,7 @@ tokio-stream = "0.1" log = "^0.4" md-5 = { version = "^0.9.1", optional = true } sha2 = { version = "^0.9.1", optional = true } +blake2 = { version = "^0.9.2", optional = true } ordered-float = "2.0" unicode-segmentation = { version = "^1.7.1", optional = true } regex = { version = "^1.4.3", optional = true } diff --git a/datafusion/src/physical_plan/crypto_expressions.rs b/datafusion/src/physical_plan/crypto_expressions.rs index 8fa9f4493f7a..8c575bcd0264 100644 --- a/datafusion/src/physical_plan/crypto_expressions.rs +++ b/datafusion/src/physical_plan/crypto_expressions.rs @@ -28,8 +28,9 @@ use arrow::{ }, datatypes::DataType, }; +use blake2::{Blake2b, Blake2s, Digest}; use md5::Md5; -use sha2::{Digest as SHA2Digest, Sha224, Sha256, Sha384, Sha512}; +use sha2::{Sha224, Sha256, Sha384, Sha512}; use std::any::type_name; use std::fmt::Write; use std::sync::Arc; @@ -48,6 +49,8 @@ enum DigestAlgorithm { Sha256, Sha384, Sha512, + Blake2s, + Blake2b, } fn digest_process( @@ -112,6 +115,8 @@ impl DigestAlgorithm { Self::Sha256 => 
digest_to_scalar!(Sha256, value), Self::Sha384 => digest_to_scalar!(Sha384, value), Self::Sha512 => digest_to_scalar!(Sha512, value), + Self::Blake2b => digest_to_scalar!(Blake2b, value), + Self::Blake2s => digest_to_scalar!(Blake2s, value), }) } @@ -135,6 +140,8 @@ impl DigestAlgorithm { Self::Sha256 => digest_to_array!(Sha256, input_value), Self::Sha384 => digest_to_array!(Sha384, input_value), Self::Sha512 => digest_to_array!(Sha512, input_value), + Self::Blake2b => digest_to_array!(Blake2b, input_value), + Self::Blake2s => digest_to_array!(Blake2s, input_value), }; Ok(ColumnarValue::Array(array)) } @@ -155,6 +162,8 @@ impl FromStr for DigestAlgorithm { "sha256" => Self::Sha256, "sha384" => Self::Sha384, "sha512" => Self::Sha512, + "blake2b" => Self::Blake2b, + "blake2s" => Self::Blake2s, _ => { return Err(DataFusionError::Plan(format!( "There is no built-in digest algorithm named {}", @@ -237,24 +246,31 @@ define_digest_function!( Sha224, "computes sha224 hash digest of the given input" ); - define_digest_function!( sha256, Sha256, "computes sha256 hash digest of the given input" ); - define_digest_function!( sha384, Sha384, "computes sha384 hash digest of the given input" ); - define_digest_function!( sha512, Sha512, "computes sha512 hash digest of the given input" ); +define_digest_function!( + blake2b, + Blake2b, + "computes blake2b hash digest of the given input" +); +define_digest_function!( + blake2s, + Blake2s, + "computes blake2s hash digest of the given input" +); /// Digest computes a binary hash of the given data, accepts Utf8 or LargeUtf8 and returns a [`ColumnarValue`]. /// Second argument is the algorithm to use. 
diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs index 4ed07af27e2f..801451f81d86 100644 --- a/datafusion/tests/sql.rs +++ b/datafusion/tests/sql.rs @@ -4050,6 +4050,18 @@ async fn test_crypto_expressions() -> Result<()> { test_expression!("digest('','sha512')", "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e"); test_expression!("sha512(NULL)", "NULL"); test_expression!("digest(NULL,'sha512')", "NULL"); + test_expression!("digest(NULL,'blake2s')", "NULL"); + test_expression!("digest(NULL,'blake2b')", "NULL"); + test_expression!("digest('','blake2b')", "786a02f742015903c6c6fd852552d272912f4740e15847618a86e217f71f5419d25e1031afee585313896444934eb04b903a685b1448b755d56f701afe9be2ce"); + test_expression!("digest('tom','blake2b')", "482499a18da10a18d8d35ab5eb4c635551ec5b8d3ff37c3e87a632caf6680fe31566417834b4732e26e0203d1cad4f5366cb7ab57d89694e4c1fda3e26af2c23"); + test_expression!( + "digest('','blake2s')", + "69217a3079908094e11121d042354a7c1f55b6482ca1a51e1b250dfd1ed0eef9" + ); + test_expression!( + "digest('tom','blake2s')", + "5fc3f2b3a07cade5023c3df566e4d697d3823ba1b72bfb3e84cf7e768b2e7529" + ); Ok(()) }