From 6c1562c52765ec039673364844b95ba07137586b Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Thu, 25 Mar 2021 13:05:37 -0500 Subject: [PATCH 1/2] Add in support for murmur3 hashing of structs Signed-off-by: Robert (Bobby) Evans --- docs/supported_ops.md | 2 +- integration_tests/src/main/python/repart_test.py | 1 + .../main/scala/com/nvidia/spark/rapids/GpuOverrides.scala | 5 +++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/supported_ops.md b/docs/supported_ops.md index d39cf62d1e0..650eb3a091b 100644 --- a/docs/supported_ops.md +++ b/docs/supported_ops.md @@ -9730,7 +9730,7 @@ Accelerator support is described below. NS NS NS -NS +PS* (missing nested BINARY, CALENDAR, ARRAY, MAP, UDT) NS diff --git a/integration_tests/src/main/python/repart_test.py b/integration_tests/src/main/python/repart_test.py index 7225ab451a1..e77ce88fc80 100644 --- a/integration_tests/src/main/python/repart_test.py +++ b/integration_tests/src/main/python/repart_test.py @@ -92,6 +92,7 @@ def test_repartion_df(num_parts, length): ([('a', decimal_gen_64bit)], ['a']), ([('a', string_gen)], ['a']), ([('a', null_gen)], ['a']), + ([('a', StructGen([('c0', boolean_gen), ('c1', StructGen([('cc0', boolean_gen), ('cc1', string_gen)]))]))], ['a']), ([('a', byte_gen)], [f.col('a') - 5]), ([('a', long_gen)], [f.col('a') + 15]), ([('a', byte_gen), ('b', boolean_gen)], ['a', 'b']), diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala index 4902fecfe73..317fc3bd013 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala @@ -2312,7 +2312,7 @@ object GpuOverrides { "Murmur3 hash operator", ExprChecks.projectNotLambda(TypeSig.INT, TypeSig.INT, repeatingParamCheck = Some(RepeatingParamCheck("input", - TypeSig.commonCudfTypes + TypeSig.NULL + TypeSig.DECIMAL, + (TypeSig.commonCudfTypes + TypeSig.NULL + TypeSig.DECIMAL + TypeSig.STRUCT).nested(), TypeSig.all))), (a, conf, p, r) => new ExprMeta[Murmur3Hash](a, conf, p, r) { override val childExprs: Seq[BaseExprMeta[_]] = a.children @@ -2484,7 +2484,8 @@ object GpuOverrides { // TODO In 0.5 we should make the checks self documenting, and look more like what // SparkPlan and Expression support // https://github.com/NVIDIA/spark-rapids/issues/1915 - val sig = TypeSig.commonCudfTypes + TypeSig.NULL + TypeSig.DECIMAL + val sig = (TypeSig.commonCudfTypes + TypeSig.NULL + + TypeSig.DECIMAL + TypeSig.STRUCT).nested() hp.children.foreach { child => sig.tagExprParam(this, child, "hash_key") } From cf3d776c6afdeab9e05840e61b3f6adbbdb94361 Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Thu, 15 Apr 2021 11:33:17 -0500 Subject: [PATCH 2/2] Updated docs --- docs/supported_ops.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/supported_ops.md b/docs/supported_ops.md index 14064cd730f..3874b53a420 100644 --- a/docs/supported_ops.md +++ b/docs/supported_ops.md @@ -19925,7 +19925,7 @@ as `a` don't show up in the table. They are controlled by the rules for NS NS NS -NS +PS* (missing nested BINARY, CALENDAR, ARRAY, MAP, UDT) NS