From 56fd23366a536c0b47805a1787b139d887c3af58 Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Tue, 28 Dec 2021 18:36:32 +0800 Subject: [PATCH 1/4] Support max on single-level struct in aggregation context Signed-off-by: Chong Gao --- docs/supported_ops.md | 12 ++++++------ .../src/main/python/hash_aggregate_test.py | 19 +++++++++++++++++++ .../nvidia/spark/rapids/GpuOverrides.scala | 8 ++++++-- 3 files changed, 31 insertions(+), 8 deletions(-) diff --git a/docs/supported_ops.md b/docs/supported_ops.md index dc2d68ef73f..f1b250ec0e7 100644 --- a/docs/supported_ops.md +++ b/docs/supported_ops.md @@ -15141,7 +15141,7 @@ are limited. NS NS -NS +PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types BINARY, CALENDAR, ARRAY, STRUCT, UDT
NS @@ -15162,7 +15162,7 @@ are limited. NS NS -NS +PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types BINARY, CALENDAR, ARRAY, STRUCT, UDT
NS @@ -15184,7 +15184,7 @@ are limited. NS NS -NS +PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types BINARY, CALENDAR, ARRAY, STRUCT, UDT
NS @@ -15205,7 +15205,7 @@ are limited. NS NS -NS +PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types BINARY, CALENDAR, ARRAY, STRUCT, UDT
NS @@ -15227,7 +15227,7 @@ are limited. NS NS -NS +PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types BINARY, CALENDAR, ARRAY, STRUCT, UDT
NS @@ -15248,7 +15248,7 @@ are limited. NS NS -NS +PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types BINARY, CALENDAR, ARRAY, STRUCT, UDT
NS diff --git a/integration_tests/src/main/python/hash_aggregate_test.py b/integration_tests/src/main/python/hash_aggregate_test.py index 9aad3638dc3..a3b56d01daf 100644 --- a/integration_tests/src/main/python/hash_aggregate_test.py +++ b/integration_tests/src/main/python/hash_aggregate_test.py @@ -1695,3 +1695,22 @@ def test_groupby_std_variance_partial_replace_fallback(data_gen, exist_classes=','.join(exist_clz), non_exist_classes=','.join(non_exist_clz), conf=local_conf) + +@ignore_order +@pytest.mark.parametrize('data_type', all_gen + [NullGen()], ids=idfn) +def test_max_single_level_struct(data_type): + data_gen = [ + ('a', StructGen([ + ('aa', data_type), + ('ab', data_type)])), + ('b', IntegerGen())] + assert_gpu_and_cpu_are_equal_sql( + lambda spark : gen_df(spark, data_gen, length=1024), + "hash_agg_table", + 'select b, max(a) from hash_agg_table group by b', + _no_nans_float_conf) + assert_gpu_and_cpu_are_equal_sql( + lambda spark : gen_df(spark, data_gen, length=1024), + "hash_agg_table", + 'select max(a) from hash_agg_table', + _no_nans_float_conf) diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala index 5ba4c9b5b7f..ec4b907729a 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala @@ -2236,9 +2236,13 @@ object GpuOverrides extends Logging { expr[Max]( "Max aggregate operator", ExprChecks.fullAgg( - TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL, TypeSig.orderable, + // Max supports single level struct, e.g.: max(struct(string, string)) + (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL + TypeSig.STRUCT) + .nested(TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL), + TypeSig.orderable, Seq(ParamCheck("input", - (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL) + (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL + TypeSig.STRUCT) + .nested(TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL) .withPsNote(TypeEnum.DOUBLE, nanAggPsNote) .withPsNote(TypeEnum.FLOAT, nanAggPsNote), TypeSig.orderable)) From 8c5e6de7113234e5eb23d16ab7ff0811a7262dee Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Fri, 31 Dec 2021 11:01:33 +0800 Subject: [PATCH 2/4] Refactor Signed-off-by: Chong Gao --- .../src/main/python/hash_aggregate_test.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/integration_tests/src/main/python/hash_aggregate_test.py b/integration_tests/src/main/python/hash_aggregate_test.py index a3b56d01daf..7b05d506e23 100644 --- a/integration_tests/src/main/python/hash_aggregate_test.py +++ b/integration_tests/src/main/python/hash_aggregate_test.py @@ -1697,20 +1697,20 @@ def test_groupby_std_variance_partial_replace_fallback(data_gen, conf=local_conf) @ignore_order -@pytest.mark.parametrize('data_type', all_gen + [NullGen()], ids=idfn) -def test_max_single_level_struct(data_type): - data_gen = [ +@pytest.mark.parametrize('data_gen', all_gen + [NullGen()], ids=idfn) +def test_max_single_level_struct(data_gen): + df_gen = [ ('a', StructGen([ - ('aa', data_type), - ('ab', data_type)])), + ('aa', data_gen), + ('ab', data_gen)])), ('b', IntegerGen())] assert_gpu_and_cpu_are_equal_sql( - lambda spark : gen_df(spark, data_gen, length=1024), + lambda spark : gen_df(spark, df_gen, length=1024), "hash_agg_table", 'select b, max(a) from hash_agg_table group by b', _no_nans_float_conf) assert_gpu_and_cpu_are_equal_sql( - lambda spark : gen_df(spark, data_gen, length=1024), + lambda spark : gen_df(spark, df_gen, length=1024), "hash_agg_table", 'select max(a) from hash_agg_table', _no_nans_float_conf) From fbb40d89f9755c7bb2fb9e59195e203c455a808d Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Thu, 6 Jan 2022 18:17:36 +0800 Subject: [PATCH 3/4] Support min and max on single-level Signed-off-by: Chong Gao --- docs/supported_ops.md | 12 ++--- .../src/main/python/hash_aggregate_test.py | 42 ++++++++++++--- .../nvidia/spark/rapids/GpuOverrides.scala | 54 +++++++++++++------ 3 files changed, 79 insertions(+), 29 deletions(-) diff --git a/docs/supported_ops.md b/docs/supported_ops.md index f1b250ec0e7..d9a89142756 100644 --- a/docs/supported_ops.md +++ b/docs/supported_ops.md @@ -15227,7 +15227,7 @@ are limited. NS NS -PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types BINARY, CALENDAR, ARRAY, STRUCT, UDT
+NS NS @@ -15248,7 +15248,7 @@ are limited. NS NS -PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types BINARY, CALENDAR, ARRAY, STRUCT, UDT
+NS NS @@ -15300,7 +15300,7 @@ are limited. NS NS -NS +PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types BINARY, CALENDAR, ARRAY, STRUCT, UDT
NS @@ -15321,7 +15321,7 @@ are limited. NS NS -NS +PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types BINARY, CALENDAR, ARRAY, STRUCT, UDT
NS @@ -15343,7 +15343,7 @@ are limited. NS NS -NS +PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types BINARY, CALENDAR, ARRAY, STRUCT, UDT
NS @@ -15364,7 +15364,7 @@ are limited. NS NS -NS +PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types BINARY, CALENDAR, ARRAY, STRUCT, UDT
NS diff --git a/integration_tests/src/main/python/hash_aggregate_test.py b/integration_tests/src/main/python/hash_aggregate_test.py index 7b05d506e23..41e5a2d0473 100644 --- a/integration_tests/src/main/python/hash_aggregate_test.py +++ b/integration_tests/src/main/python/hash_aggregate_test.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -1696,21 +1696,49 @@ def test_groupby_std_variance_partial_replace_fallback(data_gen, non_exist_classes=','.join(non_exist_clz), conf=local_conf) -@ignore_order -@pytest.mark.parametrize('data_gen', all_gen + [NullGen()], ids=idfn) -def test_max_single_level_struct(data_gen): +# +# For max value of (2147483647,-930759675) and (2147483647,None), GPU returns (2147483647,None), but CPU returns (2147483647,-930759675) +# This should be a bug of CUDF, is checking with CUDF team: https://github.com/rapidsai/cudf/issues/8974#issuecomment-1006400752 +# Just add nullable = False to make cases passing. +# TODO if above bug fixed, should remove nullable = False +# +gens_for_max_min_test = [ByteGen(nullable = False), ShortGen(nullable = False), + IntegerGen(nullable = False), LongGen(nullable = False), FloatGen(nullable = False, no_nans = True), + DoubleGen(nullable = False, no_nans = True), StringGen(nullable = False), BooleanGen(nullable = False), + DateGen(nullable = False), TimestampGen(nullable = False), + DecimalGen(precision=12, scale=2, nullable = False), + DecimalGen(precision=36, scale=5, nullable = False), + null_gen] + +@ignore_order(local=True) +@pytest.mark.parametrize('data_gen', gens_for_max_min_test, ids=idfn) +def test_min_max_for_single_level_struct(data_gen): df_gen = [ ('a', StructGen([ ('aa', data_gen), ('ab', data_gen)])), - ('b', IntegerGen())] + ('b', RepeatSeqGen(IntegerGen(), length=20))] + + # test max assert_gpu_and_cpu_are_equal_sql( - lambda spark : gen_df(spark, df_gen, length=1024), + lambda spark : gen_df(spark, df_gen), "hash_agg_table", 'select b, max(a) from hash_agg_table group by b', _no_nans_float_conf) assert_gpu_and_cpu_are_equal_sql( - lambda spark : gen_df(spark, df_gen, length=1024), + lambda spark : gen_df(spark, df_gen), "hash_agg_table", 'select max(a) from hash_agg_table', _no_nans_float_conf) + + # test min + assert_gpu_and_cpu_are_equal_sql( + lambda spark : gen_df(spark, df_gen, length=1024), + "hash_agg_table", + 'select b, min(a) from hash_agg_table group by b', + _no_nans_float_conf) + assert_gpu_and_cpu_are_equal_sql( + lambda spark : gen_df(spark, df_gen, length=1024), + "hash_agg_table", + 'select min(a) from hash_agg_table', + _no_nans_float_conf) diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala index ec4b907729a..4ba17c66390 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -2235,18 +2235,27 @@ object GpuOverrides extends Logging { }), expr[Max]( "Max aggregate operator", - ExprChecks.fullAgg( - // Max supports single level struct, e.g.: max(struct(string, string)) - (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL + TypeSig.STRUCT) - .nested(TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL), - TypeSig.orderable, - Seq(ParamCheck("input", + ExprChecksImpl( + ExprChecks.reductionAndGroupByAgg( + // Max supports single level struct, e.g.: max(struct(string, string)) (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL + TypeSig.STRUCT) - .nested(TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL) + .nested(TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL), + TypeSig.orderable, + Seq(ParamCheck("input", + (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL + TypeSig.STRUCT) + .nested(TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL) .withPsNote(TypeEnum.DOUBLE, nanAggPsNote) .withPsNote(TypeEnum.FLOAT, nanAggPsNote), - TypeSig.orderable)) - ), + TypeSig.orderable))).asInstanceOf[ExprChecksImpl].contexts + ++ + ExprChecks.windowOnly( + (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL), + TypeSig.orderable, + Seq(ParamCheck("input", + (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL) + .withPsNote(TypeEnum.DOUBLE, nanAggPsNote) + .withPsNote(TypeEnum.FLOAT, nanAggPsNote), + TypeSig.orderable))).asInstanceOf[ExprChecksImpl].contexts), (max, conf, p, r) => new AggExprMeta[Max](max, conf, p, r) { override def tagAggForGpu(): Unit = { val dataType = max.child.dataType @@ -2261,14 +2270,27 @@ object GpuOverrides extends Logging { }), expr[Min]( "Min aggregate operator", - ExprChecks.fullAgg( - TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL, TypeSig.orderable, - Seq(ParamCheck("input", - (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL) + ExprChecksImpl( + ExprChecks.reductionAndGroupByAgg( + // Min supports single level struct, e.g.: max(struct(string, string)) + (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL + TypeSig.STRUCT) + .nested(TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL), + TypeSig.orderable, + Seq(ParamCheck("input", + (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL + TypeSig.STRUCT) + .nested(TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL) .withPsNote(TypeEnum.DOUBLE, nanAggPsNote) .withPsNote(TypeEnum.FLOAT, nanAggPsNote), - TypeSig.orderable)) - ), + TypeSig.orderable))).asInstanceOf[ExprChecksImpl].contexts + ++ + ExprChecks.windowOnly( + (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL), + TypeSig.orderable, + Seq(ParamCheck("input", + (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL) + .withPsNote(TypeEnum.DOUBLE, nanAggPsNote) + .withPsNote(TypeEnum.FLOAT, nanAggPsNote), + TypeSig.orderable))).asInstanceOf[ExprChecksImpl].contexts), (a, conf, p, r) => new AggExprMeta[Min](a, conf, p, r) { override def tagAggForGpu(): Unit = { val dataType = a.child.dataType From 908097c2da997268667603cac9857e1721739cb6 Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Thu, 13 Jan 2022 13:21:19 +0800 Subject: [PATCH 4/4] Update test case after Cudf fixed bug about null Signed-off-by: Chong Gao --- .../src/main/python/hash_aggregate_test.py | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/integration_tests/src/main/python/hash_aggregate_test.py b/integration_tests/src/main/python/hash_aggregate_test.py index 41e5a2d0473..265cd35772f 100644 --- a/integration_tests/src/main/python/hash_aggregate_test.py +++ b/integration_tests/src/main/python/hash_aggregate_test.py @@ -1697,21 +1697,17 @@ def test_groupby_std_variance_partial_replace_fallback(data_gen, conf=local_conf) # -# For max value of (2147483647,-930759675) and (2147483647,None), GPU returns (2147483647,None), but CPU returns (2147483647,-930759675) -# This should be a bug of CUDF, is checking with CUDF team: https://github.com/rapidsai/cudf/issues/8974#issuecomment-1006400752 -# Just add nullable = False to make cases passing. -# TODO if above bug fixed, should remove nullable = False +# test min max on single level structure # -gens_for_max_min_test = [ByteGen(nullable = False), ShortGen(nullable = False), - IntegerGen(nullable = False), LongGen(nullable = False), FloatGen(nullable = False, no_nans = True), - DoubleGen(nullable = False, no_nans = True), StringGen(nullable = False), BooleanGen(nullable = False), - DateGen(nullable = False), TimestampGen(nullable = False), - DecimalGen(precision=12, scale=2, nullable = False), - DecimalGen(precision=36, scale=5, nullable = False), +gens_for_max_min = [byte_gen, short_gen, int_gen, long_gen, + FloatGen(no_nans = True), DoubleGen(no_nans = True), + string_gen, boolean_gen, + date_gen, timestamp_gen, + DecimalGen(precision=12, scale=2), + DecimalGen(precision=36, scale=5), null_gen] - @ignore_order(local=True) -@pytest.mark.parametrize('data_gen', gens_for_max_min_test, ids=idfn) +@pytest.mark.parametrize('data_gen', gens_for_max_min, ids=idfn) def test_min_max_for_single_level_struct(data_gen): df_gen = [ ('a', StructGen([