From 56fd23366a536c0b47805a1787b139d887c3af58 Mon Sep 17 00:00:00 2001
From: Chong Gao <res_life@163.com>
Date: Tue, 28 Dec 2021 18:36:32 +0800
Subject: [PATCH 1/4] Support max on single-level struct in aggregation context

Signed-off-by: Chong Gao <res_life@163.com>
---
 docs/supported_ops.md                         | 12 ++++++------
 .../src/main/python/hash_aggregate_test.py    | 19 +++++++++++++++++++
 .../nvidia/spark/rapids/GpuOverrides.scala    |  8 ++++++--
 3 files changed, 31 insertions(+), 8 deletions(-)
diff --git a/docs/supported_ops.md b/docs/supported_ops.md
index dc2d68ef73f..f1b250ec0e7 100644
--- a/docs/supported_ops.md
+++ b/docs/supported_ops.md
@@ -15141,7 +15141,7 @@ are limited.
 <td><b>NS</b></td>
 <td><b>NS</b></td>
 <td> </td>
-<td><b>NS</b></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types BINARY, CALENDAR, ARRAY, STRUCT, UDT</em></td>
 <td><b>NS</b></td>
 </tr>
 <tr>
@@ -15162,7 +15162,7 @@ are limited.
 <td><b>NS</b></td>
 <td><b>NS</b></td>
 <td> </td>
-<td><b>NS</b></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types BINARY, CALENDAR, ARRAY, STRUCT, UDT</em></td>
 <td><b>NS</b></td>
 </tr>
 <tr>
@@ -15184,7 +15184,7 @@ are limited.
 <td><b>NS</b></td>
 <td><b>NS</b></td>
 <td> </td>
-<td><b>NS</b></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types BINARY, CALENDAR, ARRAY, STRUCT, UDT</em></td>
 <td><b>NS</b></td>
 </tr>
 <tr>
@@ -15205,7 +15205,7 @@ are limited.
 <td><b>NS</b></td>
 <td><b>NS</b></td>
 <td> </td>
-<td><b>NS</b></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types BINARY, CALENDAR, ARRAY, STRUCT, UDT</em></td>
 <td><b>NS</b></td>
 </tr>
 <tr>
@@ -15227,7 +15227,7 @@ are limited.
 <td><b>NS</b></td>
 <td><b>NS</b></td>
 <td> </td>
-<td><b>NS</b></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types BINARY, CALENDAR, ARRAY, STRUCT, UDT</em></td>
 <td><b>NS</b></td>
 </tr>
 <tr>
@@ -15248,7 +15248,7 @@ are limited.
 <td><b>NS</b></td>
 <td><b>NS</b></td>
 <td> </td>
-<td><b>NS</b></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types BINARY, CALENDAR, ARRAY, STRUCT, UDT</em></td>
 <td><b>NS</b></td>
 </tr>
 <tr>
diff --git a/integration_tests/src/main/python/hash_aggregate_test.py b/integration_tests/src/main/python/hash_aggregate_test.py
index 9aad3638dc3..a3b56d01daf 100644
--- a/integration_tests/src/main/python/hash_aggregate_test.py
+++ b/integration_tests/src/main/python/hash_aggregate_test.py
@@ -1695,3 +1695,22 @@ def test_groupby_std_variance_partial_replace_fallback(data_gen,
         exist_classes=','.join(exist_clz),
         non_exist_classes=','.join(non_exist_clz),
         conf=local_conf)
+
+@ignore_order
+@pytest.mark.parametrize('data_type',  all_gen + [NullGen()], ids=idfn)
+def test_max_single_level_struct(data_type):
+    data_gen = [
+        ('a', StructGen([
+                ('aa', data_type),
+                ('ab', data_type)])),
+        ('b', IntegerGen())]
+    assert_gpu_and_cpu_are_equal_sql(
+        lambda spark : gen_df(spark, data_gen, length=1024),
+        "hash_agg_table",
+        'select b, max(a) from hash_agg_table group by b',
+        _no_nans_float_conf)
+    assert_gpu_and_cpu_are_equal_sql(
+        lambda spark : gen_df(spark, data_gen, length=1024),
+        "hash_agg_table",
+        'select max(a) from hash_agg_table',
+        _no_nans_float_conf)
diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala
index 5ba4c9b5b7f..ec4b907729a 100644
--- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala
+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala
@@ -2236,9 +2236,13 @@ object GpuOverrides extends Logging {
     expr[Max](
       "Max aggregate operator",
       ExprChecks.fullAgg(
-        TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL, TypeSig.orderable,
+        // Max supports single level struct, e.g.:  max(struct(string, string))
+        (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL + TypeSig.STRUCT)
+          .nested(TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL),
+        TypeSig.orderable,
         Seq(ParamCheck("input",
-          (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL)
+          (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL + TypeSig.STRUCT)
+            .nested(TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL)
               .withPsNote(TypeEnum.DOUBLE, nanAggPsNote)
               .withPsNote(TypeEnum.FLOAT, nanAggPsNote),
           TypeSig.orderable))

From 8c5e6de7113234e5eb23d16ab7ff0811a7262dee Mon Sep 17 00:00:00 2001
From: Chong Gao <res_life@163.com>
Date: Fri, 31 Dec 2021 11:01:33 +0800
Subject: [PATCH 2/4] Refactor

Signed-off-by: Chong Gao <res_life@163.com>
---
 .../src/main/python/hash_aggregate_test.py         | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/integration_tests/src/main/python/hash_aggregate_test.py b/integration_tests/src/main/python/hash_aggregate_test.py
index a3b56d01daf..7b05d506e23 100644
--- a/integration_tests/src/main/python/hash_aggregate_test.py
+++ b/integration_tests/src/main/python/hash_aggregate_test.py
@@ -1697,20 +1697,20 @@ def test_groupby_std_variance_partial_replace_fallback(data_gen,
         conf=local_conf)
 
 @ignore_order
-@pytest.mark.parametrize('data_type',  all_gen + [NullGen()], ids=idfn)
-def test_max_single_level_struct(data_type):
-    data_gen = [
+@pytest.mark.parametrize('data_gen',  all_gen + [NullGen()], ids=idfn)
+def test_max_single_level_struct(data_gen):
+    df_gen = [
         ('a', StructGen([
-                ('aa', data_type),
-                ('ab', data_type)])),
+                ('aa', data_gen),
+                ('ab', data_gen)])),
         ('b', IntegerGen())]
     assert_gpu_and_cpu_are_equal_sql(
-        lambda spark : gen_df(spark, data_gen, length=1024),
+        lambda spark : gen_df(spark, df_gen, length=1024),
         "hash_agg_table",
         'select b, max(a) from hash_agg_table group by b',
         _no_nans_float_conf)
     assert_gpu_and_cpu_are_equal_sql(
-        lambda spark : gen_df(spark, data_gen, length=1024),
+        lambda spark : gen_df(spark, df_gen, length=1024),
         "hash_agg_table",
         'select max(a) from hash_agg_table',
         _no_nans_float_conf)

From fbb40d89f9755c7bb2fb9e59195e203c455a808d Mon Sep 17 00:00:00 2001
From: Chong Gao <res_life@163.com>
Date: Thu, 6 Jan 2022 18:17:36 +0800
Subject: [PATCH 3/4] Support min and max on single-level

Signed-off-by: Chong Gao <res_life@163.com>
---
 docs/supported_ops.md                         | 12 ++---
 .../src/main/python/hash_aggregate_test.py    | 42 ++++++++++++---
 .../nvidia/spark/rapids/GpuOverrides.scala    | 54 +++++++++++++------
 3 files changed, 79 insertions(+), 29 deletions(-)

diff --git a/docs/supported_ops.md b/docs/supported_ops.md
index f1b250ec0e7..d9a89142756 100644
--- a/docs/supported_ops.md
+++ b/docs/supported_ops.md
@@ -15227,7 +15227,7 @@ are limited.
 <td><b>NS</b></td>
 <td><b>NS</b></td>
 <td> </td>
-<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types BINARY, CALENDAR, ARRAY, STRUCT, UDT</em></td>
+<td><b>NS</b></td>
 <td><b>NS</b></td>
 </tr>
 <tr>
@@ -15248,7 +15248,7 @@ are limited.
 <td><b>NS</b></td>
 <td><b>NS</b></td>
 <td> </td>
-<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types BINARY, CALENDAR, ARRAY, STRUCT, UDT</em></td>
+<td><b>NS</b></td>
 <td><b>NS</b></td>
 </tr>
 <tr>
@@ -15300,7 +15300,7 @@ are limited.
 <td><b>NS</b></td>
 <td><b>NS</b></td>
 <td> </td>
-<td><b>NS</b></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types BINARY, CALENDAR, ARRAY, STRUCT, UDT</em></td>
 <td><b>NS</b></td>
 </tr>
 <tr>
@@ -15321,7 +15321,7 @@ are limited.
 <td><b>NS</b></td>
 <td><b>NS</b></td>
 <td> </td>
-<td><b>NS</b></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types BINARY, CALENDAR, ARRAY, STRUCT, UDT</em></td>
 <td><b>NS</b></td>
 </tr>
 <tr>
@@ -15343,7 +15343,7 @@ are limited.
 <td><b>NS</b></td>
 <td><b>NS</b></td>
 <td> </td>
-<td><b>NS</b></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types BINARY, CALENDAR, ARRAY, STRUCT, UDT</em></td>
 <td><b>NS</b></td>
 </tr>
 <tr>
@@ -15364,7 +15364,7 @@ are limited.
 <td><b>NS</b></td>
 <td><b>NS</b></td>
 <td> </td>
-<td><b>NS</b></td>
+<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types BINARY, CALENDAR, ARRAY, STRUCT, UDT</em></td>
 <td><b>NS</b></td>
 </tr>
 <tr>
diff --git a/integration_tests/src/main/python/hash_aggregate_test.py b/integration_tests/src/main/python/hash_aggregate_test.py
index 7b05d506e23..41e5a2d0473 100644
--- a/integration_tests/src/main/python/hash_aggregate_test.py
+++ b/integration_tests/src/main/python/hash_aggregate_test.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2021, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -1696,21 +1696,49 @@ def test_groupby_std_variance_partial_replace_fallback(data_gen,
         non_exist_classes=','.join(non_exist_clz),
         conf=local_conf)
 
-@ignore_order
-@pytest.mark.parametrize('data_gen',  all_gen + [NullGen()], ids=idfn)
-def test_max_single_level_struct(data_gen):
+#
+# For max value of (2147483647,-930759675) and (2147483647,None), GPU returns (2147483647,None), but CPU returns (2147483647,-930759675)
+# This should be a bug of CUDF, is checking with CUDF team: https://github.com/rapidsai/cudf/issues/8974#issuecomment-1006400752
+# Just add nullable = False to make cases passing.
+# TODO if above bug fixed, should remove nullable = False
+#
+gens_for_max_min_test = [ByteGen(nullable = False), ShortGen(nullable = False),
+    IntegerGen(nullable = False), LongGen(nullable = False), FloatGen(nullable = False, no_nans = True),
+    DoubleGen(nullable = False, no_nans = True), StringGen(nullable = False), BooleanGen(nullable = False),
+    DateGen(nullable = False), TimestampGen(nullable = False),
+    DecimalGen(precision=12, scale=2, nullable = False),
+    DecimalGen(precision=36, scale=5, nullable = False),
+    null_gen]
+
+@ignore_order(local=True)
+@pytest.mark.parametrize('data_gen',  gens_for_max_min_test, ids=idfn)
+def test_min_max_for_single_level_struct(data_gen):
     df_gen = [
         ('a', StructGen([
                 ('aa', data_gen),
                 ('ab', data_gen)])),
-        ('b', IntegerGen())]
+        ('b', RepeatSeqGen(IntegerGen(), length=20))]
+
+    # test max
     assert_gpu_and_cpu_are_equal_sql(
-        lambda spark : gen_df(spark, df_gen, length=1024),
+        lambda spark : gen_df(spark, df_gen),
         "hash_agg_table",
         'select b, max(a) from hash_agg_table group by b',
         _no_nans_float_conf)
     assert_gpu_and_cpu_are_equal_sql(
-        lambda spark : gen_df(spark, df_gen, length=1024),
+        lambda spark : gen_df(spark, df_gen),
         "hash_agg_table",
         'select max(a) from hash_agg_table',
         _no_nans_float_conf)
+
+    # test min
+    assert_gpu_and_cpu_are_equal_sql(
+        lambda spark : gen_df(spark, df_gen, length=1024),
+        "hash_agg_table",
+        'select b, min(a) from hash_agg_table group by b',
+        _no_nans_float_conf)
+    assert_gpu_and_cpu_are_equal_sql(
+        lambda spark : gen_df(spark, df_gen, length=1024),
+        "hash_agg_table",
+        'select min(a) from hash_agg_table',
+        _no_nans_float_conf)
diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala
index ec4b907729a..4ba17c66390 100644
--- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala
+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -2235,18 +2235,27 @@ object GpuOverrides extends Logging {
       }),
     expr[Max](
       "Max aggregate operator",
-      ExprChecks.fullAgg(
-        // Max supports single level struct, e.g.:  max(struct(string, string))
-        (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL + TypeSig.STRUCT)
-          .nested(TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL),
-        TypeSig.orderable,
-        Seq(ParamCheck("input",
+      ExprChecksImpl(
+        ExprChecks.reductionAndGroupByAgg(
+          // Max supports single level struct, e.g.:  max(struct(string, string))
           (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL + TypeSig.STRUCT)
-            .nested(TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL)
+            .nested(TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL),
+          TypeSig.orderable,
+          Seq(ParamCheck("input",
+            (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL + TypeSig.STRUCT)
+              .nested(TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL)
               .withPsNote(TypeEnum.DOUBLE, nanAggPsNote)
               .withPsNote(TypeEnum.FLOAT, nanAggPsNote),
-          TypeSig.orderable))
-      ),
+            TypeSig.orderable))).asInstanceOf[ExprChecksImpl].contexts
+          ++
+          ExprChecks.windowOnly(
+            (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL),
+            TypeSig.orderable,
+            Seq(ParamCheck("input",
+              (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL)
+                .withPsNote(TypeEnum.DOUBLE, nanAggPsNote)
+                .withPsNote(TypeEnum.FLOAT, nanAggPsNote),
+              TypeSig.orderable))).asInstanceOf[ExprChecksImpl].contexts),
       (max, conf, p, r) => new AggExprMeta[Max](max, conf, p, r) {
         override def tagAggForGpu(): Unit = {
           val dataType = max.child.dataType
@@ -2261,14 +2270,27 @@ object GpuOverrides extends Logging {
       }),
     expr[Min](
       "Min aggregate operator",
-      ExprChecks.fullAgg(
-        TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL, TypeSig.orderable,
-        Seq(ParamCheck("input",
-          (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL)
+      ExprChecksImpl(
+        ExprChecks.reductionAndGroupByAgg(
+          // Min supports single level struct, e.g.:  max(struct(string, string))
+          (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL + TypeSig.STRUCT)
+            .nested(TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL),
+          TypeSig.orderable,
+          Seq(ParamCheck("input",
+            (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL + TypeSig.STRUCT)
+              .nested(TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL)
               .withPsNote(TypeEnum.DOUBLE, nanAggPsNote)
               .withPsNote(TypeEnum.FLOAT, nanAggPsNote),
-          TypeSig.orderable))
-      ),
+            TypeSig.orderable))).asInstanceOf[ExprChecksImpl].contexts
+          ++
+          ExprChecks.windowOnly(
+            (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL),
+            TypeSig.orderable,
+            Seq(ParamCheck("input",
+              (TypeSig.commonCudfTypes + TypeSig.DECIMAL_128_FULL + TypeSig.NULL)
+                .withPsNote(TypeEnum.DOUBLE, nanAggPsNote)
+                .withPsNote(TypeEnum.FLOAT, nanAggPsNote),
+              TypeSig.orderable))).asInstanceOf[ExprChecksImpl].contexts),
       (a, conf, p, r) => new AggExprMeta[Min](a, conf, p, r) {
         override def tagAggForGpu(): Unit = {
           val dataType = a.child.dataType

From 908097c2da997268667603cac9857e1721739cb6 Mon Sep 17 00:00:00 2001
From: Chong Gao <res_life@163.com>
Date: Thu, 13 Jan 2022 13:21:19 +0800
Subject: [PATCH 4/4] Update test case after Cudf fixed bug about null

Signed-off-by: Chong Gao <res_life@163.com>
---
 .../src/main/python/hash_aggregate_test.py    | 20 ++++++++-----------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/integration_tests/src/main/python/hash_aggregate_test.py b/integration_tests/src/main/python/hash_aggregate_test.py
index 41e5a2d0473..265cd35772f 100644
--- a/integration_tests/src/main/python/hash_aggregate_test.py
+++ b/integration_tests/src/main/python/hash_aggregate_test.py
@@ -1697,21 +1697,17 @@ def test_groupby_std_variance_partial_replace_fallback(data_gen,
         conf=local_conf)
 
 #
-# For max value of (2147483647,-930759675) and (2147483647,None), GPU returns (2147483647,None), but CPU returns (2147483647,-930759675)
-# This should be a bug of CUDF, is checking with CUDF team: https://github.com/rapidsai/cudf/issues/8974#issuecomment-1006400752
-# Just add nullable = False to make cases passing.
-# TODO if above bug fixed, should remove nullable = False
+# test min max on single level structure
 #
-gens_for_max_min_test = [ByteGen(nullable = False), ShortGen(nullable = False),
-    IntegerGen(nullable = False), LongGen(nullable = False), FloatGen(nullable = False, no_nans = True),
-    DoubleGen(nullable = False, no_nans = True), StringGen(nullable = False), BooleanGen(nullable = False),
-    DateGen(nullable = False), TimestampGen(nullable = False),
-    DecimalGen(precision=12, scale=2, nullable = False),
-    DecimalGen(precision=36, scale=5, nullable = False),
+gens_for_max_min = [byte_gen, short_gen, int_gen, long_gen,
+    FloatGen(no_nans = True), DoubleGen(no_nans = True),
+    string_gen, boolean_gen,
+    date_gen, timestamp_gen,
+    DecimalGen(precision=12, scale=2),
+    DecimalGen(precision=36, scale=5),
     null_gen]
-
 @ignore_order(local=True)
-@pytest.mark.parametrize('data_gen',  gens_for_max_min_test, ids=idfn)
+@pytest.mark.parametrize('data_gen',  gens_for_max_min, ids=idfn)
 def test_min_max_for_single_level_struct(data_gen):
     df_gen = [
         ('a', StructGen([