Optimize DECIMAL128 sum aggregations [databricks] (#4688)
* Optimize DECIMAL128 sum aggregations

Signed-off-by: Jason Lowe <[email protected]>

* Fix regression in window sum

* Update for review comments

* Explicitly upcast input to avoid libcudf sort-based aggregation issue

* Lower batch limit in agg tests to better exercise sort-based aggregations

* Remove redundant method override
jlowe authored Feb 8, 2022
1 parent aa2126d commit f3a5cd3
Showing 2 changed files with 382 additions and 294 deletions.
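The commit messages above mention DECIMAL128 sum overflow and libcudf's sort-based aggregation path. As background, a back-of-envelope sketch in plain Python of why a DECIMAL128 sum needs widening or overflow-checked chunking (the constant names are ours, not the plugin's; the bounds are the standard DECIMAL128/int128 limits):

```python
# DECIMAL128 stores up to 38 decimal digits of unscaled value inside a
# signed 128-bit integer. Summing even two near-maximum values exceeds
# both the 38-digit range and the int128 range, so a GPU sum must widen
# its input or split the work into overflow-checked chunks.
DECIMAL128_MAX_UNSCALED = 10**38 - 1   # largest 38-digit unscaled value
INT128_MAX = 2**127 - 1                # about 1.7e38

assert DECIMAL128_MAX_UNSCALED < INT128_MAX      # a single value fits
assert 2 * DECIMAL128_MAX_UNSCALED > INT128_MAX  # a 2-value sum can overflow int128
```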
4 changes: 2 additions & 2 deletions integration_tests/src/main/python/hash_aggregate_test.py
@@ -34,7 +34,7 @@
 }
 
 _no_nans_float_smallbatch_conf = copy_and_update(_no_nans_float_conf,
-    {'spark.rapids.sql.batchSizeBytes' : '1000'})
+    {'spark.rapids.sql.batchSizeBytes' : '250'})
 
 _no_nans_float_conf_partial = copy_and_update(_no_nans_float_conf,
     {'spark.rapids.sql.hashAgg.replaceMode': 'partial'})
@@ -339,7 +339,7 @@ def test_hash_reduction_sum_count_action(data_gen):
 # Make sure that we can do computation in the group by columns
 @ignore_order
 def test_computation_in_grpby_columns():
-    conf = {'spark.rapids.sql.batchSizeBytes' : '1000'}
+    conf = {'spark.rapids.sql.batchSizeBytes' : '250'}
     data_gen = [
         ('a', RepeatSeqGen(StringGen('a{1,20}'), length=50)),
         ('b', short_gen)]
