Skip to content

Commit

Permalink
Support GpuFirst and GpuLast on nested types under reduction aggregations (#4337)
Browse files Browse the repository at this point in the history

Signed-off-by: sperlingxx <[email protected]>
  • Loading branch information
sperlingxx authored Dec 9, 2021
1 parent 61f51ff commit c7483f2
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 49 deletions.
24 changes: 12 additions & 12 deletions docs/supported_ops.md
Original file line number Diff line number Diff line change
Expand Up @@ -14730,9 +14730,9 @@ are limited.
<td>S</td>
<td><b>NS</b></td>
<td><b>NS</b></td>
<td><b>NS</b></td>
<td><b>NS</b></td>
<td><b>NS</b></td>
<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types BINARY, CALENDAR, UDT</em></td>
<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types BINARY, CALENDAR, UDT</em></td>
<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types BINARY, CALENDAR, UDT</em></td>
<td><b>NS</b></td>
</tr>
<tr>
Expand All @@ -14751,9 +14751,9 @@ are limited.
<td>S</td>
<td><b>NS</b></td>
<td><b>NS</b></td>
<td><b>NS</b></td>
<td><b>NS</b></td>
<td><b>NS</b></td>
<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types BINARY, CALENDAR, UDT</em></td>
<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types BINARY, CALENDAR, UDT</em></td>
<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types BINARY, CALENDAR, UDT</em></td>
<td><b>NS</b></td>
</tr>
<tr>
Expand Down Expand Up @@ -14863,9 +14863,9 @@ are limited.
<td>S</td>
<td><b>NS</b></td>
<td><b>NS</b></td>
<td><b>NS</b></td>
<td><b>NS</b></td>
<td><b>NS</b></td>
<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types BINARY, CALENDAR, UDT</em></td>
<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types BINARY, CALENDAR, UDT</em></td>
<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types BINARY, CALENDAR, UDT</em></td>
<td><b>NS</b></td>
</tr>
<tr>
Expand All @@ -14884,9 +14884,9 @@ are limited.
<td>S</td>
<td><b>NS</b></td>
<td><b>NS</b></td>
<td><b>NS</b></td>
<td><b>NS</b></td>
<td><b>NS</b></td>
<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types BINARY, CALENDAR, UDT</em></td>
<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types BINARY, CALENDAR, UDT</em></td>
<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP;<br/>unsupported child types BINARY, CALENDAR, UDT</em></td>
<td><b>NS</b></td>
</tr>
<tr>
Expand Down
32 changes: 13 additions & 19 deletions integration_tests/src/main/python/hash_aggregate_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1008,28 +1008,22 @@ def test_count_distinct_with_nan_floats(data_gen):
_nested_gens = array_gens_sample + struct_gens_sample + map_gens_sample

@pytest.mark.parametrize('data_gen', decimal_gens + decimal_128_gens, ids=idfn)
def test_first_last_reductions_extra_types(data_gen):
def test_first_last_reductions_decimal_types(data_gen):
assert_gpu_and_cpu_are_equal_collect(
# Coalesce and sort are to make sure that first and last, which are non-deterministic
# become deterministic
lambda spark : unary_op_df(spark, data_gen)\
.coalesce(1).selectExpr(
'first(a)',
'last(a)'),
conf = allow_negative_scale_of_decimal_conf)
# Coalesce and sort are to make sure that first and last, which are non-deterministic
# become deterministic
lambda spark: unary_op_df(spark, data_gen).coalesce(1).selectExpr(
'first(a)', 'last(a)', 'first(a, true)', 'last(a, true)'),
conf=allow_negative_scale_of_decimal_conf)

# TODO: https://github.com/NVIDIA/spark-rapids/issues/3221
@allow_non_gpu('HashAggregateExec', 'SortAggregateExec',
'ShuffleExchangeExec', 'HashPartitioning',
'AggregateExpression', 'Alias', 'First', 'Last')
@pytest.mark.parametrize('data_gen', _nested_gens, ids=idfn)
def test_first_last_reductions_nested_types_fallback(data_gen):
assert_cpu_and_gpu_are_equal_collect_with_capture(
lambda spark: unary_op_df(spark, data_gen, num_slices=1)\
.selectExpr('first(a)', 'last(a)', 'first(a, True)', 'last(a, True)'),
exist_classes='First,Last',
non_exist_classes='GpuFirst,GpuLast',
conf=allow_negative_scale_of_decimal_conf)
def test_first_last_reductions_nested_types(data_gen):
assert_gpu_and_cpu_are_equal_collect(
# Coalesce and sort are to make sure that first and last, which are non-deterministic
# become deterministic
lambda spark: unary_op_df(spark, data_gen).coalesce(1).selectExpr(
'first(a)', 'last(a)', 'first(a, true)', 'last(a, true)'),
conf=allow_negative_scale_of_decimal_conf)

@pytest.mark.parametrize('data_gen', non_nan_all_basic_gens, ids=idfn)
@pytest.mark.parametrize('parameterless', ['true', pytest.param('false', marks=pytest.mark.xfail(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2294,14 +2294,7 @@ object GpuOverrides extends Logging {
}),
expr[First](
"first aggregate operator", {
val checks = ExprChecks.aggNotWindow(
TypeSig.commonCudfTypes + TypeSig.NULL + TypeSig.DECIMAL_128_FULL, TypeSig.all,
Seq(ParamCheck("input",
TypeSig.commonCudfTypes + TypeSig.NULL + TypeSig.DECIMAL_128_FULL, TypeSig.all))
).asInstanceOf[ExprChecksImpl]
// TODO: support GpuFirst on nested types for reduction
// https://github.com/NVIDIA/spark-rapids/issues/3221
val nestedChecks = ContextChecks(
ExprChecks.aggNotWindow(
(TypeSig.STRUCT + TypeSig.ARRAY + TypeSig.MAP +
TypeSig.commonCudfTypes + TypeSig.NULL + TypeSig.DECIMAL_128_FULL).nested(),
TypeSig.all,
Expand All @@ -2310,7 +2303,6 @@ object GpuOverrides extends Logging {
TypeSig.commonCudfTypes + TypeSig.NULL + TypeSig.DECIMAL_128_FULL).nested(),
TypeSig.all))
)
ExprChecksImpl(checks.contexts ++ Map(GroupByAggExprContext -> nestedChecks))
},
(a, conf, p, r) => new AggExprMeta[First](a, conf, p, r) {
override def convertToGpu(childExprs: Seq[Expression]): GpuExpression =
Expand All @@ -2321,14 +2313,7 @@ object GpuOverrides extends Logging {
}),
expr[Last](
"last aggregate operator", {
val checks = ExprChecks.aggNotWindow(
TypeSig.commonCudfTypes + TypeSig.NULL + TypeSig.DECIMAL_128_FULL, TypeSig.all,
Seq(ParamCheck("input",
TypeSig.commonCudfTypes + TypeSig.NULL + TypeSig.DECIMAL_128_FULL, TypeSig.all))
).asInstanceOf[ExprChecksImpl]
// TODO: support GpuLast on nested types for reduction
// https://github.com/NVIDIA/spark-rapids/issues/3221
val nestedChecks = ContextChecks(
ExprChecks.aggNotWindow(
(TypeSig.STRUCT + TypeSig.ARRAY + TypeSig.MAP +
TypeSig.commonCudfTypes + TypeSig.NULL + TypeSig.DECIMAL_128_FULL).nested(),
TypeSig.all,
Expand All @@ -2337,7 +2322,6 @@ object GpuOverrides extends Logging {
TypeSig.commonCudfTypes + TypeSig.NULL + TypeSig.DECIMAL_128_FULL).nested(),
TypeSig.all))
)
ExprChecksImpl(checks.contexts ++ Map(GroupByAggExprContext -> nestedChecks))
},
(a, conf, p, r) => new AggExprMeta[Last](a, conf, p, r) {
override def convertToGpu(childExprs: Seq[Expression]): GpuExpression =
Expand Down

0 comments on commit c7483f2

Please sign in to comment.