diff --git a/docs/supported_ops.md b/docs/supported_ops.md
index 2eb45dbef5b..db228cb9385 100644
--- a/docs/supported_ops.md
+++ b/docs/supported_ops.md
@@ -354,9 +354,9 @@ Accelerator supports are described below.
 S
 NS
 NS
+PS* (missing nested BINARY, CALENDAR, MAP, UDT)
 NS
-NS
-NS
+PS* (missing nested BINARY, CALENDAR, MAP, UDT)
 NS
@@ -538,9 +538,9 @@ Accelerator supports are described below.
 S
 NS
 NS
+PS* (missing nested BINARY, CALENDAR, MAP, UDT)
 NS
-NS
-NS
+PS* (missing nested BINARY, CALENDAR, MAP, UDT)
 NS
diff --git a/integration_tests/src/main/python/array_test.py b/integration_tests/src/main/python/array_test.py
index 06c78654a9e..412f0c56ac4 100644
--- a/integration_tests/src/main/python/array_test.py
+++ b/integration_tests/src/main/python/array_test.py
@@ -14,11 +14,9 @@
 
 import pytest
 
-from asserts import assert_gpu_and_cpu_are_equal_collect
+from asserts import assert_gpu_and_cpu_are_equal_collect, assert_gpu_and_cpu_are_equal_sql
 from data_gen import *
-from marks import incompat
 from pyspark.sql.types import *
-import pyspark.sql.functions as f
 
 # Once we support arrays as literals then we can support a[null] and
 # negative indexes for all array gens. When that happens
@@ -55,3 +53,31 @@ def test_make_array(data_gen):
         lambda spark : binary_op_df(spark, data_gen).selectExpr(
                 'array(a, b)',
                 'array(b, a, null, {}, {})'.format(s1, s2)))
+
+
+@pytest.mark.parametrize('data_gen', single_level_array_gens, ids=idfn)
+def test_orderby_array(data_gen):
+    assert_gpu_and_cpu_are_equal_sql(
+        lambda spark : unary_op_df(spark, data_gen),
+        'array_table',
+        'select array_table.a, array_table.a[0] as first_val from array_table order by first_val',
+        conf=allow_negative_scale_of_decimal_conf)
+
+
+@pytest.mark.parametrize('data_gen', [ArrayGen(ArrayGen(short_gen, max_length=10), max_length=10),
+                                      ArrayGen(ArrayGen(string_gen, max_length=10), max_length=10)], ids=idfn)
+def test_orderby_array_of_arrays(data_gen):
+    assert_gpu_and_cpu_are_equal_sql(
+        lambda spark : unary_op_df(spark, data_gen),
+        'array_table',
+        'select array_table.a, array_table.a[0][0] as first_val from array_table order by first_val')
+
+
+@pytest.mark.parametrize('data_gen', [ArrayGen(StructGen([['child0', byte_gen],
+                                                          ['child1', string_gen],
+                                                          ['child2', float_gen]]))], ids=idfn)
+def test_orderby_array_of_structs(data_gen):
+    assert_gpu_and_cpu_are_equal_sql(
+        lambda spark : unary_op_df(spark, data_gen),
+        'array_table',
+        'select array_table.a, array_table.a[0].child0 as first_val from array_table order by first_val')
\ No newline at end of file
diff --git a/integration_tests/src/main/python/struct_test.py b/integration_tests/src/main/python/struct_test.py
index 004550049cd..bde3b4992e4 100644
--- a/integration_tests/src/main/python/struct_test.py
+++ b/integration_tests/src/main/python/struct_test.py
@@ -14,11 +14,9 @@
 
 import pytest
 
-from asserts import assert_gpu_and_cpu_are_equal_collect
+from asserts import assert_gpu_and_cpu_are_equal_collect, assert_gpu_and_cpu_are_equal_sql
 from data_gen import *
-from marks import incompat
 from pyspark.sql.types import *
-import pyspark.sql.functions as f
 
 @pytest.mark.parametrize('data_gen', [StructGen([["first", boolean_gen], ["second", byte_gen], ["third", float_gen]]),
     StructGen([["first", short_gen], ["second", int_gen], ["third", long_gen]]),
@@ -32,6 +30,7 @@ def test_struct_get_item(data_gen):
                 'a.second',
                 'a.third'))
 
+
 @pytest.mark.parametrize('data_gen', all_basic_gens + [decimal_gen_default, decimal_gen_scale_precision], ids=idfn)
 def test_make_struct(data_gen):
     assert_gpu_and_cpu_are_equal_collect(
@@ -39,3 +38,21 @@ def test_make_struct(data_gen):
                 'struct(a, b)',
                 'named_struct("foo", b, "bar", 5, "end", a)'))
 
+
+@pytest.mark.parametrize('data_gen', [StructGen([["first", boolean_gen], ["second", byte_gen], ["third", float_gen]]),
+                                      StructGen([["first", short_gen], ["second", int_gen], ["third", long_gen]]),
+                                      StructGen([["first", long_gen], ["second", long_gen], ["third", long_gen]]),
+                                      StructGen([["first", string_gen], ["second", ArrayGen(string_gen)], ["third", ArrayGen(string_gen)]])], ids=idfn)
+def test_orderby_struct(data_gen):
+    assert_gpu_and_cpu_are_equal_sql(
+        lambda spark : unary_op_df(spark, data_gen),
+        'struct_table',
+        'select struct_table.a, struct_table.a.first as val from struct_table order by val')
+
+
+@pytest.mark.parametrize('data_gen', [StructGen([["first", string_gen], ["second", ArrayGen(string_gen)], ["third", ArrayGen(string_gen)]])], ids=idfn)
+def test_orderby_struct_2(data_gen):
+    assert_gpu_and_cpu_are_equal_sql(
+        lambda spark : unary_op_df(spark, data_gen),
+        'struct_table',
+        'select struct_table.a, struct_table.a.second[0] as val from struct_table order by val')
\ No newline at end of file
diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala
index 841d6ca1e73..dff2c9dc905 100644
--- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala
+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala
@@ -2335,7 +2335,8 @@ object GpuOverrides {
       }),
     exec[ShuffleExchangeExec](
      "The backend for most data being exchanged between processes",
-      ExecChecks(TypeSig.commonCudfTypes + TypeSig.NULL + TypeSig.DECIMAL, TypeSig.all),
+      ExecChecks((TypeSig.commonCudfTypes + TypeSig.NULL + TypeSig.DECIMAL + TypeSig.ARRAY +
+          TypeSig.STRUCT).nested(), TypeSig.all),
       (shuffle, conf, p, r) => new GpuShuffleMeta(shuffle, conf, p, r)),
     exec[UnionExec](
       "The backend for the union operator",
@@ -2386,7 +2387,10 @@ object GpuOverrides {
       (agg, conf, p, r) => new GpuSortAggregateMeta(agg, conf, p, r)),
     exec[SortExec](
       "The backend for the sort operator",
-      ExecChecks(TypeSig.commonCudfTypes + TypeSig.NULL + TypeSig.DECIMAL, TypeSig.all),
+      // The SortOrder TypeSig governs what types can actually be used as the sorting key data type.
+      // The types below are allowed as inputs and outputs.
+      ExecChecks((TypeSig.commonCudfTypes + TypeSig.NULL + TypeSig.DECIMAL + TypeSig.ARRAY +
+          TypeSig.STRUCT).nested(), TypeSig.all),
       (sort, conf, p, r) => new GpuSortMeta(sort, conf, p, r)),
     exec[ExpandExec](
       "The backend for the expand operator",
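
For reference, here is a minimal sketch (not part of the patch) of the query shape the new tests lock down. It assumes a Spark session with the RAPIDS Accelerator jars on the classpath; spark.plugins and spark.rapids.sql.enabled are the standard plugin settings, and the data, table, and column names are made up for illustration. With this change, both the sort and the shuffle it triggers can stay on the GPU when the input carries array or struct columns, as long as the sort key itself is a supported scalar (here, the extracted element a[0]):

from pyspark.sql import SparkSession

# Hypothetical local session; without the RAPIDS jars this still runs, just on the CPU.
spark = (SparkSession.builder
         .config("spark.plugins", "com.nvidia.spark.SQLPlugin")
         .config("spark.rapids.sql.enabled", "true")
         .getOrCreate())

# Mirrors test_orderby_array: an array column plus a scalar sort key pulled out of it.
df = spark.createDataFrame([([3, 1],), ([2, 5],), ([1, 4],)], "a array<int>")
df.createOrReplaceTempView("array_table")

# The ORDER BY key is a[0] (a plain int), but the array column itself now flows
# through SortExec and ShuffleExchangeExec without forcing a fallback to the CPU.
spark.sql("select a, a[0] as first_val from array_table order by first_val").show()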