diff --git a/cpp/include/cudf/detail/stream_compaction.hpp b/cpp/include/cudf/detail/stream_compaction.hpp index e2974789ea1..e3ef4190fd2 100644 --- a/cpp/include/cudf/detail/stream_compaction.hpp +++ b/cpp/include/cudf/detail/stream_compaction.hpp @@ -88,8 +88,6 @@ std::unique_ptr distinct(table_view const& input, /** * @copydoc cudf::stable_distinct - * - * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr
stable_distinct(table_view const& input, std::vector const& keys, diff --git a/cpp/include/cudf/stream_compaction.hpp b/cpp/include/cudf/stream_compaction.hpp index c386b3a22b4..181af11adb8 100644 --- a/cpp/include/cudf/stream_compaction.hpp +++ b/cpp/include/cudf/stream_compaction.hpp @@ -320,6 +320,7 @@ std::unique_ptr distinct_indices( * @param keep Copy any, first, last, or none of the found duplicates * @param nulls_equal Flag to specify whether null elements should be considered as equal * @param nans_equal Flag to specify whether NaN elements should be considered as equal + * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned table * @return Table with distinct rows, preserving input order */ @@ -329,6 +330,7 @@ std::unique_ptr
stable_distinct( duplicate_keep_option keep = duplicate_keep_option::KEEP_ANY, null_equality nulls_equal = null_equality::EQUAL, nan_equality nans_equal = nan_equality::ALL_EQUAL, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** diff --git a/cpp/src/stream_compaction/stable_distinct.cu b/cpp/src/stream_compaction/stable_distinct.cu index 27b5a92ab69..074d4fd7d1a 100644 --- a/cpp/src/stream_compaction/stable_distinct.cu +++ b/cpp/src/stream_compaction/stable_distinct.cu @@ -79,11 +79,11 @@ std::unique_ptr
stable_distinct(table_view const& input, duplicate_keep_option keep, null_equality nulls_equal, nan_equality nans_equal, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::stable_distinct( - input, keys, keep, nulls_equal, nans_equal, cudf::get_default_stream(), mr); + return detail::stable_distinct(input, keys, keep, nulls_equal, nans_equal, stream, mr); } } // namespace cudf diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 9f14455f42d..eef09954647 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -700,6 +700,7 @@ ConfigureTest(STREAM_REPLACE_TEST streams/replace_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_ROLLING_TEST streams/rolling_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_SEARCH_TEST streams/search_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_SORTING_TEST streams/sorting_test.cpp STREAM_MODE testing) +ConfigureTest(STREAM_STREAM_COMPACTION_TEST streams/stream_compaction_test.cpp STREAM_MODE testing) ConfigureTest( STREAM_STRINGS_TEST streams/strings/case_test.cpp diff --git a/cpp/tests/streams/stream_compaction_test.cpp b/cpp/tests/streams/stream_compaction_test.cpp new file mode 100644 index 00000000000..56443870602 --- /dev/null +++ b/cpp/tests/streams/stream_compaction_test.cpp @@ -0,0 +1,235 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +auto constexpr null{0}; // null at current level +auto constexpr XXX{0}; // null pushed down from parent level +auto constexpr NaN = std::numeric_limits::quiet_NaN(); +auto constexpr KEEP_ANY = cudf::duplicate_keep_option::KEEP_ANY; +auto constexpr KEEP_FIRST = cudf::duplicate_keep_option::KEEP_FIRST; +auto constexpr KEEP_LAST = cudf::duplicate_keep_option::KEEP_LAST; +auto constexpr KEEP_NONE = cudf::duplicate_keep_option::KEEP_NONE; +auto constexpr NULL_EQUAL = cudf::null_equality::EQUAL; +auto constexpr NULL_UNEQUAL = cudf::null_equality::UNEQUAL; +auto constexpr NAN_EQUAL = cudf::nan_equality::ALL_EQUAL; +auto constexpr NAN_UNEQUAL = cudf::nan_equality::UNEQUAL; + +using int32s_col = cudf::test::fixed_width_column_wrapper; +using floats_col = cudf::test::fixed_width_column_wrapper; + +using cudf::nan_policy; +using cudf::null_equality; +using cudf::null_policy; +using cudf::test::iterators::no_nulls; +using cudf::test::iterators::null_at; +using cudf::test::iterators::nulls_at; + +struct StableDistinctKeepAny : public cudf::test::BaseFixture {}; + +struct StableDistinctKeepFirstLastNone : public cudf::test::BaseFixture {}; + +TEST_F(StableDistinctKeepAny, NoNullsTableWithNaNs) +{ + // Column(s) used to test KEEP_ANY needs to have same rows in contiguous + // groups for equivalent keys because KEEP_ANY is nondeterministic. + auto const col1 = int32s_col{6, 6, 6, 1, 1, 1, 3, 5, 8, 5}; + auto const col2 = floats_col{6, 6, 6, 1, 1, 1, 3, 4, 9, 4}; + auto const keys1 = int32s_col{20, 20, 20, 15, 15, 15, 20, 19, 21, 9}; + auto const keys2 = floats_col{19., 19., 19., NaN, NaN, NaN, 20., 20., 9., 21.}; + + auto const input = cudf::table_view{{col1, col2, keys1, keys2}}; + auto const key_idx = std::vector{2, 3}; + + // NaNs are unequal. + { + auto const exp_col1 = int32s_col{6, 1, 1, 1, 3, 5, 8, 5}; + auto const exp_col2 = floats_col{6, 1, 1, 1, 3, 4, 9, 4}; + auto const exp_keys1 = int32s_col{20, 15, 15, 15, 20, 19, 21, 9}; + auto const exp_keys2 = floats_col{19., NaN, NaN, NaN, 20., 20., 9., 21.}; + auto const expected = cudf::table_view{{exp_col1, exp_col2, exp_keys1, exp_keys2}}; + + auto const result = cudf::stable_distinct( + input, key_idx, KEEP_ANY, NULL_EQUAL, NAN_UNEQUAL, cudf::test::get_default_stream()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result); + } + + // NaNs are equal. + { + auto const exp_col1 = int32s_col{6, 1, 3, 5, 8, 5}; + auto const exp_col2 = floats_col{6, 1, 3, 4, 9, 4}; + auto const exp_keys1 = int32s_col{20, 15, 20, 19, 21, 9}; + auto const exp_keys2 = floats_col{19., NaN, 20., 20., 9., 21.}; + auto const expected = cudf::table_view{{exp_col1, exp_col2, exp_keys1, exp_keys2}}; + + auto const result = cudf::stable_distinct( + input, key_idx, KEEP_ANY, NULL_EQUAL, NAN_EQUAL, cudf::test::get_default_stream()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result); + } +} + +TEST_F(StableDistinctKeepAny, InputWithNullsAndNaNs) +{ + auto constexpr null{0.0}; // shadow the global `null` variable of type int + + // Column(s) used to test KEEP_ANY needs to have same rows in contiguous + // groups for equivalent keys because KEEP_ANY is nondeterministic. + auto const col = int32s_col{5, 4, 4, 1, 1, 1, 8, 8, 1}; + auto const keys = floats_col{{20., null, null, NaN, NaN, NaN, 19., 19., 21.}, nulls_at({1, 2})}; + auto const input = cudf::table_view{{col, keys}}; + auto const key_idx = std::vector{1}; + + // Nulls are equal, NaNs are unequal. + { + auto const exp_col = int32s_col{5, 4, 1, 1, 1, 8, 1}; + auto const exp_keys = floats_col{{20., null, NaN, NaN, NaN, 19., 21.}, null_at(1)}; + auto const expected = cudf::table_view{{exp_col, exp_keys}}; + + auto const result = cudf::stable_distinct( + input, key_idx, KEEP_ANY, NULL_EQUAL, NAN_UNEQUAL, cudf::test::get_default_stream()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result); + } + + // Nulls are equal, NaNs are equal. + { + auto const exp_col = int32s_col{5, 4, 1, 8, 1}; + auto const exp_keys = floats_col{{20., null, NaN, 19., 21.}, null_at(1)}; + auto const expected = cudf::table_view{{exp_col, exp_keys}}; + + auto const result = cudf::stable_distinct( + input, key_idx, KEEP_ANY, NULL_EQUAL, NAN_EQUAL, cudf::test::get_default_stream()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result); + } + + // Nulls are unequal, NaNs are unequal. + { + auto const exp_col = int32s_col{5, 4, 4, 1, 1, 1, 8, 1}; + auto const exp_keys = floats_col{{20., null, null, NaN, NaN, NaN, 19., 21.}, nulls_at({1, 2})}; + auto const expected = cudf::table_view{{exp_col, exp_keys}}; + + auto const result = cudf::stable_distinct( + input, key_idx, KEEP_ANY, NULL_UNEQUAL, NAN_UNEQUAL, cudf::test::get_default_stream()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result); + } + + // Nulls are unequal, NaNs are equal. + { + auto const exp_col = int32s_col{5, 4, 4, 1, 8, 1}; + auto const exp_keys = floats_col{{20., null, null, NaN, 19., 21.}, nulls_at({1, 2})}; + auto const expected = cudf::table_view{{exp_col, exp_keys}}; + + auto const result = cudf::stable_distinct( + input, key_idx, KEEP_ANY, NULL_UNEQUAL, NAN_EQUAL, cudf::test::get_default_stream()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result); + } +} + +TEST_F(StableDistinctKeepFirstLastNone, InputWithNaNsEqual) +{ + // Column(s) used to test needs to have different rows for the same keys. + auto const col = int32s_col{0, 1, 2, 3, 4, 5, 6}; + auto const keys = floats_col{20., NaN, NaN, 19., 21., 19., 22.}; + auto const input = cudf::table_view{{col, keys}}; + auto const key_idx = std::vector{1}; + + // KEEP_FIRST + { + auto const exp_col = int32s_col{0, 1, 3, 4, 6}; + auto const exp_keys = floats_col{20., NaN, 19., 21., 22.}; + auto const expected = cudf::table_view{{exp_col, exp_keys}}; + + auto const result = cudf::stable_distinct( + input, key_idx, KEEP_FIRST, NULL_EQUAL, NAN_EQUAL, cudf::test::get_default_stream()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result); + } + + // KEEP_LAST + { + auto const exp_col = int32s_col{0, 2, 4, 5, 6}; + auto const exp_keys = floats_col{20., NaN, 21., 19., 22.}; + auto const expected = cudf::table_view{{exp_col, exp_keys}}; + + auto const result = cudf::stable_distinct( + input, key_idx, KEEP_LAST, NULL_EQUAL, NAN_EQUAL, cudf::test::get_default_stream()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result); + } + + // KEEP_NONE + { + auto const exp_col = int32s_col{0, 4, 6}; + auto const exp_keys = floats_col{20., 21., 22.}; + auto const expected = cudf::table_view{{exp_col, exp_keys}}; + + auto const result = cudf::stable_distinct( + input, key_idx, KEEP_NONE, NULL_EQUAL, NAN_EQUAL, cudf::test::get_default_stream()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result); + } +} + +TEST_F(StableDistinctKeepFirstLastNone, InputWithNaNsUnequal) +{ + // Column(s) used to test needs to have different rows for the same keys. + auto const col = int32s_col{0, 1, 2, 3, 4, 5, 6, 7}; + auto const keys = floats_col{20., NaN, NaN, 19., 21., 19., 22., 20.}; + auto const input = cudf::table_view{{col, keys}}; + auto const key_idx = std::vector{1}; + + // KEEP_FIRST + { + auto const exp_col = int32s_col{0, 1, 2, 3, 4, 6}; + auto const exp_keys = floats_col{20., NaN, NaN, 19., 21., 22.}; + auto const expected = cudf::table_view{{exp_col, exp_keys}}; + + auto const result = cudf::stable_distinct( + input, key_idx, KEEP_FIRST, NULL_UNEQUAL, NAN_UNEQUAL, cudf::test::get_default_stream()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result); + } + + // KEEP_LAST + { + auto const exp_col = int32s_col{1, 2, 4, 5, 6, 7}; + auto const exp_keys = floats_col{NaN, NaN, 21., 19., 22., 20.}; + auto const expected = cudf::table_view{{exp_col, exp_keys}}; + + auto const result = cudf::stable_distinct( + input, key_idx, KEEP_LAST, NULL_UNEQUAL, NAN_UNEQUAL, cudf::test::get_default_stream()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result); + } + + // KEEP_NONE + { + auto const exp_col = int32s_col{1, 2, 4, 6}; + auto const exp_keys = floats_col{NaN, NaN, 21., 22.}; + auto const expected = cudf::table_view{{exp_col, exp_keys}}; + + auto const result = cudf::stable_distinct( + input, key_idx, KEEP_NONE, NULL_UNEQUAL, NAN_UNEQUAL, cudf::test::get_default_stream()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result); + } +}