diff --git a/cpp/include/cudf/detail/stream_compaction.hpp b/cpp/include/cudf/detail/stream_compaction.hpp
index e2974789ea1..e3ef4190fd2 100644
--- a/cpp/include/cudf/detail/stream_compaction.hpp
+++ b/cpp/include/cudf/detail/stream_compaction.hpp
@@ -88,8 +88,6 @@ std::unique_ptr
distinct(table_view const& input,
/**
* @copydoc cudf::stable_distinct
- *
- * @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::unique_ptr stable_distinct(table_view const& input,
std::vector const& keys,
diff --git a/cpp/include/cudf/stream_compaction.hpp b/cpp/include/cudf/stream_compaction.hpp
index c386b3a22b4..181af11adb8 100644
--- a/cpp/include/cudf/stream_compaction.hpp
+++ b/cpp/include/cudf/stream_compaction.hpp
@@ -320,6 +320,7 @@ std::unique_ptr distinct_indices(
* @param keep Copy any, first, last, or none of the found duplicates
* @param nulls_equal Flag to specify whether null elements should be considered as equal
* @param nans_equal Flag to specify whether NaN elements should be considered as equal
+ * @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned table
* @return Table with distinct rows, preserving input order
*/
@@ -329,6 +330,7 @@ std::unique_ptr stable_distinct(
duplicate_keep_option keep = duplicate_keep_option::KEEP_ANY,
null_equality nulls_equal = null_equality::EQUAL,
nan_equality nans_equal = nan_equality::ALL_EQUAL,
+ rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
/**
diff --git a/cpp/src/stream_compaction/stable_distinct.cu b/cpp/src/stream_compaction/stable_distinct.cu
index 27b5a92ab69..074d4fd7d1a 100644
--- a/cpp/src/stream_compaction/stable_distinct.cu
+++ b/cpp/src/stream_compaction/stable_distinct.cu
@@ -79,11 +79,11 @@ std::unique_ptr stable_distinct(table_view const& input,
duplicate_keep_option keep,
null_equality nulls_equal,
nan_equality nans_equal,
+ rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
CUDF_FUNC_RANGE();
- return detail::stable_distinct(
- input, keys, keep, nulls_equal, nans_equal, cudf::get_default_stream(), mr);
+ return detail::stable_distinct(input, keys, keep, nulls_equal, nans_equal, stream, mr);
}
} // namespace cudf
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index 9f14455f42d..eef09954647 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -700,6 +700,7 @@ ConfigureTest(STREAM_REPLACE_TEST streams/replace_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_ROLLING_TEST streams/rolling_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_SEARCH_TEST streams/search_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_SORTING_TEST streams/sorting_test.cpp STREAM_MODE testing)
+ConfigureTest(STREAM_STREAM_COMPACTION_TEST streams/stream_compaction_test.cpp STREAM_MODE testing)
ConfigureTest(
STREAM_STRINGS_TEST
streams/strings/case_test.cpp
diff --git a/cpp/tests/streams/stream_compaction_test.cpp b/cpp/tests/streams/stream_compaction_test.cpp
new file mode 100644
index 00000000000..56443870602
--- /dev/null
+++ b/cpp/tests/streams/stream_compaction_test.cpp
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+auto constexpr null{0}; // null at current level; NOTE(review): only ever shadowed in this file
+auto constexpr XXX{0}; // null pushed down from parent level; NOTE(review): unused in this file
+auto constexpr NaN = std::numeric_limits::quiet_NaN(); // quiet NaN used as a key value below
+auto constexpr KEEP_ANY = cudf::duplicate_keep_option::KEEP_ANY; // short aliases for test calls
+auto constexpr KEEP_FIRST = cudf::duplicate_keep_option::KEEP_FIRST;
+auto constexpr KEEP_LAST = cudf::duplicate_keep_option::KEEP_LAST;
+auto constexpr KEEP_NONE = cudf::duplicate_keep_option::KEEP_NONE;
+auto constexpr NULL_EQUAL = cudf::null_equality::EQUAL;
+auto constexpr NULL_UNEQUAL = cudf::null_equality::UNEQUAL;
+auto constexpr NAN_EQUAL = cudf::nan_equality::ALL_EQUAL;
+auto constexpr NAN_UNEQUAL = cudf::nan_equality::UNEQUAL;
+
+using int32s_col = cudf::test::fixed_width_column_wrapper; // payload / integer key columns
+using floats_col = cudf::test::fixed_width_column_wrapper; // floating-point key columns
+
+using cudf::nan_policy; // NOTE(review): appears unused in this file
+using cudf::null_equality; // NOTE(review): no unqualified use in this file
+using cudf::null_policy; // NOTE(review): appears unused in this file
+using cudf::test::iterators::no_nulls; // NOTE(review): appears unused in this file
+using cudf::test::iterators::null_at;
+using cudf::test::iterators::nulls_at;
+
+struct StableDistinctKeepAny : public cudf::test::BaseFixture {}; // KEEP_ANY tests
+
+struct StableDistinctKeepFirstLastNone : public cudf::test::BaseFixture {}; // other keep modes
+
+TEST_F(StableDistinctKeepAny, NoNullsTableWithNaNs)
+{
+ // Rows sharing a key are contiguous and carry identical payload values, so the expected
+ // output is the same whichever duplicate the nondeterministic KEEP_ANY retains.
+ auto const col1 = int32s_col{6, 6, 6, 1, 1, 1, 3, 5, 8, 5};
+ auto const col2 = floats_col{6, 6, 6, 1, 1, 1, 3, 4, 9, 4};
+ auto const keys1 = int32s_col{20, 20, 20, 15, 15, 15, 20, 19, 21, 9};
+ auto const keys2 = floats_col{19., 19., 19., NaN, NaN, NaN, 20., 20., 9., 21.};
+
+ auto const input = cudf::table_view{{col1, col2, keys1, keys2}};
+ auto const key_idx = std::vector{2, 3}; // key columns: keys1 and keys2
+
+ // NaNs are unequal: all three (15, NaN) rows survive; only the (20, 19.) group collapses.
+ {
+ auto const exp_col1 = int32s_col{6, 1, 1, 1, 3, 5, 8, 5};
+ auto const exp_col2 = floats_col{6, 1, 1, 1, 3, 4, 9, 4};
+ auto const exp_keys1 = int32s_col{20, 15, 15, 15, 20, 19, 21, 9};
+ auto const exp_keys2 = floats_col{19., NaN, NaN, NaN, 20., 20., 9., 21.};
+ auto const expected = cudf::table_view{{exp_col1, exp_col2, exp_keys1, exp_keys2}};
+
+ auto const result = cudf::stable_distinct(
+ input, key_idx, KEEP_ANY, NULL_EQUAL, NAN_UNEQUAL, cudf::test::get_default_stream());
+ CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+ }
+
+ // NaNs are equal: the (15, NaN) group collapses to a single row as well.
+ {
+ auto const exp_col1 = int32s_col{6, 1, 3, 5, 8, 5};
+ auto const exp_col2 = floats_col{6, 1, 3, 4, 9, 4};
+ auto const exp_keys1 = int32s_col{20, 15, 20, 19, 21, 9};
+ auto const exp_keys2 = floats_col{19., NaN, 20., 20., 9., 21.};
+ auto const expected = cudf::table_view{{exp_col1, exp_col2, exp_keys1, exp_keys2}};
+
+ auto const result = cudf::stable_distinct(
+ input, key_idx, KEEP_ANY, NULL_EQUAL, NAN_EQUAL, cudf::test::get_default_stream());
+ CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+ }
+}
+
+TEST_F(StableDistinctKeepAny, InputWithNullsAndNaNs)
+{
+ auto constexpr null{0.0}; // double placeholder; shadows the file-scope `null` of type int
+
+ // Rows sharing a key are contiguous and carry identical payload values, so the expected
+ // output is the same whichever duplicate the nondeterministic KEEP_ANY retains.
+ auto const col = int32s_col{5, 4, 4, 1, 1, 1, 8, 8, 1};
+ auto const keys = floats_col{{20., null, null, NaN, NaN, NaN, 19., 19., 21.}, nulls_at({1, 2})};
+ auto const input = cudf::table_view{{col, keys}};
+ auto const key_idx = std::vector{1}; // key column: keys
+
+ // Nulls are equal, NaNs are unequal: the null pair and the 19. pair collapse; NaNs are kept.
+ {
+ auto const exp_col = int32s_col{5, 4, 1, 1, 1, 8, 1};
+ auto const exp_keys = floats_col{{20., null, NaN, NaN, NaN, 19., 21.}, null_at(1)};
+ auto const expected = cudf::table_view{{exp_col, exp_keys}};
+
+ auto const result = cudf::stable_distinct(
+ input, key_idx, KEEP_ANY, NULL_EQUAL, NAN_UNEQUAL, cudf::test::get_default_stream());
+ CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+ }
+
+ // Nulls are equal, NaNs are equal: the null, NaN and 19. groups each collapse to one row.
+ {
+ auto const exp_col = int32s_col{5, 4, 1, 8, 1};
+ auto const exp_keys = floats_col{{20., null, NaN, 19., 21.}, null_at(1)};
+ auto const expected = cudf::table_view{{exp_col, exp_keys}};
+
+ auto const result = cudf::stable_distinct(
+ input, key_idx, KEEP_ANY, NULL_EQUAL, NAN_EQUAL, cudf::test::get_default_stream());
+ CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+ }
+
+ // Nulls are unequal, NaNs are unequal: only the 19. pair collapses.
+ {
+ auto const exp_col = int32s_col{5, 4, 4, 1, 1, 1, 8, 1};
+ auto const exp_keys = floats_col{{20., null, null, NaN, NaN, NaN, 19., 21.}, nulls_at({1, 2})};
+ auto const expected = cudf::table_view{{exp_col, exp_keys}};
+
+ auto const result = cudf::stable_distinct(
+ input, key_idx, KEEP_ANY, NULL_UNEQUAL, NAN_UNEQUAL, cudf::test::get_default_stream());
+ CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+ }
+
+ // Nulls are unequal, NaNs are equal: the NaN group and the 19. pair collapse; both nulls stay.
+ {
+ auto const exp_col = int32s_col{5, 4, 4, 1, 8, 1};
+ auto const exp_keys = floats_col{{20., null, null, NaN, 19., 21.}, nulls_at({1, 2})};
+ auto const expected = cudf::table_view{{exp_col, exp_keys}};
+
+ auto const result = cudf::stable_distinct(
+ input, key_idx, KEEP_ANY, NULL_UNEQUAL, NAN_EQUAL, cudf::test::get_default_stream());
+ CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+ }
+}
+
+TEST_F(StableDistinctKeepFirstLastNone, InputWithNaNsEqual)
+{
+ // Payload values are unique per row so the output shows which duplicate of a key was kept.
+ auto const col = int32s_col{0, 1, 2, 3, 4, 5, 6};
+ auto const keys = floats_col{20., NaN, NaN, 19., 21., 19., 22.};
+ auto const input = cudf::table_view{{col, keys}};
+ auto const key_idx = std::vector{1}; // key column: keys
+
+ // KEEP_FIRST: rows 0, 1, 3, 4, 6 survive (first NaN is row 1, first 19. is row 3).
+ {
+ auto const exp_col = int32s_col{0, 1, 3, 4, 6};
+ auto const exp_keys = floats_col{20., NaN, 19., 21., 22.};
+ auto const expected = cudf::table_view{{exp_col, exp_keys}};
+
+ auto const result = cudf::stable_distinct(
+ input, key_idx, KEEP_FIRST, NULL_EQUAL, NAN_EQUAL, cudf::test::get_default_stream());
+ CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+ }
+
+ // KEEP_LAST: rows 0, 2, 4, 5, 6 survive (last NaN is row 2, last 19. is row 5).
+ {
+ auto const exp_col = int32s_col{0, 2, 4, 5, 6};
+ auto const exp_keys = floats_col{20., NaN, 21., 19., 22.};
+ auto const expected = cudf::table_view{{exp_col, exp_keys}};
+
+ auto const result = cudf::stable_distinct(
+ input, key_idx, KEEP_LAST, NULL_EQUAL, NAN_EQUAL, cudf::test::get_default_stream());
+ CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+ }
+
+ // KEEP_NONE: rows 0, 4, 6 survive; every row whose key occurs more than once is dropped.
+ {
+ auto const exp_col = int32s_col{0, 4, 6};
+ auto const exp_keys = floats_col{20., 21., 22.};
+ auto const expected = cudf::table_view{{exp_col, exp_keys}};
+
+ auto const result = cudf::stable_distinct(
+ input, key_idx, KEEP_NONE, NULL_EQUAL, NAN_EQUAL, cudf::test::get_default_stream());
+ CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+ }
+}
+
+TEST_F(StableDistinctKeepFirstLastNone, InputWithNaNsUnequal)
+{
+ // Unique payload per row shows which duplicate was kept; the input contains no nulls.
+ auto const col = int32s_col{0, 1, 2, 3, 4, 5, 6, 7};
+ auto const keys = floats_col{20., NaN, NaN, 19., 21., 19., 22., 20.};
+ auto const input = cudf::table_view{{col, keys}};
+ auto const key_idx = std::vector{1}; // key column: keys
+
+ // KEEP_FIRST: both NaN rows stay (unequal); the later 19. (row 5) and 20. (row 7) are dropped.
+ {
+ auto const exp_col = int32s_col{0, 1, 2, 3, 4, 6};
+ auto const exp_keys = floats_col{20., NaN, NaN, 19., 21., 22.};
+ auto const expected = cudf::table_view{{exp_col, exp_keys}};
+
+ auto const result = cudf::stable_distinct(
+ input, key_idx, KEEP_FIRST, NULL_UNEQUAL, NAN_UNEQUAL, cudf::test::get_default_stream());
+ CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+ }
+
+ // KEEP_LAST: both NaN rows stay; the earlier 20. (row 0) and 19. (row 3) are dropped.
+ {
+ auto const exp_col = int32s_col{1, 2, 4, 5, 6, 7};
+ auto const exp_keys = floats_col{NaN, NaN, 21., 19., 22., 20.};
+ auto const expected = cudf::table_view{{exp_col, exp_keys}};
+
+ auto const result = cudf::stable_distinct(
+ input, key_idx, KEEP_LAST, NULL_UNEQUAL, NAN_UNEQUAL, cudf::test::get_default_stream());
+ CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+ }
+
+ // KEEP_NONE: all 20. and 19. rows are dropped; the NaN rows stay because NaNs are unequal.
+ {
+ auto const exp_col = int32s_col{1, 2, 4, 6};
+ auto const exp_keys = floats_col{NaN, NaN, 21., 22.};
+ auto const expected = cudf::table_view{{exp_col, exp_keys}};
+
+ auto const result = cudf::stable_distinct(
+ input, key_idx, KEEP_NONE, NULL_UNEQUAL, NAN_UNEQUAL, cudf::test::get_default_stream());
+ CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+ }
+}