From 7c55a9ad277b683c6d19fa5766d7f2d367e82bbe Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Fri, 20 May 2022 15:42:08 -0400 Subject: [PATCH 1/7] Pass custom equal and stream to map.contains --- cpp/src/stream_compaction/distinct.cu | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu index d698c547a61..03bd20f5e61 100644 --- a/cpp/src/stream_compaction/distinct.cu +++ b/cpp/src/stream_compaction/distinct.cu @@ -82,7 +82,12 @@ std::unique_ptr distinct(table_view const& input, auto counting_iter = thrust::make_counting_iterator(0); rmm::device_uvector index_exists_in_map(num_rows, stream, mr); // enumerate all indices to check if they are present in the map. - key_map.contains(counting_iter, counting_iter + num_rows, index_exists_in_map.begin(), hash_key); + key_map.contains(counting_iter, + counting_iter + num_rows, + index_exists_in_map.begin(), + hash_key, + key_equal, + stream.value()); auto const output_size{key_map.get_size()}; From 86858e7f9d2868cf09efea6ab5e7bd3b3488858d Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Fri, 20 May 2022 16:06:15 -0400 Subject: [PATCH 2/7] Correct misuse of row equal --- cpp/src/stream_compaction/distinct.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu index 03bd20f5e61..1faa2720d66 100644 --- a/cpp/src/stream_compaction/distinct.cu +++ b/cpp/src/stream_compaction/distinct.cu @@ -86,7 +86,7 @@ std::unique_ptr
distinct(table_view const& input, counting_iter + num_rows, index_exists_in_map.begin(), hash_key, - key_equal, + thrust::equal_to{}, stream.value()); auto const output_size{key_map.get_size()}; From 7e2e7e558f3fce8f0b340bbe6a0f23a2fb55bfcc Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Sun, 22 May 2022 12:35:26 -0400 Subject: [PATCH 3/7] Update cuco git tag --- cpp/cmake/thirdparty/get_cucollections.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/cmake/thirdparty/get_cucollections.cmake b/cpp/cmake/thirdparty/get_cucollections.cmake index 332b0d9dc96..d65da97f632 100644 --- a/cpp/cmake/thirdparty/get_cucollections.cmake +++ b/cpp/cmake/thirdparty/get_cucollections.cmake @@ -22,7 +22,7 @@ function(find_and_configure_cucollections) GLOBAL_TARGETS cuco::cuco BUILD_EXPORT_SET cudf-exports CPM_ARGS GITHUB_REPOSITORY NVIDIA/cuCollections - GIT_TAG 8b15f06f38d034e815bc72045ca3403787f75e07 + GIT_TAG ebaba1ae378a5272116414b6d7ae5847e5cf5715 EXCLUDE_FROM_ALL ${BUILD_SHARED_LIBS} OPTIONS "BUILD_TESTS OFF" "BUILD_BENCHMARKS OFF" "BUILD_EXAMPLES OFF" ) From ea0c61592b2e5e13e0454112e3c4b74483987418 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Sun, 22 May 2022 12:47:15 -0400 Subject: [PATCH 4/7] Cleanups: use retrieve_all instead of contains --- cpp/src/stream_compaction/distinct.cu | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu index 1faa2720d66..22fe7244720 100644 --- a/cpp/src/stream_compaction/distinct.cu +++ b/cpp/src/stream_compaction/distinct.cu @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -79,6 +80,7 @@ std::unique_ptr
distinct(table_view const& input, // insert distinct indices into the map. key_map.insert(iter, iter + num_rows, hash_key, key_equal, stream.value()); + /* auto counting_iter = thrust::make_counting_iterator(0); rmm::device_uvector index_exists_in_map(num_rows, stream, mr); // enumerate all indices to check if they are present in the map. @@ -88,6 +90,7 @@ std::unique_ptr
distinct(table_view const& input, hash_key, thrust::equal_to{}, stream.value()); + */ auto const output_size{key_map.get_size()}; @@ -95,12 +98,16 @@ std::unique_ptr
distinct(table_view const& input, auto distinct_indices = cudf::make_numeric_column( data_type{type_id::INT32}, output_size, mask_state::UNALLOCATED, stream, mr); auto mutable_view = mutable_column_device_view::create(*distinct_indices, stream); + key_map.retrieve_all( + mutable_view->begin(), thrust::make_discard_iterator(), stream.value()); + /* thrust::copy_if(rmm::exec_policy(stream), counting_iter, counting_iter + num_rows, index_exists_in_map.begin(), mutable_view->begin(), thrust::identity{}); + */ // run gather operation to establish new order return detail::gather(input, From 348bc84dcce09859abfc62368a51dc63e1282dc2 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Sun, 22 May 2022 14:34:09 -0400 Subject: [PATCH 5/7] Fix a bug in distinct test: sort before compare --- cpp/tests/stream_compaction/distinct_tests.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/cpp/tests/stream_compaction/distinct_tests.cpp b/cpp/tests/stream_compaction/distinct_tests.cpp index 1c3e07dad2d..4934fcd696a 100644 --- a/cpp/tests/stream_compaction/distinct_tests.cpp +++ b/cpp/tests/stream_compaction/distinct_tests.cpp @@ -420,13 +420,15 @@ TEST_F(Distinct, SlicedStructsOfLists) using lists_col = cudf::test::lists_column_wrapper; using structs_col = cudf::test::structs_column_wrapper; + auto const idx = + cudf::test::fixed_width_column_wrapper{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; auto const structs = [] { auto child = lists_col{ {0, 0}, {0, 0}, {1}, {1, 1}, {1}, {1, 2}, {2, 2}, {2}, {2}, {2, 1}, {2, 2}, {2, 2}, {5, 5}}; return structs_col{{child}}; }(); - auto const input_original = cudf::table_view({structs}); + auto const input_original = cudf::table_view({idx, structs}); auto const input = cudf::slice(input_original, {2, 12})[0]; auto const expected_structs = [] { @@ -435,8 +437,9 @@ TEST_F(Distinct, SlicedStructsOfLists) }(); auto const expected = cudf::table_view({expected_structs}); - auto const result = cudf::distinct(input, {0}); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result); + auto const result = cudf::distinct(input, {1}); + auto const sorted_result = cudf::sort_by_key(*result, result->select({0})); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, cudf::table_view{{sorted_result->get_column(1)}}); } TEST_F(Distinct, StructWithNullElement) From c6366cb99cf5ec32b72922fc2c4a4b2e5fbc1c3e Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Sun, 22 May 2022 14:34:23 -0400 Subject: [PATCH 6/7] Cleanups --- cpp/src/stream_compaction/distinct.cu | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu index 22fe7244720..55de9eebf0c 100644 --- a/cpp/src/stream_compaction/distinct.cu +++ b/cpp/src/stream_compaction/distinct.cu @@ -80,34 +80,13 @@ std::unique_ptr
distinct(table_view const& input, // insert distinct indices into the map. key_map.insert(iter, iter + num_rows, hash_key, key_equal, stream.value()); - /* - auto counting_iter = thrust::make_counting_iterator(0); - rmm::device_uvector index_exists_in_map(num_rows, stream, mr); - // enumerate all indices to check if they are present in the map. - key_map.contains(counting_iter, - counting_iter + num_rows, - index_exists_in_map.begin(), - hash_key, - thrust::equal_to{}, - stream.value()); - */ - auto const output_size{key_map.get_size()}; - - // write distinct indices to a numeric column auto distinct_indices = cudf::make_numeric_column( data_type{type_id::INT32}, output_size, mask_state::UNALLOCATED, stream, mr); auto mutable_view = mutable_column_device_view::create(*distinct_indices, stream); + // write distinct indices to a numeric column key_map.retrieve_all( mutable_view->begin(), thrust::make_discard_iterator(), stream.value()); - /* - thrust::copy_if(rmm::exec_policy(stream), - counting_iter, - counting_iter + num_rows, - index_exists_in_map.begin(), - mutable_view->begin(), - thrust::identity{}); - */ // run gather operation to establish new order return detail::gather(input, From ef695ee3a116fb8cbc19791f9d8a26927e454328 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Mon, 23 May 2022 22:19:04 -0400 Subject: [PATCH 7/7] Get rid of unnecessary view creation --- cpp/src/stream_compaction/distinct.cu | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu index 55de9eebf0c..d3b31dccf77 100644 --- a/cpp/src/stream_compaction/distinct.cu +++ b/cpp/src/stream_compaction/distinct.cu @@ -83,10 +83,10 @@ std::unique_ptr
distinct(table_view const& input, auto const output_size{key_map.get_size()}; auto distinct_indices = cudf::make_numeric_column( data_type{type_id::INT32}, output_size, mask_state::UNALLOCATED, stream, mr); - auto mutable_view = mutable_column_device_view::create(*distinct_indices, stream); // write distinct indices to a numeric column - key_map.retrieve_all( - mutable_view->begin(), thrust::make_discard_iterator(), stream.value()); + key_map.retrieve_all(distinct_indices->mutable_view().begin(), + thrust::make_discard_iterator(), + stream.value()); // run gather operation to establish new order return detail::gather(input,