From 4a8085ae5ccc89300fdd075894cdac6c2147bfc7 Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 1 Feb 2023 17:58:44 -0800 Subject: [PATCH 01/14] building equality::self_comparator --- .../cudf/table/experimental/row_operators.cuh | 19 ++- cpp/src/groupby/hash/groupby.cu | 153 +++++++++++------- cpp/src/groupby/sort/common_utils.cuh | 2 + cpp/src/groupby/sort/group_nunique.cu | 58 ++++--- cpp/src/groupby/sort/group_rank_scan.cu | 79 ++++++--- cpp/src/groupby/sort/sort_helper.cu | 27 +++- cpp/src/reductions/scan/rank_scan.cu | 34 ++-- cpp/src/search/contains_table.cu | 69 +++++--- cpp/src/stream_compaction/distinct.cu | 13 +- cpp/src/stream_compaction/distinct_reduce.cu | 25 ++- cpp/src/stream_compaction/distinct_reduce.cuh | 2 + cpp/src/stream_compaction/unique.cu | 73 ++++++--- .../table/experimental_row_operator_tests.cu | 30 +++- 13 files changed, 403 insertions(+), 181 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 0dc0f4e5315..94182b0d758 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -1131,11 +1131,13 @@ struct nan_equal_physical_equality_comparator { * returns false, representing unequal rows. If the rows are compared without mismatched elements, * the rows are equal. * + * @tparam has_nested_columns compile-time optimization for primitive types * @tparam Nullate A cudf::nullate type describing whether to check for nulls. * @tparam PhysicalEqualityComparator A equality comparator functor that compares individual values * rather than logical elements, defaults to a comparator for which `NaN == NaN`. */ -template class device_row_comparator { friend class self_comparator; ///< Allow self_comparator to access private members @@ -1246,14 +1248,14 @@ class device_row_comparator { template () and - not cudf::is_nested()), + (not has_nested_columns or not cudf::is_nested())), typename... Args> __device__ bool operator()(Args...) { CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types."); } - template ())> + template ())> __device__ bool operator()(size_type const lhs_element_index, size_type const rhs_element_index) const noexcept { @@ -1437,6 +1439,7 @@ class self_comparator { * * `F(i,j)` returns true if and only if row `i` compares equal to row `j`. * + * @tparam has_nested_columns compile-time optimization for primitive types * @tparam Nullate A cudf::nullate type describing whether to check for nulls. * @tparam PhysicalEqualityComparator A equality comparator functor that compares individual * values rather than logical elements, defaults to a comparator for which `NaN == NaN`. @@ -1445,13 +1448,15 @@ class self_comparator { * @param comparator Physical element equality comparison functor. * @return A binary callable object */ - template auto equal_to(Nullate nullate = {}, null_equality nulls_are_equal = null_equality::EQUAL, PhysicalEqualityComparator comparator = {}) const noexcept { - return device_row_comparator{nullate, *d_t, *d_t, nulls_are_equal, comparator}; + return device_row_comparator{ + nullate, *d_t, *d_t, nulls_are_equal, comparator}; } private: @@ -1539,6 +1544,7 @@ class two_table_comparator { * Similarly, `F(rhs_index_type i, lhs_index_type j)` returns true if and only if row `i` of the * right table compares equal to row `j` of the left table. * + * @tparam has_nested_columns compile-time optimization for primitive types * @tparam Nullate A cudf::nullate type describing whether to check for nulls. * @tparam PhysicalEqualityComparator A equality comparator functor that compares individual * values rather than logical elements, defaults to a `NaN == NaN` equality comparator. @@ -1554,7 +1560,8 @@ class two_table_comparator { PhysicalEqualityComparator comparator = {}) const noexcept { return strong_index_comparator_adapter{ - device_row_comparator(nullate, *d_left_table, *d_right_table, nulls_are_equal, comparator)}; + device_row_comparator( + nullate, *d_left_table, *d_right_table, nulls_are_equal, comparator)}; } private: diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu index 50173d6a987..1d4a47e2500 100644 --- a/cpp/src/groupby/hash/groupby.cu +++ b/cpp/src/groupby/hash/groupby.cu @@ -68,12 +68,14 @@ namespace { // TODO: replace it with `cuco::static_map` // https://github.com/rapidsai/cudf/issues/10401 +template using map_type = concurrent_unordered_map< cudf::size_type, cudf::size_type, cudf::experimental::row::hash::device_row_hasher, - cudf::experimental::row::equality::device_row_comparator>; + cudf::experimental::row::equality::device_row_comparator>; /** * @brief List of aggregation operations that can be computed with a hash-based @@ -189,13 +191,14 @@ class groupby_simple_aggregations_collector final } }; +template class hash_compound_agg_finalizer final : public cudf::detail::aggregation_finalizer { column_view col; data_type result_type; cudf::detail::result_cache* sparse_results; cudf::detail::result_cache* dense_results; device_span gather_map; - map_type const& map; + map_type const& map; bitmask_type const* __restrict__ row_bitmask; rmm::cuda_stream_view stream; rmm::mr::device_memory_resource* mr; @@ -207,7 +210,7 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final cudf::detail::result_cache* sparse_results, cudf::detail::result_cache* dense_results, device_span gather_map, - map_type const& map, + map_type const& map, bitmask_type const* row_bitmask, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -336,7 +339,7 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final rmm::exec_policy(stream), thrust::make_counting_iterator(0), col.size(), - ::cudf::detail::var_hash_functor{ + ::cudf::detail::var_hash_functor>{ map, row_bitmask, *var_result_view, *values_view, *sum_view, *count_view, agg._ddof}); sparse_results->add_result(col, agg, std::move(var_result)); dense_results->add_result(col, agg, to_dense_agg_result(agg)); @@ -394,12 +397,13 @@ flatten_single_pass_aggs(host_span requests) * * @see groupby_null_templated() */ +template void sparse_to_dense_results(table_view const& keys, host_span requests, cudf::detail::result_cache* sparse_results, cudf::detail::result_cache* dense_results, device_span gather_map, - map_type const& map, + map_type const& map, bool keys_have_nulls, null_policy include_null_keys, rmm::cuda_stream_view stream, @@ -461,10 +465,11 @@ auto create_sparse_results_table(table_view const& flattened_values, * @brief Computes all aggregations from `requests` that require a single pass * over the data and stores the results in `sparse_results` */ +template void compute_single_pass_aggs(table_view const& keys, host_span requests, cudf::detail::result_cache* sparse_results, - map_type& map, + map_type& map, bool keys_have_nulls, null_policy include_null_keys, rmm::cuda_stream_view stream) @@ -484,16 +489,16 @@ void compute_single_pass_aggs(table_view const& keys, auto row_bitmask = skip_key_rows_with_nulls ? cudf::detail::bitmask_and(keys, stream).first : rmm::device_buffer{}; - thrust::for_each_n( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - keys.num_rows(), - hash::compute_single_pass_aggs_fn{map, - *d_values, - *d_sparse_table, - d_aggs.data(), - static_cast(row_bitmask.data()), - skip_key_rows_with_nulls}); + thrust::for_each_n(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + keys.num_rows(), + hash::compute_single_pass_aggs_fn>{ + map, + *d_values, + *d_sparse_table, + d_aggs.data(), + static_cast(row_bitmask.data()), + skip_key_rows_with_nulls}); // Add results back to sparse_results cache auto sparse_result_cols = sparse_table.release(); for (size_t i = 0; i < aggs.size(); i++) { @@ -507,7 +512,8 @@ void compute_single_pass_aggs(table_view const& keys, * @brief Computes and returns a device vector containing all populated keys in * `map`. */ -rmm::device_uvector extract_populated_keys(map_type const& map, +template +rmm::device_uvector extract_populated_keys(map_type const& map, size_type num_keys, rmm::cuda_stream_view stream) { @@ -566,52 +572,91 @@ std::unique_ptr groupby(table_view const& keys, auto preprocessed_keys = cudf::experimental::row::hash::preprocessed_table::create(keys, stream); auto const comparator = cudf::experimental::row::equality::self_comparator{preprocessed_keys}; auto const row_hash = cudf::experimental::row::hash::row_hasher{std::move(preprocessed_keys)}; - auto const d_key_equal = comparator.equal_to(has_null, null_keys_are_equal); auto const d_row_hash = row_hash.device_hasher(has_null); size_type constexpr unused_key{std::numeric_limits::max()}; size_type constexpr unused_value{std::numeric_limits::max()}; - using allocator_type = typename map_type::allocator_type; - - auto map = map_type::create(compute_hash_table_size(num_keys), - stream, - unused_key, - unused_value, - d_row_hash, - d_key_equal, - allocator_type()); - // Cache of sparse results where the location of aggregate value in each // column is indexed by the hash map cudf::detail::result_cache sparse_results(requests.size()); - // Compute all single pass aggs first - compute_single_pass_aggs( - keys, requests, &sparse_results, *map, keys_have_nulls, include_null_keys, stream); - - // Extract the populated indices from the hash map and create a gather map. - // Gathering using this map from sparse results will give dense results. - auto gather_map = extract_populated_keys(*map, keys.num_rows(), stream); - - // Compact all results from sparse_results and insert into cache - sparse_to_dense_results(keys, - requests, - &sparse_results, - cache, - gather_map, - *map, - keys_have_nulls, - include_null_keys, - stream, - mr); - - return cudf::detail::gather(keys, - gather_map, - out_of_bounds_policy::DONT_CHECK, - cudf::detail::negative_index_policy::NOT_ALLOWED, - stream, - mr); + if (cudf::detail::has_nested_columns(keys)) { + using allocator_type = typename map_type::allocator_type; + + auto const d_key_equal = comparator.equal_to(has_null, null_keys_are_equal); + auto const map = map_type::create(compute_hash_table_size(num_keys), + stream, + unused_key, + unused_value, + d_row_hash, + d_key_equal, + allocator_type()); + // Compute all single pass aggs first + compute_single_pass_aggs( + keys, requests, &sparse_results, *map, keys_have_nulls, include_null_keys, stream); + + // Extract the populated indices from the hash map and create a gather map. + // Gathering using this map from sparse results will give dense results. + auto gather_map = extract_populated_keys(*map, keys.num_rows(), stream); + + // Compact all results from sparse_results and insert into cache + sparse_to_dense_results(keys, + requests, + &sparse_results, + cache, + gather_map, + *map, + keys_have_nulls, + include_null_keys, + stream, + mr); + + return cudf::detail::gather(keys, + gather_map, + out_of_bounds_policy::DONT_CHECK, + cudf::detail::negative_index_policy::NOT_ALLOWED, + stream, + mr); + } else { + using allocator_type = typename map_type::allocator_type; + + auto const d_key_equal = comparator.equal_to(has_null, null_keys_are_equal); + auto const map = map_type::create(compute_hash_table_size(num_keys), + stream, + unused_key, + unused_value, + d_row_hash, + d_key_equal, + allocator_type()); + + // Compute all single pass aggs first + compute_single_pass_aggs( + keys, requests, &sparse_results, *map, keys_have_nulls, include_null_keys, stream); + + // Extract the populated indices from the hash map and create a gather map. + // Gathering using this map from sparse results will give dense results. + auto gather_map = extract_populated_keys(*map, keys.num_rows(), stream); + + // Compact all results from sparse_results and insert into cache + sparse_to_dense_results(keys, + requests, + &sparse_results, + cache, + gather_map, + *map, + keys_have_nulls, + include_null_keys, + stream, + mr); + + return cudf::detail::gather(keys, + gather_map, + out_of_bounds_policy::DONT_CHECK, + cudf::detail::negative_index_policy::NOT_ALLOWED, + stream, + mr); + } } } // namespace diff --git a/cpp/src/groupby/sort/common_utils.cuh b/cpp/src/groupby/sort/common_utils.cuh index fe5d7c325ca..d0cf82a24eb 100644 --- a/cpp/src/groupby/sort/common_utils.cuh +++ b/cpp/src/groupby/sort/common_utils.cuh @@ -39,6 +39,8 @@ struct permuted_row_equality_comparator { { } + permuted_row_equality_comparator() = default; + /** * @brief Returns true if the two rows at the specified indices in the permuted * order are equivalent. diff --git a/cpp/src/groupby/sort/group_nunique.cu b/cpp/src/groupby/sort/group_nunique.cu index c411e654913..ba4ce10274e 100644 --- a/cpp/src/groupby/sort/group_nunique.cu +++ b/cpp/src/groupby/sort/group_nunique.cu @@ -33,10 +33,10 @@ namespace groupby { namespace detail { namespace { -template +template struct is_unique_iterator_fn { using comparator_type = - typename cudf::experimental::row::equality::device_row_comparator; + typename cudf::experimental::row::equality::device_row_comparator; Nullate nulls; column_device_view const v; @@ -91,24 +91,46 @@ std::unique_ptr group_nunique(column_view const& values, auto const values_view = table_view{{values}}; auto const comparator = cudf::experimental::row::equality::self_comparator{values_view, stream}; - auto const d_equal = comparator.equal_to( - cudf::nullate::DYNAMIC{cudf::has_nested_nulls(values_view)}, null_equality::EQUAL); auto const d_values_view = column_device_view::create(values, stream); - auto const is_unique_iterator = - thrust::make_transform_iterator(thrust::counting_iterator(0), - is_unique_iterator_fn{nullate::DYNAMIC{values.has_nulls()}, - *d_values_view, - d_equal, - null_handling, - group_offsets.data(), - group_labels.data()}); - thrust::reduce_by_key(rmm::exec_policy(stream), - group_labels.begin(), - group_labels.end(), - is_unique_iterator, - thrust::make_discard_iterator(), - result->mutable_view().begin()); + + if (cudf::detail::has_nested_columns(values_view)) { + auto const d_equal = comparator.equal_to( + cudf::nullate::DYNAMIC{cudf::has_nested_nulls(values_view)}, null_equality::EQUAL); + + auto const is_unique_iterator = + thrust::make_transform_iterator(thrust::counting_iterator(0), + is_unique_iterator_fn{nullate::DYNAMIC{values.has_nulls()}, + *d_values_view, + d_equal, + null_handling, + group_offsets.data(), + group_labels.data()}); + thrust::reduce_by_key(rmm::exec_policy(stream), + group_labels.begin(), + group_labels.end(), + is_unique_iterator, + thrust::make_discard_iterator(), + result->mutable_view().begin()); + } else { + auto const d_equal = comparator.equal_to( + cudf::nullate::DYNAMIC{cudf::has_nested_nulls(values_view)}, null_equality::EQUAL); + + auto const is_unique_iterator = + thrust::make_transform_iterator(thrust::counting_iterator(0), + is_unique_iterator_fn{nullate::DYNAMIC{values.has_nulls()}, + *d_values_view, + d_equal, + null_handling, + group_offsets.data(), + group_labels.data()}); + thrust::reduce_by_key(rmm::exec_policy(stream), + group_labels.begin(), + group_labels.end(), + is_unique_iterator, + thrust::make_discard_iterator(), + result->mutable_view().begin()); + } return result; } diff --git a/cpp/src/groupby/sort/group_rank_scan.cu b/cpp/src/groupby/sort/group_rank_scan.cu index 149f026ffe6..4a452611a1e 100644 --- a/cpp/src/groupby/sort/group_rank_scan.cu +++ b/cpp/src/groupby/sort/group_rank_scan.cu @@ -71,36 +71,67 @@ std::unique_ptr rank_generator(column_view const& grouped_values, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + auto const grouped_values_view = table_view{{grouped_values}}; auto const comparator = - cudf::experimental::row::equality::self_comparator{table_view{{grouped_values}}, stream}; - auto const d_equal = comparator.equal_to(cudf::nullate::DYNAMIC{has_nulls}, null_equality::EQUAL); - auto const permuted_equal = - permuted_row_equality_comparator(d_equal, value_order.begin()); + cudf::experimental::row::equality::self_comparator{grouped_values_view, stream}; auto ranks = make_fixed_width_column( data_type{type_to_id()}, grouped_values.size(), mask_state::UNALLOCATED, stream, mr); auto mutable_ranks = ranks->mutable_view(); - auto unique_identifier = [labels = group_labels.begin(), - offsets = group_offsets.begin(), - permuted_equal, - resolver] __device__(size_type row_index) { - auto const group_start = offsets[labels[row_index]]; - if constexpr (forward) { - // First value of equal values is 1. - return resolver(row_index == group_start || !permuted_equal(row_index, row_index - 1), - row_index - group_start); - } else { - auto const group_end = offsets[labels[row_index] + 1]; - // Last value of equal values is 1. - return resolver(row_index + 1 == group_end || !permuted_equal(row_index, row_index + 1), - row_index - group_start); - } - }; - thrust::tabulate(rmm::exec_policy(stream), - mutable_ranks.begin(), - mutable_ranks.end(), - unique_identifier); + if (cudf::detail::has_nested_columns(grouped_values_view)) { + auto const d_equal = + comparator.equal_to(cudf::nullate::DYNAMIC{has_nulls}, null_equality::EQUAL); + auto const permuted_equal = + permuted_row_equality_comparator(d_equal, value_order.begin()); + + auto unique_identifier = [labels = group_labels.begin(), + offsets = group_offsets.begin(), + permuted_equal, + resolver] __device__(size_type row_index) { + auto const group_start = offsets[labels[row_index]]; + if constexpr (forward) { + // First value of equal values is 1. + return resolver(row_index == group_start || !permuted_equal(row_index, row_index - 1), + row_index - group_start); + } else { + auto const group_end = offsets[labels[row_index] + 1]; + // Last value of equal values is 1. + return resolver(row_index + 1 == group_end || !permuted_equal(row_index, row_index + 1), + row_index - group_start); + } + }; + thrust::tabulate(rmm::exec_policy(stream), + mutable_ranks.begin(), + mutable_ranks.end(), + unique_identifier); + } else { + auto const d_equal = + comparator.equal_to(cudf::nullate::DYNAMIC{has_nulls}, null_equality::EQUAL); + auto const permuted_equal = + permuted_row_equality_comparator(d_equal, value_order.begin()); + + auto unique_identifier = [labels = group_labels.begin(), + offsets = group_offsets.begin(), + permuted_equal, + resolver] __device__(size_type row_index) { + auto const group_start = offsets[labels[row_index]]; + if constexpr (forward) { + // First value of equal values is 1. + return resolver(row_index == group_start || !permuted_equal(row_index, row_index - 1), + row_index - group_start); + } else { + auto const group_end = offsets[labels[row_index] + 1]; + // Last value of equal values is 1. + return resolver(row_index + 1 == group_end || !permuted_equal(row_index, row_index + 1), + row_index - group_start); + } + }; + thrust::tabulate(rmm::exec_policy(stream), + mutable_ranks.begin(), + mutable_ranks.end(), + unique_identifier); + } auto [group_labels_begin, mutable_rank_begin] = [&]() { if constexpr (forward) { diff --git a/cpp/src/groupby/sort/sort_helper.cu b/cpp/src/groupby/sort/sort_helper.cu index 3be090159a7..802c5c72edd 100644 --- a/cpp/src/groupby/sort/sort_helper.cu +++ b/cpp/src/groupby/sort/sort_helper.cu @@ -149,17 +149,28 @@ sort_groupby_helper::index_vector const& sort_groupby_helper::group_offsets( _group_offsets = std::make_unique(num_keys(stream) + 1, stream); - auto const comparator = cudf::experimental::row::equality::self_comparator{_keys, stream}; - auto const d_key_equal = comparator.equal_to( - cudf::nullate::DYNAMIC{cudf::has_nested_nulls(_keys)}, null_equality::EQUAL); + auto const comparator = cudf::experimental::row::equality::self_comparator{_keys, stream}; + auto const sorted_order = key_sort_order(stream).data(); decltype(_group_offsets->begin()) result_end; - result_end = thrust::unique_copy(rmm::exec_policy(stream), - thrust::counting_iterator(0), - thrust::counting_iterator(num_keys(stream)), - _group_offsets->begin(), - permuted_row_equality_comparator(d_key_equal, sorted_order)); + if (cudf::detail::has_nested_columns(_keys)) { + auto const d_key_equal = comparator.equal_to( + cudf::nullate::DYNAMIC{cudf::has_nested_nulls(_keys)}, null_equality::EQUAL); + result_end = thrust::unique_copy(rmm::exec_policy(stream), + thrust::counting_iterator(0), + thrust::counting_iterator(num_keys(stream)), + _group_offsets->begin(), + permuted_row_equality_comparator(d_key_equal, sorted_order)); + } else { + auto const d_key_equal = comparator.equal_to( + cudf::nullate::DYNAMIC{cudf::has_nested_nulls(_keys)}, null_equality::EQUAL); + result_end = thrust::unique_copy(rmm::exec_policy(stream), + thrust::counting_iterator(0), + thrust::counting_iterator(num_keys(stream)), + _group_offsets->begin(), + permuted_row_equality_comparator(d_key_equal, sorted_order)); + } size_type num_groups = thrust::distance(_group_offsets->begin(), result_end); _group_offsets->set_element(num_groups, num_keys(stream), stream); diff --git a/cpp/src/reductions/scan/rank_scan.cu b/cpp/src/reductions/scan/rank_scan.cu index c6909bfd601..b696c7e737a 100644 --- a/cpp/src/reductions/scan/rank_scan.cu +++ b/cpp/src/reductions/scan/rank_scan.cu @@ -51,20 +51,34 @@ std::unique_ptr rank_generator(column_view const& order_by, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto comp = cudf::experimental::row::equality::self_comparator(table_view{{order_by}}, stream); - auto const device_comparator = - comp.equal_to(nullate::DYNAMIC{has_nested_nulls(table_view({order_by}))}); + auto const order_by_view = table_view{{order_by}}; + auto comp = cudf::experimental::row::equality::self_comparator(order_by_view, stream); + auto ranks = make_fixed_width_column( data_type{type_to_id()}, order_by.size(), mask_state::UNALLOCATED, stream, mr); auto mutable_ranks = ranks->mutable_view(); - thrust::tabulate(rmm::exec_policy(stream), - mutable_ranks.begin(), - mutable_ranks.end(), - [comparator = device_comparator, resolver] __device__(size_type row_index) { - return resolver(row_index == 0 || !comparator(row_index, row_index - 1), - row_index); - }); + if (cudf::detail::has_nested_columns(order_by_view)) { + auto const device_comparator = + comp.equal_to(nullate::DYNAMIC{has_nested_nulls(table_view({order_by}))}); + thrust::tabulate(rmm::exec_policy(stream), + mutable_ranks.begin(), + mutable_ranks.end(), + [comparator = device_comparator, resolver] __device__(size_type row_index) { + return resolver(row_index == 0 || !comparator(row_index, row_index - 1), + row_index); + }); + } else { + auto const device_comparator = + comp.equal_to(nullate::DYNAMIC{has_nested_nulls(table_view({order_by}))}); + thrust::tabulate(rmm::exec_policy(stream), + mutable_ranks.begin(), + mutable_ranks.end(), + [comparator = device_comparator, resolver] __device__(size_type row_index) { + return resolver(row_index == 0 || !comparator(row_index, row_index - 1), + row_index); + }); + } thrust::inclusive_scan(rmm::exec_policy(stream), mutable_ranks.begin(), diff --git a/cpp/src/search/contains_table.cu b/cpp/src/search/contains_table.cu index 639dc503ce4..f36470277f5 100644 --- a/cpp/src/search/contains_table.cu +++ b/cpp/src/search/contains_table.cu @@ -205,29 +205,56 @@ rmm::device_uvector contains_with_lists_or_nans(table_view const& haystack auto const row_bitmask_ptr = bitmask_buffer_and_ptr.second; // Insert only rows that do not have any null at any level. - auto const insert_map = [&](auto const value_comp) { - auto const d_eqcomp = strong_index_comparator_adapter{ - comparator.equal_to(nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, value_comp)}; - map.insert_if(haystack_it, - haystack_it + haystack.num_rows(), - thrust::counting_iterator(0), // stencil - row_is_valid{row_bitmask_ptr}, - d_hasher, - d_eqcomp, - stream.value()); - }; - - dispatch_nan_comparator(compare_nans, insert_map); + if (cudf::detail::has_nested_columns(haystack)) { + auto const insert_map = [&](auto const value_comp) { + auto const d_eqcomp = strong_index_comparator_adapter{comparator.equal_to( + nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, value_comp)}; + map.insert_if(haystack_it, + haystack_it + haystack.num_rows(), + thrust::counting_iterator(0), // stencil + row_is_valid{row_bitmask_ptr}, + d_hasher, + d_eqcomp, + stream.value()); + }; + + dispatch_nan_comparator(compare_nans, insert_map); + } else { + auto const insert_map = [&](auto const value_comp) { + auto const d_eqcomp = strong_index_comparator_adapter{comparator.equal_to( + nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, value_comp)}; + map.insert_if(haystack_it, + haystack_it + haystack.num_rows(), + thrust::counting_iterator(0), // stencil + row_is_valid{row_bitmask_ptr}, + d_hasher, + d_eqcomp, + stream.value()); + }; + + dispatch_nan_comparator(compare_nans, insert_map); + } } else { // haystack_doesn't_have_nulls || compare_nulls == null_equality::EQUAL - auto const insert_map = [&](auto const value_comp) { - auto const d_eqcomp = strong_index_comparator_adapter{ - comparator.equal_to(nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, value_comp)}; - map.insert( - haystack_it, haystack_it + haystack.num_rows(), d_hasher, d_eqcomp, stream.value()); - }; - - dispatch_nan_comparator(compare_nans, insert_map); + if (cudf::detail::has_nested_columns(haystack)) { + auto const insert_map = [&](auto const value_comp) { + auto const d_eqcomp = strong_index_comparator_adapter{comparator.equal_to( + nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, value_comp)}; + map.insert( + haystack_it, haystack_it + haystack.num_rows(), d_hasher, d_eqcomp, stream.value()); + }; + + dispatch_nan_comparator(compare_nans, insert_map); + } else { + auto const insert_map = [&](auto const value_comp) { + auto const d_eqcomp = strong_index_comparator_adapter{comparator.equal_to( + nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, value_comp)}; + map.insert( + haystack_it, haystack_it + haystack.num_rows(), d_hasher, d_eqcomp, stream.value()); + }; + + dispatch_nan_comparator(compare_nans, insert_map); + } } } diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu index 8f462f58e4e..e15d54b4251 100644 --- a/cpp/src/stream_compaction/distinct.cu +++ b/cpp/src/stream_compaction/distinct.cu @@ -55,7 +55,8 @@ rmm::device_uvector get_distinct_indices(table_view const& input, auto const preprocessed_input = cudf::experimental::row::hash::preprocessed_table::create(input, stream); - auto const has_nulls = nullate::DYNAMIC{cudf::has_nested_nulls(input)}; + auto const has_nulls = nullate::DYNAMIC{cudf::has_nested_nulls(input)}; + auto const has_nested_columns = cudf::detail::has_nested_columns(input); auto const row_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_input); auto const key_hasher = experimental::compaction_hash(row_hasher.device_hasher(has_nulls)); @@ -66,8 +67,13 @@ rmm::device_uvector get_distinct_indices(table_view const& input, size_type{0}, [] __device__(size_type const i) { return cuco::make_pair(i, i); }); auto const insert_keys = [&](auto const value_comp) { - auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal, value_comp); - map.insert(pair_iter, pair_iter + input.num_rows(), key_hasher, key_equal, stream.value()); + if (has_nested_columns) { + auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal, value_comp); + map.insert(pair_iter, pair_iter + input.num_rows(), key_hasher, key_equal, stream.value()); + } else { + auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal, value_comp); + map.insert(pair_iter, pair_iter + input.num_rows(), key_hasher, key_equal, stream.value()); + } }; if (nans_equal == nan_equality::ALL_EQUAL) { @@ -92,6 +98,7 @@ rmm::device_uvector get_distinct_indices(table_view const& input, std::move(preprocessed_input), input.num_rows(), has_nulls, + has_nested_columns, keep, nulls_equal, nans_equal, diff --git a/cpp/src/stream_compaction/distinct_reduce.cu b/cpp/src/stream_compaction/distinct_reduce.cu index 468561273b3..d7c1e04c633 100644 --- a/cpp/src/stream_compaction/distinct_reduce.cu +++ b/cpp/src/stream_compaction/distinct_reduce.cu @@ -93,6 +93,7 @@ rmm::device_uvector hash_reduce_by_row( std::shared_ptr const preprocessed_input, size_type num_rows, cudf::nullate::DYNAMIC has_nulls, + bool has_nested_columns, duplicate_keep_option keep, null_equality nulls_equal, nan_equality nans_equal, @@ -115,13 +116,23 @@ rmm::device_uvector hash_reduce_by_row( auto const row_comp = cudf::experimental::row::equality::self_comparator(preprocessed_input); auto const reduce_by_row = [&](auto const value_comp) { - auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal, value_comp); - thrust::for_each( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(num_rows), - reduce_by_row_fn{ - map.get_device_view(), key_hasher, key_equal, keep, reduction_results.begin()}); + if (has_nested_columns) { + auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal, value_comp); + thrust::for_each( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_rows), + reduce_by_row_fn{ + map.get_device_view(), key_hasher, key_equal, keep, reduction_results.begin()}); + } else { + auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal, value_comp); + thrust::for_each( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_rows), + reduce_by_row_fn{ + map.get_device_view(), key_hasher, key_equal, keep, reduction_results.begin()}); + } }; if (nans_equal == nan_equality::ALL_EQUAL) { diff --git a/cpp/src/stream_compaction/distinct_reduce.cuh b/cpp/src/stream_compaction/distinct_reduce.cuh index c8a0c2869c8..878f7adb58f 100644 --- a/cpp/src/stream_compaction/distinct_reduce.cuh +++ b/cpp/src/stream_compaction/distinct_reduce.cuh @@ -65,6 +65,7 @@ auto constexpr reduction_init_value(duplicate_keep_option keep) * comparisons * @param num_rows The number of all input rows * @param has_nulls Indicate whether the input rows has any nulls at any nested levels + * @param has_nested_columns Indicates whether the input table has any nested columns * @param keep The parameter to determine what type of reduction to perform * @param nulls_equal Flag to specify whether null elements should be considered as equal * @param stream CUDA stream used for device memory operations and kernel launches @@ -76,6 +77,7 @@ rmm::device_uvector hash_reduce_by_row( std::shared_ptr const preprocessed_input, size_type num_rows, cudf::nullate::DYNAMIC has_nulls, + bool has_nested_columns, duplicate_keep_option keep, null_equality nulls_equal, nan_equality nans_equal, diff --git a/cpp/src/stream_compaction/unique.cu b/cpp/src/stream_compaction/unique.cu index 369b63995e3..f9df4d6a2fa 100644 --- a/cpp/src/stream_compaction/unique.cu +++ b/cpp/src/stream_compaction/unique.cu @@ -65,28 +65,57 @@ std::unique_ptr
unique(table_view const& input, auto mutable_view = mutable_column_device_view::create(*unique_indices, stream); auto keys_view = input.select(keys); - auto comp = cudf::experimental::row::equality::self_comparator(keys_view, stream); - auto row_equal = comp.equal_to(nullate::DYNAMIC{has_nested_nulls(keys_view)}, nulls_equal); - - // get indices of unique rows - auto result_end = unique_copy(thrust::counting_iterator(0), - thrust::counting_iterator(num_rows), - mutable_view->begin(), - row_equal, - keep, - stream); - auto indices_view = - cudf::detail::slice(column_view(*unique_indices), - 0, - thrust::distance(mutable_view->begin(), result_end)); - - // gather unique rows and return - return detail::gather(input, - indices_view, - out_of_bounds_policy::DONT_CHECK, - detail::negative_index_policy::NOT_ALLOWED, - stream, - mr); + auto comp = cudf::experimental::row::equality::self_comparator(keys_view, stream); + + if (cudf::detail::has_nested_columns(keys_view)) { + auto row_equal = + comp.equal_to(nullate::DYNAMIC{has_nested_nulls(keys_view)}, nulls_equal); + + // get indices of unique rows + auto result_end = unique_copy(thrust::counting_iterator(0), + thrust::counting_iterator(num_rows), + mutable_view->begin(), + row_equal, + keep, + stream); + + auto indices_view = + cudf::detail::slice(column_view(*unique_indices), + 0, + thrust::distance(mutable_view->begin(), result_end)); + + // gather unique rows and return + return detail::gather(input, + indices_view, + out_of_bounds_policy::DONT_CHECK, + detail::negative_index_policy::NOT_ALLOWED, + stream, + mr); + } else { + auto row_equal = + comp.equal_to(nullate::DYNAMIC{has_nested_nulls(keys_view)}, nulls_equal); + + // get indices of unique rows + auto result_end = unique_copy(thrust::counting_iterator(0), + thrust::counting_iterator(num_rows), + mutable_view->begin(), + row_equal, + keep, + stream); + + auto indices_view = + cudf::detail::slice(column_view(*unique_indices), + 0, + thrust::distance(mutable_view->begin(), result_end)); + + // gather unique rows and return + return detail::gather(input, + indices_view, + out_of_bounds_policy::DONT_CHECK, + detail::negative_index_policy::NOT_ALLOWED, + stream, + mr); + } } } // namespace detail diff --git a/cpp/tests/table/experimental_row_operator_tests.cu b/cpp/tests/table/experimental_row_operator_tests.cu index ae55275aaec..d1980412ad4 100644 --- a/cpp/tests/table/experimental_row_operator_tests.cu +++ b/cpp/tests/table/experimental_row_operator_tests.cu @@ -115,18 +115,32 @@ auto self_equality(cudf::table_view input, rmm::cuda_stream_view stream{cudf::get_default_stream()}; auto const table_comparator = cudf::experimental::row::equality::self_comparator{input, stream}; - auto const equal_comparator = - table_comparator.equal_to(cudf::nullate::NO{}, cudf::null_equality::EQUAL, comparator); auto output = cudf::make_numeric_column( cudf::data_type(cudf::type_id::BOOL8), input.num_rows(), cudf::mask_state::UNALLOCATED); - thrust::transform(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(input.num_rows()), - thrust::make_counting_iterator(0), - output->mutable_view().data(), - equal_comparator); + if (cudf::detail::has_nested_columns(input)) { + auto const equal_comparator = + table_comparator.equal_to(cudf::nullate::NO{}, cudf::null_equality::EQUAL, comparator); + + thrust::transform(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(input.num_rows()), + thrust::make_counting_iterator(0), + output->mutable_view().data(), + equal_comparator); + } else { + auto const equal_comparator = + table_comparator.equal_to(cudf::nullate::NO{}, cudf::null_equality::EQUAL, comparator); + + thrust::transform(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(input.num_rows()), + thrust::make_counting_iterator(0), + output->mutable_view().data(), + equal_comparator); + } + return output; } From f71d161f79fb67caa2a538a038e912d9fc86204d Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 2 Feb 2023 09:38:27 -0800 Subject: [PATCH 02/14] two table comp --- .../cudf/table/experimental/row_operators.cuh | 5 +- .../binaryop/compiled/struct_binary_ops.cuh | 59 +++++++++++++------ cpp/src/groupby/sort/common_utils.cuh | 2 - cpp/src/groupby/sort/group_nunique.cu | 2 +- cpp/src/groupby/sort/group_rank_scan.cu | 2 +- cpp/src/lists/contains.cu | 58 ++++++++++++------ cpp/src/search/contains_scalar.cu | 39 ++++++++---- cpp/src/search/contains_table.cu | 40 +++++++++---- cpp/src/stream_compaction/distinct_reduce.cu | 2 +- cpp/src/stream_compaction/distinct_reduce.cuh | 2 +- cpp/src/transform/one_hot_encode.cu | 43 ++++++++++---- .../table/experimental_row_operator_tests.cu | 32 +++++++--- 12 files changed, 196 insertions(+), 90 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 94182b0d758..f9805175948 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -1553,14 +1553,15 @@ class two_table_comparator { * @param comparator Physical element equality comparison functor. * @return A binary callable object */ - template auto equal_to(Nullate nullate = {}, null_equality nulls_are_equal = null_equality::EQUAL, PhysicalEqualityComparator comparator = {}) const noexcept { return strong_index_comparator_adapter{ - device_row_comparator( + device_row_comparator( nullate, *d_left_table, *d_right_table, nulls_are_equal, comparator)}; } diff --git a/cpp/src/binaryop/compiled/struct_binary_ops.cuh b/cpp/src/binaryop/compiled/struct_binary_ops.cuh index 2fcf1ce4e32..640e0a2652b 100644 --- a/cpp/src/binaryop/compiled/struct_binary_ops.cuh +++ b/cpp/src/binaryop/compiled/struct_binary_ops.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -125,26 +125,49 @@ void apply_struct_equality_op(mutable_column_view& out, auto trhs = table_view{{rhs}}; auto table_comparator = cudf::experimental::row::equality::two_table_comparator{tlhs, trhs, stream}; - auto device_comparator = - table_comparator.equal_to(nullate::DYNAMIC{has_nested_nulls(tlhs) || has_nested_nulls(trhs)}, - null_equality::EQUAL, - comparator); auto outd = column_device_view::create(out, stream); auto optional_iter = cudf::detail::make_optional_iterator(*outd, nullate::DYNAMIC{out.has_nulls()}); - thrust::tabulate(rmm::exec_policy(stream), - out.begin(), - out.end(), - [optional_iter, - is_lhs_scalar, - is_rhs_scalar, - preserve_output = (op != binary_operator::NOT_EQUAL), - device_comparator] __device__(size_type i) { - auto lhs = cudf::experimental::row::lhs_index_type{is_lhs_scalar ? 0 : i}; - auto rhs = cudf::experimental::row::rhs_index_type{is_rhs_scalar ? 0 : i}; - return optional_iter[i].has_value() and - (device_comparator(lhs, rhs) == preserve_output); - }); + + if (cudf::detail::has_nested_columns(tlhs) or cudf::detail::has_nested_columns(trhs)) { + auto device_comparator = table_comparator.equal_to( + nullate::DYNAMIC{has_nested_nulls(tlhs) || has_nested_nulls(trhs)}, + null_equality::EQUAL, + comparator); + + thrust::tabulate(rmm::exec_policy(stream), + out.begin(), + out.end(), + [optional_iter, + is_lhs_scalar, + is_rhs_scalar, + preserve_output = (op != binary_operator::NOT_EQUAL), + device_comparator] __device__(size_type i) { + auto lhs = cudf::experimental::row::lhs_index_type{is_lhs_scalar ? 0 : i}; + auto rhs = cudf::experimental::row::rhs_index_type{is_rhs_scalar ? 0 : i}; + return optional_iter[i].has_value() and + (device_comparator(lhs, rhs) == preserve_output); + }); + } else { + auto device_comparator = table_comparator.equal_to( + nullate::DYNAMIC{has_nested_nulls(tlhs) || has_nested_nulls(trhs)}, + null_equality::EQUAL, + comparator); + + thrust::tabulate(rmm::exec_policy(stream), + out.begin(), + out.end(), + [optional_iter, + is_lhs_scalar, + is_rhs_scalar, + preserve_output = (op != binary_operator::NOT_EQUAL), + device_comparator] __device__(size_type i) { + auto lhs = cudf::experimental::row::lhs_index_type{is_lhs_scalar ? 0 : i}; + auto rhs = cudf::experimental::row::rhs_index_type{is_rhs_scalar ? 0 : i}; + return optional_iter[i].has_value() and + (device_comparator(lhs, rhs) == preserve_output); + }); + } } } // namespace cudf::binops::compiled::detail diff --git a/cpp/src/groupby/sort/common_utils.cuh b/cpp/src/groupby/sort/common_utils.cuh index d0cf82a24eb..fe5d7c325ca 100644 --- a/cpp/src/groupby/sort/common_utils.cuh +++ b/cpp/src/groupby/sort/common_utils.cuh @@ -39,8 +39,6 @@ struct permuted_row_equality_comparator { { } - permuted_row_equality_comparator() = default; - /** * @brief Returns true if the two rows at the specified indices in the permuted * order are equivalent. diff --git a/cpp/src/groupby/sort/group_nunique.cu b/cpp/src/groupby/sort/group_nunique.cu index ba4ce10274e..69c2e6c8dfd 100644 --- a/cpp/src/groupby/sort/group_nunique.cu +++ b/cpp/src/groupby/sort/group_nunique.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/groupby/sort/group_rank_scan.cu b/cpp/src/groupby/sort/group_rank_scan.cu index 4a452611a1e..90f41038a77 100644 --- a/cpp/src/groupby/sort/group_rank_scan.cu +++ b/cpp/src/groupby/sort/group_rank_scan.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/lists/contains.cu b/cpp/src/lists/contains.cu index 0142e736fd0..85d9fea7ae5 100644 --- a/cpp/src/lists/contains.cu +++ b/cpp/src/lists/contains.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -267,24 +267,48 @@ void index_of_nested_types(InputIterator input_it, auto const has_nulls = has_nested_nulls(child_tview) || has_nested_nulls(keys_tview); auto const comparator = cudf::experimental::row::equality::two_table_comparator(child_tview, keys_tview, stream); - auto const d_comp = comparator.equal_to(nullate::DYNAMIC{has_nulls}); - - auto const do_search = [=](auto const key_validity_iter) { - thrust::transform( - rmm::exec_policy(stream), - input_it, - input_it + num_rows, - output_it, - search_list_nested_types_fn{find_option, key_validity_iter, d_comp, search_key_is_scalar}); - }; - if constexpr (search_key_is_scalar) { - auto const key_validity_iter = cudf::detail::make_validity_iterator(search_keys); - do_search(key_validity_iter); + if (cudf::detail::has_nested_columns(child_tview) or + cudf::detail::has_nested_columns(keys_tview)) { + auto const d_comp = comparator.equal_to(nullate::DYNAMIC{has_nulls}); + + auto const do_search = [=](auto const key_validity_iter) { + thrust::transform( + rmm::exec_policy(stream), + input_it, + input_it + num_rows, + output_it, + search_list_nested_types_fn{find_option, key_validity_iter, d_comp, search_key_is_scalar}); + }; + + if constexpr (search_key_is_scalar) { + auto const key_validity_iter = cudf::detail::make_validity_iterator(search_keys); + do_search(key_validity_iter); + } else { + auto const keys_dv_ptr = column_device_view::create(search_keys, stream); + auto const key_validity_iter = cudf::detail::make_validity_iterator(*keys_dv_ptr); + do_search(key_validity_iter); + } } else { - auto const keys_dv_ptr = column_device_view::create(search_keys, stream); - auto const key_validity_iter = cudf::detail::make_validity_iterator(*keys_dv_ptr); - do_search(key_validity_iter); + auto const d_comp = comparator.equal_to(nullate::DYNAMIC{has_nulls}); + + auto const do_search = [=](auto const key_validity_iter) { + thrust::transform( + rmm::exec_policy(stream), + input_it, + input_it + num_rows, + output_it, + search_list_nested_types_fn{find_option, key_validity_iter, d_comp, search_key_is_scalar}); + }; + + if constexpr (search_key_is_scalar) { + auto const key_validity_iter = cudf::detail::make_validity_iterator(search_keys); + do_search(key_validity_iter); + } else { + auto const keys_dv_ptr = column_device_view::create(search_keys, stream); + auto const key_validity_iter = cudf::detail::make_validity_iterator(*keys_dv_ptr); + do_search(key_validity_iter); + } } } diff --git a/cpp/src/search/contains_scalar.cu b/cpp/src/search/contains_scalar.cu index 8c500e1e757..acd0b6b069d 100644 --- a/cpp/src/search/contains_scalar.cu +++ b/cpp/src/search/contains_scalar.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -99,7 +99,6 @@ struct contains_scalar_dispatch { auto const comparator = cudf::experimental::row::equality::two_table_comparator(haystack_tv, needle_tv, stream); - auto const d_comp = comparator.equal_to(nullate::DYNAMIC{has_nulls}); auto const begin = cudf::experimental::row::lhs_iterator(0); auto const end = begin + haystack.size(); @@ -108,16 +107,32 @@ struct contains_scalar_dispatch { auto const check_nulls = haystack.has_nulls(); auto const haystack_cdv_ptr = column_device_view::create(haystack, stream); - return thrust::count_if( - rmm::exec_policy(stream), - begin, - end, - [d_comp, check_nulls, d_haystack = *haystack_cdv_ptr] __device__(auto const idx) { - if (check_nulls && d_haystack.is_null_nocheck(static_cast(idx))) { - return false; - } - return d_comp(idx, rhs_index_type{0}); // compare haystack[idx] == needle[0]. - }) > 0; + if (cudf::detail::has_nested_columns(haystack_tv) or + cudf::detail::has_nested_columns(needle_tv)) { + auto const d_comp = comparator.equal_to(nullate::DYNAMIC{has_nulls}); + return thrust::count_if( + rmm::exec_policy(stream), + begin, + end, + [d_comp, check_nulls, d_haystack = *haystack_cdv_ptr] __device__(auto const idx) { + if (check_nulls && d_haystack.is_null_nocheck(static_cast(idx))) { + return false; + } + return d_comp(idx, rhs_index_type{0}); // compare haystack[idx] == needle[0]. + }) > 0; + } else { + auto const d_comp = comparator.equal_to(nullate::DYNAMIC{has_nulls}); + return thrust::count_if( + rmm::exec_policy(stream), + begin, + end, + [d_comp, check_nulls, d_haystack = *haystack_cdv_ptr] __device__(auto const idx) { + if (check_nulls && d_haystack.is_null_nocheck(static_cast(idx))) { + return false; + } + return d_comp(idx, rhs_index_type{0}); // compare haystack[idx] == needle[0]. + }) > 0; + } } }; diff --git a/cpp/src/search/contains_table.cu b/cpp/src/search/contains_table.cu index f36470277f5..5b2db3dbb83 100644 --- a/cpp/src/search/contains_table.cu +++ b/cpp/src/search/contains_table.cu @@ -272,19 +272,33 @@ rmm::device_uvector contains_with_lists_or_nans(table_view const& haystack auto const comparator = cudf::experimental::row::equality::two_table_comparator(haystack, needles, stream); - - auto const check_contains = [&](auto const value_comp) { - auto const d_eqcomp = - comparator.equal_to(nullate::DYNAMIC{has_any_nulls}, compare_nulls, value_comp); - map.contains(needles_it, - needles_it + needles.num_rows(), - contained.begin(), - d_hasher, - d_eqcomp, - stream.value()); - }; - - dispatch_nan_comparator(compare_nans, check_contains); + if (cudf::detail::has_nested_columns(haystack) or cudf::detail::has_nested_columns(needles)) { + auto const check_contains = [&](auto const value_comp) { + auto const d_eqcomp = + comparator.equal_to(nullate::DYNAMIC{has_any_nulls}, compare_nulls, value_comp); + map.contains(needles_it, + needles_it + needles.num_rows(), + contained.begin(), + d_hasher, + d_eqcomp, + stream.value()); + }; + + dispatch_nan_comparator(compare_nans, check_contains); + } else { + auto const check_contains = [&](auto const value_comp) { + auto const d_eqcomp = + comparator.equal_to(nullate::DYNAMIC{has_any_nulls}, compare_nulls, value_comp); + map.contains(needles_it, + needles_it + needles.num_rows(), + contained.begin(), + d_hasher, + d_eqcomp, + stream.value()); + }; + + dispatch_nan_comparator(compare_nans, check_contains); + } } return contained; diff --git a/cpp/src/stream_compaction/distinct_reduce.cu b/cpp/src/stream_compaction/distinct_reduce.cu index d7c1e04c633..020e6a495bc 100644 --- a/cpp/src/stream_compaction/distinct_reduce.cu +++ b/cpp/src/stream_compaction/distinct_reduce.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/stream_compaction/distinct_reduce.cuh b/cpp/src/stream_compaction/distinct_reduce.cuh index 878f7adb58f..e360d03280a 100644 --- a/cpp/src/stream_compaction/distinct_reduce.cuh +++ b/cpp/src/stream_compaction/distinct_reduce.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/transform/one_hot_encode.cu b/cpp/src/transform/one_hot_encode.cu index 8f0a44585bf..50bbe216b5f 100644 --- a/cpp/src/transform/one_hot_encode.cu +++ b/cpp/src/transform/one_hot_encode.cu @@ -59,19 +59,36 @@ std::pair, table_view> one_hot_encode(column_view const& auto const t_rhs = table_view{{categories}}; auto const comparator = cudf::experimental::row::equality::two_table_comparator{t_lhs, t_rhs, stream}; - auto const d_equal = - comparator.equal_to(nullate::DYNAMIC{has_nested_nulls(t_lhs) || has_nested_nulls(t_rhs)}); - - thrust::transform( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(total_size), - all_encodings->mutable_view().begin(), - [input_size = input.size(), d_equal] __device__(size_type i) { - auto const element_index = cudf::experimental::row::lhs_index_type{i % input_size}; - auto const category_index = cudf::experimental::row::rhs_index_type{i / input_size}; - return d_equal(element_index, category_index); - }); + + if (cudf::detail::has_nested_columns(t_lhs) or cudf::detail::has_nested_columns(t_rhs)) { + auto const d_equal = comparator.equal_to( + nullate::DYNAMIC{has_nested_nulls(t_lhs) || has_nested_nulls(t_rhs)}); + + thrust::transform( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(total_size), + all_encodings->mutable_view().begin(), + [input_size = input.size(), d_equal] __device__(size_type i) { + auto const element_index = cudf::experimental::row::lhs_index_type{i % input_size}; + auto const category_index = cudf::experimental::row::rhs_index_type{i / input_size}; + return d_equal(element_index, category_index); + }); + } else { + auto const d_equal = comparator.equal_to( + nullate::DYNAMIC{has_nested_nulls(t_lhs) || has_nested_nulls(t_rhs)}); + + thrust::transform( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(total_size), + all_encodings->mutable_view().begin(), + [input_size = input.size(), d_equal] __device__(size_type i) { + auto const element_index = cudf::experimental::row::lhs_index_type{i % input_size}; + auto const category_index = cudf::experimental::row::rhs_index_type{i / input_size}; + return d_equal(element_index, category_index); + }); + } auto const split_iter = make_counting_transform_iterator(1, [width = input.size()](auto i) { return i * width; }); diff --git a/cpp/tests/table/experimental_row_operator_tests.cu b/cpp/tests/table/experimental_row_operator_tests.cu index d1980412ad4..1f3f7eefe79 100644 --- a/cpp/tests/table/experimental_row_operator_tests.cu +++ b/cpp/tests/table/experimental_row_operator_tests.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -154,20 +154,34 @@ auto two_table_equality(cudf::table_view lhs, auto const table_comparator = cudf::experimental::row::equality::two_table_comparator{lhs, rhs, stream}; - auto const equal_comparator = - table_comparator.equal_to(cudf::nullate::NO{}, cudf::null_equality::EQUAL, comparator); + auto const lhs_it = cudf::experimental::row::lhs_iterator(0); auto const rhs_it = cudf::experimental::row::rhs_iterator(0); auto output = cudf::make_numeric_column( cudf::data_type(cudf::type_id::BOOL8), lhs.num_rows(), cudf::mask_state::UNALLOCATED); - thrust::transform(rmm::exec_policy(stream), - lhs_it, - lhs_it + lhs.num_rows(), - rhs_it, - output->mutable_view().data(), - equal_comparator); + if (cudf::detail::has_nested_columns(lhs) or cudf::detail::has_nested_columns(rhs)) { + auto const equal_comparator = + table_comparator.equal_to(cudf::nullate::NO{}, cudf::null_equality::EQUAL, comparator); + + thrust::transform(rmm::exec_policy(stream), + lhs_it, + lhs_it + lhs.num_rows(), + rhs_it, + output->mutable_view().data(), + equal_comparator); + } else { + auto const equal_comparator = + table_comparator.equal_to(cudf::nullate::NO{}, cudf::null_equality::EQUAL, comparator); + + thrust::transform(rmm::exec_policy(stream), + lhs_it, + lhs_it + lhs.num_rows(), + rhs_it, + output->mutable_view().data(), + equal_comparator); + } return output; } From 3ca298c2fa5c0a55cb5fa64e8a6423f16c2cf5df Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 2 Feb 2023 09:50:20 -0800 Subject: [PATCH 03/14] copyright years --- cpp/src/groupby/sort/sort_helper.cu | 2 +- cpp/src/reductions/scan/rank_scan.cu | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/groupby/sort/sort_helper.cu b/cpp/src/groupby/sort/sort_helper.cu index 802c5c72edd..b53955472b1 100644 --- a/cpp/src/groupby/sort/sort_helper.cu +++ b/cpp/src/groupby/sort/sort_helper.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/reductions/scan/rank_scan.cu b/cpp/src/reductions/scan/rank_scan.cu index b696c7e737a..f7a763c5237 100644 --- a/cpp/src/reductions/scan/rank_scan.cu +++ b/cpp/src/reductions/scan/rank_scan.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 7c167a79386a1b7fac3530f48c713dd7ef2d58f2 Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 2 Feb 2023 13:32:37 -0800 Subject: [PATCH 04/14] centralizing repeated logic --- cpp/src/groupby/hash/groupby.cu | 98 ++++++++++--------------- cpp/src/groupby/sort/group_nunique.cu | 27 +++---- cpp/src/groupby/sort/group_rank_scan.cu | 85 ++++++++++----------- cpp/src/stream_compaction/unique.cu | 33 +++------ 4 files changed, 98 insertions(+), 145 deletions(-) diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu index 1d4a47e2500..07558cae387 100644 --- a/cpp/src/groupby/hash/groupby.cu +++ b/cpp/src/groupby/hash/groupby.cu @@ -68,14 +68,18 @@ namespace { // TODO: replace it with `cuco::static_map` // https://github.com/rapidsai/cudf/issues/10401 +template +using map_type = + concurrent_unordered_map, + ComparatorType>; + template -using map_type = concurrent_unordered_map< - cudf::size_type, - cudf::size_type, - cudf::experimental::row::hash::device_row_hasher, +using comparator_type = cudf::experimental::row::equality::device_row_comparator>; + cudf::nullate::DYNAMIC>; /** * @brief List of aggregation operations that can be computed with a hash-based @@ -191,14 +195,14 @@ class groupby_simple_aggregations_collector final } }; -template +template class hash_compound_agg_finalizer final : public cudf::detail::aggregation_finalizer { column_view col; data_type result_type; cudf::detail::result_cache* sparse_results; cudf::detail::result_cache* dense_results; device_span gather_map; - map_type const& map; + map_type const& map; bitmask_type const* __restrict__ row_bitmask; rmm::cuda_stream_view stream; rmm::mr::device_memory_resource* mr; @@ -210,7 +214,7 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final cudf::detail::result_cache* sparse_results, cudf::detail::result_cache* dense_results, device_span gather_map, - map_type const& map, + map_type const& map, bitmask_type const* row_bitmask, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -339,7 +343,7 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final rmm::exec_policy(stream), thrust::make_counting_iterator(0), col.size(), - ::cudf::detail::var_hash_functor>{ + ::cudf::detail::var_hash_functor>{ map, row_bitmask, *var_result_view, *values_view, *sum_view, *count_view, agg._ddof}); sparse_results->add_result(col, agg, std::move(var_result)); dense_results->add_result(col, agg, to_dense_agg_result(agg)); @@ -397,13 +401,13 @@ flatten_single_pass_aggs(host_span requests) * * @see groupby_null_templated() */ -template +template void sparse_to_dense_results(table_view const& keys, host_span requests, cudf::detail::result_cache* sparse_results, cudf::detail::result_cache* dense_results, device_span gather_map, - map_type const& map, + map_type const& map, bool keys_have_nulls, null_policy include_null_keys, rmm::cuda_stream_view stream, @@ -465,11 +469,11 @@ auto create_sparse_results_table(table_view const& flattened_values, * @brief Computes all aggregations from `requests` that require a single pass * over the data and stores the results in `sparse_results` */ -template +template void compute_single_pass_aggs(table_view const& keys, host_span requests, cudf::detail::result_cache* sparse_results, - map_type& map, + map_type& map, bool keys_have_nulls, null_policy include_null_keys, rmm::cuda_stream_view stream) @@ -492,7 +496,7 @@ void compute_single_pass_aggs(table_view const& keys, thrust::for_each_n(rmm::exec_policy(stream), thrust::make_counting_iterator(0), keys.num_rows(), - hash::compute_single_pass_aggs_fn>{ + hash::compute_single_pass_aggs_fn>{ map, *d_values, *d_sparse_table, @@ -512,8 +516,8 @@ void compute_single_pass_aggs(table_view const& keys, * @brief Computes and returns a device vector containing all populated keys in * `map`. */ -template -rmm::device_uvector extract_populated_keys(map_type const& map, +template +rmm::device_uvector extract_populated_keys(map_type const& map, size_type num_keys, rmm::cuda_stream_view stream) { @@ -581,17 +585,16 @@ std::unique_ptr
groupby(table_view const& keys, // column is indexed by the hash map cudf::detail::result_cache sparse_results(requests.size()); - if (cudf::detail::has_nested_columns(keys)) { - using allocator_type = typename map_type::allocator_type; + auto const comparator_helper = [&](auto const d_key_equal) { + using allocator_type = typename map_type::allocator_type; - auto const d_key_equal = comparator.equal_to(has_null, null_keys_are_equal); - auto const map = map_type::create(compute_hash_table_size(num_keys), - stream, - unused_key, - unused_value, - d_row_hash, - d_key_equal, - allocator_type()); + auto const map = map_type::create(compute_hash_table_size(num_keys), + stream, + unused_key, + unused_value, + d_row_hash, + d_key_equal, + allocator_type()); // Compute all single pass aggs first compute_single_pass_aggs( keys, requests, &sparse_results, *map, keys_have_nulls, include_null_keys, stream); @@ -618,44 +621,17 @@ std::unique_ptr
groupby(table_view const& keys, cudf::detail::negative_index_policy::NOT_ALLOWED, stream, mr); - } else { - using allocator_type = typename map_type::allocator_type; - - auto const d_key_equal = comparator.equal_to(has_null, null_keys_are_equal); - auto const map = map_type::create(compute_hash_table_size(num_keys), - stream, - unused_key, - unused_value, - d_row_hash, - d_key_equal, - allocator_type()); + }; - // Compute all single pass aggs first - compute_single_pass_aggs( - keys, requests, &sparse_results, *map, keys_have_nulls, include_null_keys, stream); + if (cudf::detail::has_nested_columns(keys)) { + auto const d_key_equal = comparator.equal_to(has_null, null_keys_are_equal); - // Extract the populated indices from the hash map and create a gather map. - // Gathering using this map from sparse results will give dense results. - auto gather_map = extract_populated_keys(*map, keys.num_rows(), stream); + return comparator_helper(d_key_equal); - // Compact all results from sparse_results and insert into cache - sparse_to_dense_results(keys, - requests, - &sparse_results, - cache, - gather_map, - *map, - keys_have_nulls, - include_null_keys, - stream, - mr); + } else { + auto const d_key_equal = comparator.equal_to(has_null, null_keys_are_equal); - return cudf::detail::gather(keys, - gather_map, - out_of_bounds_policy::DONT_CHECK, - cudf::detail::negative_index_policy::NOT_ALLOWED, - stream, - mr); + return comparator_helper(d_key_equal); } } diff --git a/cpp/src/groupby/sort/group_nunique.cu b/cpp/src/groupby/sort/group_nunique.cu index 69c2e6c8dfd..c68bdb1fd43 100644 --- a/cpp/src/groupby/sort/group_nunique.cu +++ b/cpp/src/groupby/sort/group_nunique.cu @@ -94,10 +94,7 @@ std::unique_ptr group_nunique(column_view const& values, auto const d_values_view = column_device_view::create(values, stream); - if (cudf::detail::has_nested_columns(values_view)) { - auto const d_equal = comparator.equal_to( - cudf::nullate::DYNAMIC{cudf::has_nested_nulls(values_view)}, null_equality::EQUAL); - + auto const comparator_helper = [&](auto const d_equal) { auto const is_unique_iterator = thrust::make_transform_iterator(thrust::counting_iterator(0), is_unique_iterator_fn{nullate::DYNAMIC{values.has_nulls()}, @@ -112,24 +109,18 @@ std::unique_ptr group_nunique(column_view const& values, is_unique_iterator, thrust::make_discard_iterator(), result->mutable_view().begin()); + }; + + if (cudf::detail::has_nested_columns(values_view)) { + auto const d_equal = comparator.equal_to( + cudf::nullate::DYNAMIC{cudf::has_nested_nulls(values_view)}, null_equality::EQUAL); + + comparator_helper(d_equal); } else { auto const d_equal = comparator.equal_to( cudf::nullate::DYNAMIC{cudf::has_nested_nulls(values_view)}, null_equality::EQUAL); - auto const is_unique_iterator = - thrust::make_transform_iterator(thrust::counting_iterator(0), - is_unique_iterator_fn{nullate::DYNAMIC{values.has_nulls()}, - *d_values_view, - d_equal, - null_handling, - group_offsets.data(), - group_labels.data()}); - thrust::reduce_by_key(rmm::exec_policy(stream), - group_labels.begin(), - group_labels.end(), - is_unique_iterator, - thrust::make_discard_iterator(), - result->mutable_view().begin()); + comparator_helper(d_equal); } return result; diff --git a/cpp/src/groupby/sort/group_rank_scan.cu b/cpp/src/groupby/sort/group_rank_scan.cu index 90f41038a77..9d50ea66f51 100644 --- a/cpp/src/groupby/sort/group_rank_scan.cu +++ b/cpp/src/groupby/sort/group_rank_scan.cu @@ -41,6 +41,37 @@ namespace groupby { namespace detail { namespace { +template +struct unique_identifier { + unique_identifier(size_type const* labels_, + size_type const* offsets_, + permuted_equal_t permuted_equal_, + value_resolver resolver_) + : labels(labels_), offsets(offsets_), permuted_equal(permuted_equal_), resolver(resolver_) + { + } + + auto __device__ operator()(size_type row_index) + { + auto const group_start = offsets[labels[row_index]]; + if constexpr (forward) { + // First value of equal values is 1. + return resolver(row_index == group_start || !permuted_equal(row_index, row_index - 1), + row_index - group_start); + } else { + auto const group_end = offsets[labels[row_index] + 1]; + // Last value of equal values is 1. + return resolver(row_index + 1 == group_end || !permuted_equal(row_index, row_index + 1), + row_index - group_start); + } + } + + size_type const* labels; + size_type const* offsets; + permuted_equal_t permuted_equal; + value_resolver resolver; +}; + /** * @brief generate grouped row ranks or dense ranks using a row comparison then scan the results * @@ -79,58 +110,28 @@ std::unique_ptr rank_generator(column_view const& grouped_values, data_type{type_to_id()}, grouped_values.size(), mask_state::UNALLOCATED, stream, mr); auto mutable_ranks = ranks->mutable_view(); - if (cudf::detail::has_nested_columns(grouped_values_view)) { - auto const d_equal = - comparator.equal_to(cudf::nullate::DYNAMIC{has_nulls}, null_equality::EQUAL); + auto const comparator_helper = [&](auto const d_equal) { auto const permuted_equal = permuted_row_equality_comparator(d_equal, value_order.begin()); - auto unique_identifier = [labels = group_labels.begin(), - offsets = group_offsets.begin(), - permuted_equal, - resolver] __device__(size_type row_index) { - auto const group_start = offsets[labels[row_index]]; - if constexpr (forward) { - // First value of equal values is 1. - return resolver(row_index == group_start || !permuted_equal(row_index, row_index - 1), - row_index - group_start); - } else { - auto const group_end = offsets[labels[row_index] + 1]; - // Last value of equal values is 1. - return resolver(row_index + 1 == group_end || !permuted_equal(row_index, row_index + 1), - row_index - group_start); - } - }; thrust::tabulate(rmm::exec_policy(stream), mutable_ranks.begin(), mutable_ranks.end(), - unique_identifier); + unique_identifier( + group_labels.begin(), group_offsets.begin(), permuted_equal, resolver)); + }; + + if (cudf::detail::has_nested_columns(grouped_values_view)) { + auto const d_equal = + comparator.equal_to(cudf::nullate::DYNAMIC{has_nulls}, null_equality::EQUAL); + + comparator_helper(d_equal); + } else { auto const d_equal = comparator.equal_to(cudf::nullate::DYNAMIC{has_nulls}, null_equality::EQUAL); - auto const permuted_equal = - permuted_row_equality_comparator(d_equal, value_order.begin()); - auto unique_identifier = [labels = group_labels.begin(), - offsets = group_offsets.begin(), - permuted_equal, - resolver] __device__(size_type row_index) { - auto const group_start = offsets[labels[row_index]]; - if constexpr (forward) { - // First value of equal values is 1. - return resolver(row_index == group_start || !permuted_equal(row_index, row_index - 1), - row_index - group_start); - } else { - auto const group_end = offsets[labels[row_index] + 1]; - // Last value of equal values is 1. - return resolver(row_index + 1 == group_end || !permuted_equal(row_index, row_index + 1), - row_index - group_start); - } - }; - thrust::tabulate(rmm::exec_policy(stream), - mutable_ranks.begin(), - mutable_ranks.end(), - unique_identifier); + comparator_helper(d_equal); } auto [group_labels_begin, mutable_rank_begin] = [&]() { diff --git a/cpp/src/stream_compaction/unique.cu b/cpp/src/stream_compaction/unique.cu index f9df4d6a2fa..279f2895464 100644 --- a/cpp/src/stream_compaction/unique.cu +++ b/cpp/src/stream_compaction/unique.cu @@ -67,10 +67,7 @@ std::unique_ptr
unique(table_view const& input, auto comp = cudf::experimental::row::equality::self_comparator(keys_view, stream); - if (cudf::detail::has_nested_columns(keys_view)) { - auto row_equal = - comp.equal_to(nullate::DYNAMIC{has_nested_nulls(keys_view)}, nulls_equal); - + auto const comparator_helper = [&](auto const row_equal) { // get indices of unique rows auto result_end = unique_copy(thrust::counting_iterator(0), thrust::counting_iterator(num_rows), @@ -91,30 +88,18 @@ std::unique_ptr
unique(table_view const& input, detail::negative_index_policy::NOT_ALLOWED, stream, mr); + }; + + if (cudf::detail::has_nested_columns(keys_view)) { + auto row_equal = + comp.equal_to(nullate::DYNAMIC{has_nested_nulls(keys_view)}, nulls_equal); + + return comparator_helper(row_equal); } else { auto row_equal = comp.equal_to(nullate::DYNAMIC{has_nested_nulls(keys_view)}, nulls_equal); - // get indices of unique rows - auto result_end = unique_copy(thrust::counting_iterator(0), - thrust::counting_iterator(num_rows), - mutable_view->begin(), - row_equal, - keep, - stream); - - auto indices_view = - cudf::detail::slice(column_view(*unique_indices), - 0, - thrust::distance(mutable_view->begin(), result_end)); - - // gather unique rows and return - return detail::gather(input, - indices_view, - out_of_bounds_policy::DONT_CHECK, - detail::negative_index_policy::NOT_ALLOWED, - stream, - mr); + return comparator_helper(row_equal); } } } // namespace detail From 0ceb79ea4012c7401b15cc7c30a6716ba2cacf84 Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 3 Feb 2023 09:10:15 -0800 Subject: [PATCH 05/14] address review to create functors --- .../cudf/table/experimental/row_operators.cuh | 11 ++- .../binaryop/compiled/struct_binary_ops.cuh | 69 ++++++++++++------- cpp/src/reductions/scan/rank_scan.cu | 43 ++++++++---- cpp/src/search/contains_scalar.cu | 37 +++------- cpp/src/transform/one_hot_encode.cu | 48 +++++++------ 5 files changed, 121 insertions(+), 87 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index f9805175948..3da69efe766 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -1131,10 +1131,19 @@ struct nan_equal_physical_equality_comparator { * returns false, representing unequal rows. If the rows are compared without mismatched elements, * the rows are equal. * - * @tparam has_nested_columns compile-time optimization for primitive types + * @tparam has_nested_columns compile-time optimization for primitive types. + * This template parameter is to be used by the developer by querying + * `cudf::detail::has_nested_columns(input)`. `true` compiles operator + * overloads for nested types, while `false` only compiles operator + * overloads for primitive types. * @tparam Nullate A cudf::nullate type describing whether to check for nulls. * @tparam PhysicalEqualityComparator A equality comparator functor that compares individual values * rather than logical elements, defaults to a comparator for which `NaN == NaN`. + * + * NOTE: The operator overloads in sub-class `element_comparator` are templated via the + * `type_dispatcher` to help select an overload instance for each column in a table. + * So, `cudf::is_nested` will return `true` if the table has nested-type columns, + * but it will be a runtime error if template parameter `has_nested_columns != true`. */ template +struct struct_equality_functor { + struct_equality_functor(OptionalIteratorType optional_iter_, + bool is_lhs_scalar_, + bool is_rhs_scalar_, + bool preserve_output_, + DeviceComparatorType device_comparator_) + : optional_iter(optional_iter_), + is_lhs_scalar(is_lhs_scalar_), + is_rhs_scalar(is_rhs_scalar_), + preserve_output(preserve_output_), + device_comparator(device_comparator_) + { + } + + auto __device__ operator()(size_type i) + { + auto lhs = cudf::experimental::row::lhs_index_type{is_lhs_scalar ? 0 : i}; + auto rhs = cudf::experimental::row::rhs_index_type{is_rhs_scalar ? 0 : i}; + return optional_iter[i].has_value() and (device_comparator(lhs, rhs) == preserve_output); + } + + OptionalIteratorType optional_iter; + bool is_lhs_scalar; + bool is_rhs_scalar; + bool preserve_output; + DeviceComparatorType device_comparator; +}; + template void apply_struct_equality_op(mutable_column_view& out, @@ -130,44 +159,32 @@ void apply_struct_equality_op(mutable_column_view& out, auto optional_iter = cudf::detail::make_optional_iterator(*outd, nullate::DYNAMIC{out.has_nulls()}); + auto const comparator_helper = [&](auto const device_comparator) { + thrust::tabulate(rmm::exec_policy(stream), + out.begin(), + out.end(), + struct_equality_functor( + optional_iter, + is_lhs_scalar, + is_rhs_scalar, + op != binary_operator::NOT_EQUAL, + device_comparator)); + }; + if (cudf::detail::has_nested_columns(tlhs) or cudf::detail::has_nested_columns(trhs)) { auto device_comparator = table_comparator.equal_to( nullate::DYNAMIC{has_nested_nulls(tlhs) || has_nested_nulls(trhs)}, null_equality::EQUAL, comparator); - thrust::tabulate(rmm::exec_policy(stream), - out.begin(), - out.end(), - [optional_iter, - is_lhs_scalar, - is_rhs_scalar, - preserve_output = (op != binary_operator::NOT_EQUAL), - device_comparator] __device__(size_type i) { - auto lhs = cudf::experimental::row::lhs_index_type{is_lhs_scalar ? 0 : i}; - auto rhs = cudf::experimental::row::rhs_index_type{is_rhs_scalar ? 0 : i}; - return optional_iter[i].has_value() and - (device_comparator(lhs, rhs) == preserve_output); - }); + comparator_helper(device_comparator); } else { auto device_comparator = table_comparator.equal_to( nullate::DYNAMIC{has_nested_nulls(tlhs) || has_nested_nulls(trhs)}, null_equality::EQUAL, comparator); - thrust::tabulate(rmm::exec_policy(stream), - out.begin(), - out.end(), - [optional_iter, - is_lhs_scalar, - is_rhs_scalar, - preserve_output = (op != binary_operator::NOT_EQUAL), - device_comparator] __device__(size_type i) { - auto lhs = cudf::experimental::row::lhs_index_type{is_lhs_scalar ? 0 : i}; - auto rhs = cudf::experimental::row::rhs_index_type{is_rhs_scalar ? 0 : i}; - return optional_iter[i].has_value() and - (device_comparator(lhs, rhs) == preserve_output); - }); + comparator_helper(device_comparator); } } } // namespace cudf::binops::compiled::detail diff --git a/cpp/src/reductions/scan/rank_scan.cu b/cpp/src/reductions/scan/rank_scan.cu index f7a763c5237..fe2c539112d 100644 --- a/cpp/src/reductions/scan/rank_scan.cu +++ b/cpp/src/reductions/scan/rank_scan.cu @@ -32,6 +32,22 @@ namespace cudf { namespace detail { namespace { +template +struct rank_equality_functor { + rank_equality_functor(device_comparator_type comparator_, value_resolver resolver_) + : comparator(comparator_), resolver(resolver_) + { + } + + auto __device__ operator()(size_type row_index) + { + return resolver(row_index == 0 || !comparator(row_index, row_index - 1), row_index); + } + + device_comparator_type comparator; + value_resolver resolver; +}; + /** * @brief generate row ranks or dense ranks using a row comparison then scan the results * @@ -58,26 +74,25 @@ std::unique_ptr rank_generator(column_view const& order_by, data_type{type_to_id()}, order_by.size(), mask_state::UNALLOCATED, stream, mr); auto mutable_ranks = ranks->mutable_view(); - if (cudf::detail::has_nested_columns(order_by_view)) { - auto const device_comparator = - comp.equal_to(nullate::DYNAMIC{has_nested_nulls(table_view({order_by}))}); + auto const comparator_helper = [&](auto const device_comparator) { thrust::tabulate(rmm::exec_policy(stream), mutable_ranks.begin(), mutable_ranks.end(), - [comparator = device_comparator, resolver] __device__(size_type row_index) { - return resolver(row_index == 0 || !comparator(row_index, row_index - 1), - row_index); - }); + rank_equality_functor( + device_comparator, resolver)); + }; + + if (cudf::detail::has_nested_columns(order_by_view)) { + auto const device_comparator = + comp.equal_to(nullate::DYNAMIC{has_nested_nulls(table_view({order_by}))}); + + comparator_helper(device_comparator); + } else { auto const device_comparator = comp.equal_to(nullate::DYNAMIC{has_nested_nulls(table_view({order_by}))}); - thrust::tabulate(rmm::exec_policy(stream), - mutable_ranks.begin(), - mutable_ranks.end(), - [comparator = device_comparator, resolver] __device__(size_type row_index) { - return resolver(row_index == 0 || !comparator(row_index, row_index - 1), - row_index); - }); + + comparator_helper(device_comparator); } thrust::inclusive_scan(rmm::exec_policy(stream), diff --git a/cpp/src/search/contains_scalar.cu b/cpp/src/search/contains_scalar.cu index acd0b6b069d..093a1f8f1ed 100644 --- a/cpp/src/search/contains_scalar.cu +++ b/cpp/src/search/contains_scalar.cu @@ -107,32 +107,17 @@ struct contains_scalar_dispatch { auto const check_nulls = haystack.has_nulls(); auto const haystack_cdv_ptr = column_device_view::create(haystack, stream); - if (cudf::detail::has_nested_columns(haystack_tv) or - cudf::detail::has_nested_columns(needle_tv)) { - auto const d_comp = comparator.equal_to(nullate::DYNAMIC{has_nulls}); - return thrust::count_if( - rmm::exec_policy(stream), - begin, - end, - [d_comp, check_nulls, d_haystack = *haystack_cdv_ptr] __device__(auto const idx) { - if (check_nulls && d_haystack.is_null_nocheck(static_cast(idx))) { - return false; - } - return d_comp(idx, rhs_index_type{0}); // compare haystack[idx] == needle[0]. - }) > 0; - } else { - auto const d_comp = comparator.equal_to(nullate::DYNAMIC{has_nulls}); - return thrust::count_if( - rmm::exec_policy(stream), - begin, - end, - [d_comp, check_nulls, d_haystack = *haystack_cdv_ptr] __device__(auto const idx) { - if (check_nulls && d_haystack.is_null_nocheck(static_cast(idx))) { - return false; - } - return d_comp(idx, rhs_index_type{0}); // compare haystack[idx] == needle[0]. - }) > 0; - } + auto const d_comp = comparator.equal_to(nullate::DYNAMIC{has_nulls}); + return thrust::count_if( + rmm::exec_policy(stream), + begin, + end, + [d_comp, check_nulls, d_haystack = *haystack_cdv_ptr] __device__(auto const idx) { + if (check_nulls && d_haystack.is_null_nocheck(static_cast(idx))) { + return false; + } + return d_comp(idx, rhs_index_type{0}); // compare haystack[idx] == needle[0]. + }) > 0; } }; diff --git a/cpp/src/transform/one_hot_encode.cu b/cpp/src/transform/one_hot_encode.cu index 50bbe216b5f..5d16dfbded5 100644 --- a/cpp/src/transform/one_hot_encode.cu +++ b/cpp/src/transform/one_hot_encode.cu @@ -36,6 +36,24 @@ namespace cudf { namespace detail { +template +struct ohe_equality_functor { + ohe_equality_functor(size_type input_size_, DeviceComparatorType d_equal_) + : input_size(input_size_), d_equal(d_equal_) + { + } + + auto __device__ operator()(size_type i) + { + auto const element_index = cudf::experimental::row::lhs_index_type{i % input_size}; + auto const category_index = cudf::experimental::row::rhs_index_type{i / input_size}; + return d_equal(element_index, category_index); + } + + size_type input_size; + DeviceComparatorType d_equal; +}; + std::pair, table_view> one_hot_encode(column_view const& input, column_view const& categories, rmm::cuda_stream_view stream, @@ -60,34 +78,24 @@ std::pair, table_view> one_hot_encode(column_view const& auto const comparator = cudf::experimental::row::equality::two_table_comparator{t_lhs, t_rhs, stream}; + auto const comparator_helper = [&](auto const d_equal) { + thrust::transform(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(total_size), + all_encodings->mutable_view().begin(), + ohe_equality_functor(input.size(), d_equal)); + }; + if (cudf::detail::has_nested_columns(t_lhs) or cudf::detail::has_nested_columns(t_rhs)) { auto const d_equal = comparator.equal_to( nullate::DYNAMIC{has_nested_nulls(t_lhs) || has_nested_nulls(t_rhs)}); - thrust::transform( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(total_size), - all_encodings->mutable_view().begin(), - [input_size = input.size(), d_equal] __device__(size_type i) { - auto const element_index = cudf::experimental::row::lhs_index_type{i % input_size}; - auto const category_index = cudf::experimental::row::rhs_index_type{i / input_size}; - return d_equal(element_index, category_index); - }); + comparator_helper(d_equal); } else { auto const d_equal = comparator.equal_to( nullate::DYNAMIC{has_nested_nulls(t_lhs) || has_nested_nulls(t_rhs)}); - thrust::transform( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(total_size), - all_encodings->mutable_view().begin(), - [input_size = input.size(), d_equal] __device__(size_type i) { - auto const element_index = cudf::experimental::row::lhs_index_type{i % input_size}; - auto const category_index = cudf::experimental::row::rhs_index_type{i / input_size}; - return d_equal(element_index, category_index); - }); + comparator_helper(d_equal); } auto const split_iter = From 37e7326b513374a8220d62ae666922474ec05d92 Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 3 Feb 2023 11:31:44 -0800 Subject: [PATCH 06/14] updating has_nested_columns docs --- .../cudf/table/experimental/row_operators.cuh | 62 ++++++++++++++++--- 1 file changed, 55 insertions(+), 7 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 3da69efe766..d99cea2f9fb 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -245,6 +245,16 @@ using optional_dremel_view = thrust::optional; * second letter in both words is the first non-equal letter, and `a < b`, thus * `aac < abb`. * + * @note: The operator overloads in sub-class `element_comparator` are templated via the + * `type_dispatcher` to help select an overload instance for each column in a table. + * So, `cudf::is_nested` will return `true` if the table has nested-type columns, + * but it will be a runtime error if template parameter `has_nested_columns != true`. + * + * @tparam has_nested_columns compile-time optimization for primitive types. + * This template parameter is to be used by the developer by querying + * `cudf::detail::has_nested_columns(input)`. `true` compiles operator + * overloads for nested types, while `false` only compiles operator + * overloads for primitive types. * @tparam Nullate A cudf::nullate type describing whether to check for nulls. * @tparam PhysicalElementComparator A relational comparator functor that compares individual values * rather than logical elements, defaults to `NaN` aware relational comparator that evaluates `NaN` @@ -857,6 +867,16 @@ class self_comparator { * * `F(i,j)` returns true if and only if row `i` compares lexicographically less than row `j`. * + * @note: The operator overloads in sub-class `element_comparator` are templated via the + * `type_dispatcher` to help select an overload instance for each column in a table. + * So, `cudf::is_nested` will return `true` if the table has nested-type columns, + * but it will be a runtime error if template parameter `has_nested_columns != true`. + * + * @tparam has_nested_columns compile-time optimization for primitive types. + * This template parameter is to be used by the developer by querying + * `cudf::detail::has_nested_columns(input)`. `true` compiles operator + * overloads for nested types, while `false` only compiles operator + * overloads for primitive types. * @tparam Nullate A cudf::nullate type describing whether to check for nulls. * @tparam PhysicalElementComparator A relational comparator functor that compares individual * values rather than logical elements, defaults to `NaN` aware relational comparator that @@ -1009,6 +1029,16 @@ class two_table_comparator { * only if row `i` of the right table compares lexicographically less than row * `j` of the left table. * + * @note: The operator overloads in sub-class `element_comparator` are templated via the + * `type_dispatcher` to help select an overload instance for each column in a table. + * So, `cudf::is_nested` will return `true` if the table has nested-type columns, + * but it will be a runtime error if template parameter `has_nested_columns != true`. + * + * @tparam has_nested_columns compile-time optimization for primitive types. + * This template parameter is to be used by the developer by querying + * `cudf::detail::has_nested_columns(input)`. `true` compiles operator + * overloads for nested types, while `false` only compiles operator + * overloads for primitive types. * @tparam Nullate A cudf::nullate type describing whether to check for nulls. * @tparam PhysicalElementComparator A relational comparator functor that compares individual * values rather than logical elements, defaults to `NaN` aware relational comparator that @@ -1131,6 +1161,11 @@ struct nan_equal_physical_equality_comparator { * returns false, representing unequal rows. If the rows are compared without mismatched elements, * the rows are equal. * + * @note: The operator overloads in sub-class `element_comparator` are templated via the + * `type_dispatcher` to help select an overload instance for each column in a table. + * So, `cudf::is_nested` will return `true` if the table has nested-type columns, + * but it will be a runtime error if template parameter `has_nested_columns != true`. + * * @tparam has_nested_columns compile-time optimization for primitive types. * This template parameter is to be used by the developer by querying * `cudf::detail::has_nested_columns(input)`. `true` compiles operator @@ -1139,11 +1174,6 @@ struct nan_equal_physical_equality_comparator { * @tparam Nullate A cudf::nullate type describing whether to check for nulls. * @tparam PhysicalEqualityComparator A equality comparator functor that compares individual values * rather than logical elements, defaults to a comparator for which `NaN == NaN`. - * - * NOTE: The operator overloads in sub-class `element_comparator` are templated via the - * `type_dispatcher` to help select an overload instance for each column in a table. - * So, `cudf::is_nested` will return `true` if the table has nested-type columns, - * but it will be a runtime error if template parameter `has_nested_columns != true`. */ template ` will return `true` if the table has nested-type columns, + * but it will be a runtime error if template parameter `has_nested_columns != true`. + * + * @tparam has_nested_columns compile-time optimization for primitive types. + * This template parameter is to be used by the developer by querying + * `cudf::detail::has_nested_columns(input)`. `true` compiles operator + * overloads for nested types, while `false` only compiles operator + * overloads for primitive types. * @tparam Nullate A cudf::nullate type describing whether to check for nulls. * @tparam PhysicalEqualityComparator A equality comparator functor that compares individual * values rather than logical elements, defaults to a comparator for which `NaN == NaN`. @@ -1553,7 +1592,16 @@ class two_table_comparator { * Similarly, `F(rhs_index_type i, lhs_index_type j)` returns true if and only if row `i` of the * right table compares equal to row `j` of the left table. * - * @tparam has_nested_columns compile-time optimization for primitive types + * @note: The operator overloads in sub-class `element_comparator` are templated via the + * `type_dispatcher` to help select an overload instance for each column in a table. + * So, `cudf::is_nested` will return `true` if the table has nested-type columns, + * but it will be a runtime error if template parameter `has_nested_columns != true`. + * + * @tparam has_nested_columns compile-time optimization for primitive types. + * This template parameter is to be used by the developer by querying + * `cudf::detail::has_nested_columns(input)`. `true` compiles operator + * overloads for nested types, while `false` only compiles operator + * overloads for primitive types. * @tparam Nullate A cudf::nullate type describing whether to check for nulls. * @tparam PhysicalEqualityComparator A equality comparator functor that compares individual * values rather than logical elements, defaults to a `NaN == NaN` equality comparator. From c2ff1fc4a849f15916b39f0cbf9ed5cdfba9e788 Mon Sep 17 00:00:00 2001 From: divyegala Date: Mon, 6 Feb 2023 16:43:34 -0800 Subject: [PATCH 07/14] address review for underscore prefixes in structs --- .../binaryop/compiled/struct_binary_ops.cuh | 36 +++++++++---------- cpp/src/groupby/sort/group_rank_scan.cu | 30 ++++++++-------- cpp/src/reductions/scan/rank_scan.cu | 10 +++--- cpp/src/transform/one_hot_encode.cu | 14 ++++---- 4 files changed, 45 insertions(+), 45 deletions(-) diff --git a/cpp/src/binaryop/compiled/struct_binary_ops.cuh b/cpp/src/binaryop/compiled/struct_binary_ops.cuh index bea52faa87f..b00c9055f6b 100644 --- a/cpp/src/binaryop/compiled/struct_binary_ops.cuh +++ b/cpp/src/binaryop/compiled/struct_binary_ops.cuh @@ -108,31 +108,31 @@ void apply_struct_binary_op(mutable_column_view& out, template struct struct_equality_functor { - struct_equality_functor(OptionalIteratorType optional_iter_, - bool is_lhs_scalar_, - bool is_rhs_scalar_, - bool preserve_output_, - DeviceComparatorType device_comparator_) - : optional_iter(optional_iter_), - is_lhs_scalar(is_lhs_scalar_), - is_rhs_scalar(is_rhs_scalar_), - preserve_output(preserve_output_), - device_comparator(device_comparator_) + struct_equality_functor(OptionalIteratorType optional_iter, + bool is_lhs_scalar, + bool is_rhs_scalar, + bool preserve_output, + DeviceComparatorType device_comparator) + : _optional_iter(optional_iter), + _is_lhs_scalar(is_lhs_scalar), + _is_rhs_scalar(is_rhs_scalar), + _preserve_output(preserve_output), + _device_comparator(device_comparator) { } auto __device__ operator()(size_type i) { - auto lhs = cudf::experimental::row::lhs_index_type{is_lhs_scalar ? 0 : i}; - auto rhs = cudf::experimental::row::rhs_index_type{is_rhs_scalar ? 0 : i}; - return optional_iter[i].has_value() and (device_comparator(lhs, rhs) == preserve_output); + auto lhs = cudf::experimental::row::lhs_index_type{_is_lhs_scalar ? 0 : i}; + auto rhs = cudf::experimental::row::rhs_index_type{_is_rhs_scalar ? 0 : i}; + return _optional_iter[i].has_value() and (_device_comparator(lhs, rhs) == _preserve_output); } - OptionalIteratorType optional_iter; - bool is_lhs_scalar; - bool is_rhs_scalar; - bool preserve_output; - DeviceComparatorType device_comparator; + OptionalIteratorType _optional_iter; + bool _is_lhs_scalar; + bool _is_rhs_scalar; + bool _preserve_output; + DeviceComparatorType _device_comparator; }; template struct unique_identifier { - unique_identifier(size_type const* labels_, - size_type const* offsets_, - permuted_equal_t permuted_equal_, - value_resolver resolver_) - : labels(labels_), offsets(offsets_), permuted_equal(permuted_equal_), resolver(resolver_) + unique_identifier(size_type const* labels, + size_type const* offsets, + permuted_equal_t permuted_equal, + value_resolver resolver) + : _labels(labels), _offsets(offsets), _permuted_equal(permuted_equal), _resolver(resolver) { } auto __device__ operator()(size_type row_index) { - auto const group_start = offsets[labels[row_index]]; + auto const group_start = _offsets[_labels[row_index]]; if constexpr (forward) { // First value of equal values is 1. - return resolver(row_index == group_start || !permuted_equal(row_index, row_index - 1), - row_index - group_start); + return _resolver(row_index == group_start || !_permuted_equal(row_index, row_index - 1), + row_index - group_start); } else { - auto const group_end = offsets[labels[row_index] + 1]; + auto const group_end = _offsets[_labels[row_index] + 1]; // Last value of equal values is 1. - return resolver(row_index + 1 == group_end || !permuted_equal(row_index, row_index + 1), - row_index - group_start); + return _resolver(row_index + 1 == group_end || !_permuted_equal(row_index, row_index + 1), + row_index - group_start); } } - size_type const* labels; - size_type const* offsets; - permuted_equal_t permuted_equal; - value_resolver resolver; + size_type const* _labels; + size_type const* _offsets; + permuted_equal_t _permuted_equal; + value_resolver _resolver; }; /** diff --git a/cpp/src/reductions/scan/rank_scan.cu b/cpp/src/reductions/scan/rank_scan.cu index fe2c539112d..e5c90bff8c0 100644 --- a/cpp/src/reductions/scan/rank_scan.cu +++ b/cpp/src/reductions/scan/rank_scan.cu @@ -34,18 +34,18 @@ namespace { template struct rank_equality_functor { - rank_equality_functor(device_comparator_type comparator_, value_resolver resolver_) - : comparator(comparator_), resolver(resolver_) + rank_equality_functor(device_comparator_type comparator, value_resolver resolver) + : _comparator(comparator), _resolver(resolver) { } auto __device__ operator()(size_type row_index) { - return resolver(row_index == 0 || !comparator(row_index, row_index - 1), row_index); + return _resolver(row_index == 0 || !_comparator(row_index, row_index - 1), row_index); } - device_comparator_type comparator; - value_resolver resolver; + device_comparator_type _comparator; + value_resolver _resolver; }; /** diff --git a/cpp/src/transform/one_hot_encode.cu b/cpp/src/transform/one_hot_encode.cu index 5d16dfbded5..e4c63a769d6 100644 --- a/cpp/src/transform/one_hot_encode.cu +++ b/cpp/src/transform/one_hot_encode.cu @@ -38,20 +38,20 @@ namespace detail { template struct ohe_equality_functor { - ohe_equality_functor(size_type input_size_, DeviceComparatorType d_equal_) - : input_size(input_size_), d_equal(d_equal_) + ohe_equality_functor(size_type input_size, DeviceComparatorType d_equal) + : _input_size(input_size), _d_equal(d_equal) { } auto __device__ operator()(size_type i) { - auto const element_index = cudf::experimental::row::lhs_index_type{i % input_size}; - auto const category_index = cudf::experimental::row::rhs_index_type{i / input_size}; - return d_equal(element_index, category_index); + auto const element_index = cudf::experimental::row::lhs_index_type{i % _input_size}; + auto const category_index = cudf::experimental::row::rhs_index_type{i / _input_size}; + return _d_equal(element_index, category_index); } - size_type input_size; - DeviceComparatorType d_equal; + size_type _input_size; + DeviceComparatorType _d_equal; }; std::pair, table_view> one_hot_encode(column_view const& input, From 53e918f88e672411fa2311b0632ab6395c5f1e5d Mon Sep 17 00:00:00 2001 From: divyegala Date: Tue, 7 Feb 2023 18:59:07 -0800 Subject: [PATCH 08/14] add rank --- cpp/src/sort/rank.cu | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/cpp/src/sort/rank.cu b/cpp/src/sort/rank.cu index 461e978643f..5045878b8cf 100644 --- a/cpp/src/sort/rank.cu +++ b/cpp/src/sort/rank.cu @@ -55,21 +55,40 @@ rmm::device_uvector sorted_dense_rank(column_view input_col, { auto const t_input = table_view{{input_col}}; auto const comparator = cudf::experimental::row::equality::self_comparator{t_input, stream}; - auto const device_comparator = comparator.equal_to(nullate::DYNAMIC{has_nested_nulls(t_input)}); auto const sorted_index_order = thrust::make_permutation_iterator( sorted_order_view.begin(), thrust::make_counting_iterator(0)); - auto conv = [permute = sorted_index_order, device_comparator] __device__(size_type index) { - return static_cast(index == 0 || - not device_comparator(permute[index], permute[index - 1])); - }; - auto const unique_it = cudf::detail::make_counting_transform_iterator(0, conv); auto const input_size = input_col.size(); rmm::device_uvector dense_rank_sorted(input_size, stream); - thrust::inclusive_scan( - rmm::exec_policy(stream), unique_it, unique_it + input_size, dense_rank_sorted.data()); + if (cudf::detail::has_nested_columns(t_input)) { + auto const device_comparator = + comparator.equal_to(nullate::DYNAMIC{has_nested_nulls(t_input)}); + + auto conv = [permute = sorted_index_order, device_comparator] __device__(size_type index) { + return static_cast(index == 0 || + not device_comparator(permute[index], permute[index - 1])); + }; + auto const unique_it = cudf::detail::make_counting_transform_iterator(0, conv); + + thrust::inclusive_scan( + rmm::exec_policy(stream), unique_it, unique_it + input_size, dense_rank_sorted.data()); + + } else { + auto const device_comparator = + comparator.equal_to(nullate::DYNAMIC{has_nested_nulls(t_input)}); + + auto conv = [permute = sorted_index_order, device_comparator] __device__(size_type index) { + return static_cast(index == 0 || + not device_comparator(permute[index], permute[index - 1])); + }; + auto const unique_it = cudf::detail::make_counting_transform_iterator(0, conv); + + thrust::inclusive_scan( + rmm::exec_policy(stream), unique_it, unique_it + input_size, dense_rank_sorted.data()); + } + return dense_rank_sorted; } From 65e2bce1df185d015f9debdbf1d63841c99341c7 Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 8 Feb 2023 11:30:26 -0800 Subject: [PATCH 09/14] fix compile times for rank --- cpp/src/sort/rank.cu | 50 +++++++++++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/cpp/src/sort/rank.cu b/cpp/src/sort/rank.cu index 5045878b8cf..b3c8da9d7d7 100644 --- a/cpp/src/sort/rank.cu +++ b/cpp/src/sort/rank.cu @@ -48,6 +48,23 @@ namespace cudf { namespace detail { namespace { +template +struct unique_functor { + unique_functor(PermutationIteratorType permute, DeviceComparatorType device_comparator) + : _permute(permute), _device_comparator(device_comparator) + { + } + + auto __device__ operator()(size_type index) + { + return static_cast(index == 0 || + not _device_comparator(_permute[index], _permute[index - 1])); + } + + PermutationIteratorType _permute; + DeviceComparatorType _device_comparator; +}; + // Assign rank from 1 to n unique values. Equal values get same rank value. rmm::device_uvector sorted_dense_rank(column_view input_col, column_view sorted_order_view, @@ -62,33 +79,32 @@ rmm::device_uvector sorted_dense_rank(column_view input_col, auto const input_size = input_col.size(); rmm::device_uvector dense_rank_sorted(input_size, stream); + auto const comparator_helper = [&](auto const device_comparator) { + thrust::transform(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(input_size), + dense_rank_sorted.data(), + unique_functor{ + sorted_index_order, device_comparator}); + }; + if (cudf::detail::has_nested_columns(t_input)) { auto const device_comparator = comparator.equal_to(nullate::DYNAMIC{has_nested_nulls(t_input)}); - auto conv = [permute = sorted_index_order, device_comparator] __device__(size_type index) { - return static_cast(index == 0 || - not device_comparator(permute[index], permute[index - 1])); - }; - auto const unique_it = cudf::detail::make_counting_transform_iterator(0, conv); - - thrust::inclusive_scan( - rmm::exec_policy(stream), unique_it, unique_it + input_size, dense_rank_sorted.data()); - + comparator_helper(device_comparator); } else { auto const device_comparator = comparator.equal_to(nullate::DYNAMIC{has_nested_nulls(t_input)}); - auto conv = [permute = sorted_index_order, device_comparator] __device__(size_type index) { - return static_cast(index == 0 || - not device_comparator(permute[index], permute[index - 1])); - }; - auto const unique_it = cudf::detail::make_counting_transform_iterator(0, conv); - - thrust::inclusive_scan( - rmm::exec_policy(stream), unique_it, unique_it + input_size, dense_rank_sorted.data()); + comparator_helper(device_comparator); } + thrust::inclusive_scan(rmm::exec_policy(stream), + dense_rank_sorted.begin(), + dense_rank_sorted.end(), + dense_rank_sorted.data()); + return dense_rank_sorted; } From 1344e331bf782d29f17163735c206a1deb0e1f3b Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Fri, 10 Feb 2023 19:31:07 -0500 Subject: [PATCH 10/14] Apply suggestions from code review Co-authored-by: Nghia Truong --- .../cudf/table/experimental/row_operators.cuh | 16 ++++++++-------- cpp/src/binaryop/compiled/struct_binary_ops.cuh | 4 ++-- cpp/src/groupby/hash/groupby.cu | 3 --- cpp/src/groupby/sort/group_nunique.cu | 2 -- cpp/src/groupby/sort/group_rank_scan.cu | 3 --- cpp/src/reductions/scan/rank_scan.cu | 3 --- cpp/src/sort/rank.cu | 2 -- cpp/src/stream_compaction/unique.cu | 2 -- cpp/src/transform/one_hot_encode.cu | 2 -- 9 files changed, 10 insertions(+), 27 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index d99cea2f9fb..6040aea9fce 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -253,8 +253,8 @@ using optional_dremel_view = thrust::optional; * @tparam has_nested_columns compile-time optimization for primitive types. * This template parameter is to be used by the developer by querying * `cudf::detail::has_nested_columns(input)`. `true` compiles operator - * overloads for nested types, while `false` only compiles operator - * overloads for primitive types. + * overloads for nested types, while `false` only compiles operator + * overloads for primitive types. * @tparam Nullate A cudf::nullate type describing whether to check for nulls. * @tparam PhysicalElementComparator A relational comparator functor that compares individual values * rather than logical elements, defaults to `NaN` aware relational comparator that evaluates `NaN` @@ -1169,8 +1169,8 @@ struct nan_equal_physical_equality_comparator { * @tparam has_nested_columns compile-time optimization for primitive types. * This template parameter is to be used by the developer by querying * `cudf::detail::has_nested_columns(input)`. `true` compiles operator - * overloads for nested types, while `false` only compiles operator - * overloads for primitive types. + * overloads for nested types, while `false` only compiles operator + * overloads for primitive types. * @tparam Nullate A cudf::nullate type describing whether to check for nulls. * @tparam PhysicalEqualityComparator A equality comparator functor that compares individual values * rather than logical elements, defaults to a comparator for which `NaN == NaN`. @@ -1486,8 +1486,8 @@ class self_comparator { * @tparam has_nested_columns compile-time optimization for primitive types. * This template parameter is to be used by the developer by querying * `cudf::detail::has_nested_columns(input)`. `true` compiles operator - * overloads for nested types, while `false` only compiles operator - * overloads for primitive types. + * overloads for nested types, while `false` only compiles operator + * overloads for primitive types. * @tparam Nullate A cudf::nullate type describing whether to check for nulls. * @tparam PhysicalEqualityComparator A equality comparator functor that compares individual * values rather than logical elements, defaults to a comparator for which `NaN == NaN`. @@ -1600,8 +1600,8 @@ class two_table_comparator { * @tparam has_nested_columns compile-time optimization for primitive types. * This template parameter is to be used by the developer by querying * `cudf::detail::has_nested_columns(input)`. `true` compiles operator - * overloads for nested types, while `false` only compiles operator - * overloads for primitive types. + * overloads for nested types, while `false` only compiles operator + * overloads for primitive types. * @tparam Nullate A cudf::nullate type describing whether to check for nulls. * @tparam PhysicalEqualityComparator A equality comparator functor that compares individual * values rather than logical elements, defaults to a `NaN == NaN` equality comparator. diff --git a/cpp/src/binaryop/compiled/struct_binary_ops.cuh b/cpp/src/binaryop/compiled/struct_binary_ops.cuh index b00c9055f6b..5ecdd2447aa 100644 --- a/cpp/src/binaryop/compiled/struct_binary_ops.cuh +++ b/cpp/src/binaryop/compiled/struct_binary_ops.cuh @@ -123,8 +123,8 @@ struct struct_equality_functor { auto __device__ operator()(size_type i) { - auto lhs = cudf::experimental::row::lhs_index_type{_is_lhs_scalar ? 0 : i}; - auto rhs = cudf::experimental::row::rhs_index_type{_is_rhs_scalar ? 0 : i}; + auto const lhs = cudf::experimental::row::lhs_index_type{_is_lhs_scalar ? 0 : i}; + auto const rhs = cudf::experimental::row::rhs_index_type{_is_rhs_scalar ? 0 : i}; return _optional_iter[i].has_value() and (_device_comparator(lhs, rhs) == _preserve_output); } diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu index 07558cae387..75b45cf74d4 100644 --- a/cpp/src/groupby/hash/groupby.cu +++ b/cpp/src/groupby/hash/groupby.cu @@ -625,12 +625,9 @@ std::unique_ptr
groupby(table_view const& keys, if (cudf::detail::has_nested_columns(keys)) { auto const d_key_equal = comparator.equal_to(has_null, null_keys_are_equal); - return comparator_helper(d_key_equal); - } else { auto const d_key_equal = comparator.equal_to(has_null, null_keys_are_equal); - return comparator_helper(d_key_equal); } } diff --git a/cpp/src/groupby/sort/group_nunique.cu b/cpp/src/groupby/sort/group_nunique.cu index c68bdb1fd43..cf81253483e 100644 --- a/cpp/src/groupby/sort/group_nunique.cu +++ b/cpp/src/groupby/sort/group_nunique.cu @@ -114,12 +114,10 @@ std::unique_ptr group_nunique(column_view const& values, if (cudf::detail::has_nested_columns(values_view)) { auto const d_equal = comparator.equal_to( cudf::nullate::DYNAMIC{cudf::has_nested_nulls(values_view)}, null_equality::EQUAL); - comparator_helper(d_equal); } else { auto const d_equal = comparator.equal_to( cudf::nullate::DYNAMIC{cudf::has_nested_nulls(values_view)}, null_equality::EQUAL); - comparator_helper(d_equal); } diff --git a/cpp/src/groupby/sort/group_rank_scan.cu b/cpp/src/groupby/sort/group_rank_scan.cu index 435d8022845..e5e4af8be4f 100644 --- a/cpp/src/groupby/sort/group_rank_scan.cu +++ b/cpp/src/groupby/sort/group_rank_scan.cu @@ -124,13 +124,10 @@ std::unique_ptr rank_generator(column_view const& grouped_values, if (cudf::detail::has_nested_columns(grouped_values_view)) { auto const d_equal = comparator.equal_to(cudf::nullate::DYNAMIC{has_nulls}, null_equality::EQUAL); - comparator_helper(d_equal); - } else { auto const d_equal = comparator.equal_to(cudf::nullate::DYNAMIC{has_nulls}, null_equality::EQUAL); - comparator_helper(d_equal); } diff --git a/cpp/src/reductions/scan/rank_scan.cu b/cpp/src/reductions/scan/rank_scan.cu index e5c90bff8c0..8c6a3cdf088 100644 --- a/cpp/src/reductions/scan/rank_scan.cu +++ b/cpp/src/reductions/scan/rank_scan.cu @@ -85,13 +85,10 @@ std::unique_ptr rank_generator(column_view const& order_by, if (cudf::detail::has_nested_columns(order_by_view)) { auto const device_comparator = comp.equal_to(nullate::DYNAMIC{has_nested_nulls(table_view({order_by}))}); - comparator_helper(device_comparator); - } else { auto const device_comparator = comp.equal_to(nullate::DYNAMIC{has_nested_nulls(table_view({order_by}))}); - comparator_helper(device_comparator); } diff --git a/cpp/src/sort/rank.cu b/cpp/src/sort/rank.cu index b3c8da9d7d7..8d3ef3a3c1e 100644 --- a/cpp/src/sort/rank.cu +++ b/cpp/src/sort/rank.cu @@ -91,12 +91,10 @@ rmm::device_uvector sorted_dense_rank(column_view input_col, if (cudf::detail::has_nested_columns(t_input)) { auto const device_comparator = comparator.equal_to(nullate::DYNAMIC{has_nested_nulls(t_input)}); - comparator_helper(device_comparator); } else { auto const device_comparator = comparator.equal_to(nullate::DYNAMIC{has_nested_nulls(t_input)}); - comparator_helper(device_comparator); } diff --git a/cpp/src/stream_compaction/unique.cu b/cpp/src/stream_compaction/unique.cu index 279f2895464..511a7b7ae1c 100644 --- a/cpp/src/stream_compaction/unique.cu +++ b/cpp/src/stream_compaction/unique.cu @@ -93,12 +93,10 @@ std::unique_ptr
unique(table_view const& input, if (cudf::detail::has_nested_columns(keys_view)) { auto row_equal = comp.equal_to(nullate::DYNAMIC{has_nested_nulls(keys_view)}, nulls_equal); - return comparator_helper(row_equal); } else { auto row_equal = comp.equal_to(nullate::DYNAMIC{has_nested_nulls(keys_view)}, nulls_equal); - return comparator_helper(row_equal); } } diff --git a/cpp/src/transform/one_hot_encode.cu b/cpp/src/transform/one_hot_encode.cu index e4c63a769d6..c63e6d1261f 100644 --- a/cpp/src/transform/one_hot_encode.cu +++ b/cpp/src/transform/one_hot_encode.cu @@ -89,12 +89,10 @@ std::pair, table_view> one_hot_encode(column_view const& if (cudf::detail::has_nested_columns(t_lhs) or cudf::detail::has_nested_columns(t_rhs)) { auto const d_equal = comparator.equal_to( nullate::DYNAMIC{has_nested_nulls(t_lhs) || has_nested_nulls(t_rhs)}); - comparator_helper(d_equal); } else { auto const d_equal = comparator.equal_to( nullate::DYNAMIC{has_nested_nulls(t_lhs) || has_nested_nulls(t_rhs)}); - comparator_helper(d_equal); } From 41233796701bbd893abbf8f03c05b2dae04f4da4 Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 10 Feb 2023 17:20:46 -0800 Subject: [PATCH 11/14] address review --- .../binaryop/compiled/struct_binary_ops.cuh | 16 +++--- cpp/src/groupby/hash/groupby.cu | 5 -- cpp/src/groupby/sort/group_rank_scan.cu | 2 +- cpp/src/lists/contains.cu | 40 ++++++-------- cpp/src/partitioning/partitioning.cu | 13 +++-- cpp/src/reductions/scan/rank_scan.cu | 8 +-- cpp/src/search/contains_table.cu | 52 +++++++------------ cpp/src/sort/rank.cu | 2 +- cpp/src/transform/one_hot_encode.cu | 2 +- 9 files changed, 58 insertions(+), 82 deletions(-) diff --git a/cpp/src/binaryop/compiled/struct_binary_ops.cuh b/cpp/src/binaryop/compiled/struct_binary_ops.cuh index 5ecdd2447aa..0f273f8f0dd 100644 --- a/cpp/src/binaryop/compiled/struct_binary_ops.cuh +++ b/cpp/src/binaryop/compiled/struct_binary_ops.cuh @@ -109,19 +109,19 @@ void apply_struct_binary_op(mutable_column_view& out, template struct struct_equality_functor { struct_equality_functor(OptionalIteratorType optional_iter, + DeviceComparatorType device_comparator, bool is_lhs_scalar, bool is_rhs_scalar, - bool preserve_output, - DeviceComparatorType device_comparator) + bool preserve_output) : _optional_iter(optional_iter), + _device_comparator(device_comparator), _is_lhs_scalar(is_lhs_scalar), _is_rhs_scalar(is_rhs_scalar), - _preserve_output(preserve_output), - _device_comparator(device_comparator) + _preserve_output(preserve_output) { } - auto __device__ operator()(size_type i) + auto __device__ operator()(size_type i) const noexcept { auto const lhs = cudf::experimental::row::lhs_index_type{_is_lhs_scalar ? 0 : i}; auto const rhs = cudf::experimental::row::rhs_index_type{_is_rhs_scalar ? 0 : i}; @@ -129,10 +129,10 @@ struct struct_equality_functor { } OptionalIteratorType _optional_iter; + DeviceComparatorType _device_comparator; bool _is_lhs_scalar; bool _is_rhs_scalar; bool _preserve_output; - DeviceComparatorType _device_comparator; }; template (), struct_equality_functor( optional_iter, + device_comparator, is_lhs_scalar, is_rhs_scalar, - op != binary_operator::NOT_EQUAL, - device_comparator)); + op != binary_operator::NOT_EQUAL)); }; if (cudf::detail::has_nested_columns(tlhs) or cudf::detail::has_nested_columns(trhs)) { diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu index 75b45cf74d4..72ac6255549 100644 --- a/cpp/src/groupby/hash/groupby.cu +++ b/cpp/src/groupby/hash/groupby.cu @@ -76,11 +76,6 @@ using map_type = device_row_hasher, ComparatorType>; -template -using comparator_type = - cudf::experimental::row::equality::device_row_comparator; - /** * @brief List of aggregation operations that can be computed with a hash-based * implementation. diff --git a/cpp/src/groupby/sort/group_rank_scan.cu b/cpp/src/groupby/sort/group_rank_scan.cu index e5e4af8be4f..5715a4829f4 100644 --- a/cpp/src/groupby/sort/group_rank_scan.cu +++ b/cpp/src/groupby/sort/group_rank_scan.cu @@ -51,7 +51,7 @@ struct unique_identifier { { } - auto __device__ operator()(size_type row_index) + auto __device__ operator()(size_type row_index) const noexcept { auto const group_start = _offsets[_labels[row_index]]; if constexpr (forward) { diff --git a/cpp/src/lists/contains.cu b/cpp/src/lists/contains.cu index 85d9fea7ae5..03ac2919bce 100644 --- a/cpp/src/lists/contains.cu +++ b/cpp/src/lists/contains.cu @@ -268,47 +268,37 @@ void index_of_nested_types(InputIterator input_it, auto const comparator = cudf::experimental::row::equality::two_table_comparator(child_tview, keys_tview, stream); - if (cudf::detail::has_nested_columns(child_tview) or - cudf::detail::has_nested_columns(keys_tview)) { - auto const d_comp = comparator.equal_to(nullate::DYNAMIC{has_nulls}); + auto const tables_have_nested_columns = + cudf::detail::has_nested_columns(child_tview) or cudf::detail::has_nested_columns(keys_tview); + auto const do_search = [=](auto const key_validity_iter) { + if (tables_have_nested_columns) { + auto const d_comp = comparator.equal_to(nullate::DYNAMIC{has_nulls}); - auto const do_search = [=](auto const key_validity_iter) { thrust::transform( rmm::exec_policy(stream), input_it, input_it + num_rows, output_it, search_list_nested_types_fn{find_option, key_validity_iter, d_comp, search_key_is_scalar}); - }; - - if constexpr (search_key_is_scalar) { - auto const key_validity_iter = cudf::detail::make_validity_iterator(search_keys); - do_search(key_validity_iter); } else { - auto const keys_dv_ptr = column_device_view::create(search_keys, stream); - auto const key_validity_iter = cudf::detail::make_validity_iterator(*keys_dv_ptr); - do_search(key_validity_iter); - } - } else { - auto const d_comp = comparator.equal_to(nullate::DYNAMIC{has_nulls}); + auto const d_comp = comparator.equal_to(nullate::DYNAMIC{has_nulls}); - auto const do_search = [=](auto const key_validity_iter) { thrust::transform( rmm::exec_policy(stream), input_it, input_it + num_rows, output_it, search_list_nested_types_fn{find_option, key_validity_iter, d_comp, search_key_is_scalar}); - }; - - if constexpr (search_key_is_scalar) { - auto const key_validity_iter = cudf::detail::make_validity_iterator(search_keys); - do_search(key_validity_iter); - } else { - auto const keys_dv_ptr = column_device_view::create(search_keys, stream); - auto const key_validity_iter = cudf::detail::make_validity_iterator(*keys_dv_ptr); - do_search(key_validity_iter); } + }; + + if constexpr (search_key_is_scalar) { + auto const key_validity_iter = cudf::detail::make_validity_iterator(search_keys); + do_search(key_validity_iter); + } else { + auto const keys_dv_ptr = column_device_view::create(search_keys, stream); + auto const key_validity_iter = cudf::detail::make_validity_iterator(*keys_dv_ptr); + do_search(key_validity_iter); } } diff --git a/cpp/src/partitioning/partitioning.cu b/cpp/src/partitioning/partitioning.cu index 876c8f136ae..f9376c3da23 100644 --- a/cpp/src/partitioning/partitioning.cu +++ b/cpp/src/partitioning/partitioning.cu @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include @@ -489,9 +489,12 @@ std::pair, std::vector> hash_partition_table( auto row_partition_offset = cudf::detail::make_zeroed_device_uvector_async(num_rows, stream); - auto const device_input = table_device_view::create(table_to_hash, stream); - auto const hasher = row_hasher( - nullate::DYNAMIC{hash_has_nulls}, *device_input, seed); + // auto const device_input = table_device_view::create(table_to_hash, stream); + // auto const hasher = row_hasher( + // nullate::DYNAMIC{hash_has_nulls}, *device_input, seed); + auto const row_hasher = experimental::row::hash::row_hasher(table_to_hash, stream); + auto const hasher = + row_hasher.device_hasher(nullate::DYNAMIC{hash_has_nulls}, seed); // If the number of partitions is a power of two, we can compute the partition // number of each row more efficiently with bitwise operations @@ -730,7 +733,7 @@ std::pair, std::vector> hash_partition( return std::pair(empty_like(input), std::vector(num_partitions, 0)); } - if (has_nulls(table_to_hash)) { + if (has_nested_nulls(table_to_hash)) { return hash_partition_table( input, table_to_hash, num_partitions, seed, stream, mr); } else { diff --git a/cpp/src/reductions/scan/rank_scan.cu b/cpp/src/reductions/scan/rank_scan.cu index 8c6a3cdf088..8b9d0f0f859 100644 --- a/cpp/src/reductions/scan/rank_scan.cu +++ b/cpp/src/reductions/scan/rank_scan.cu @@ -39,7 +39,7 @@ struct rank_equality_functor { { } - auto __device__ operator()(size_type row_index) + auto __device__ operator()(size_type row_index) const noexcept { return _resolver(row_index == 0 || !_comparator(row_index, row_index - 1), row_index); } @@ -67,8 +67,8 @@ std::unique_ptr rank_generator(column_view const& order_by, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto const order_by_view = table_view{{order_by}}; - auto comp = cudf::experimental::row::equality::self_comparator(order_by_view, stream); + auto const order_by_tview = table_view{{order_by}}; + auto comp = cudf::experimental::row::equality::self_comparator(order_by_tview, stream); auto ranks = make_fixed_width_column( data_type{type_to_id()}, order_by.size(), mask_state::UNALLOCATED, stream, mr); @@ -82,7 +82,7 @@ std::unique_ptr rank_generator(column_view const& order_by, device_comparator, resolver)); }; - if (cudf::detail::has_nested_columns(order_by_view)) { + if (cudf::detail::has_nested_columns(order_by_tview)) { auto const device_comparator = comp.equal_to(nullate::DYNAMIC{has_nested_nulls(table_view({order_by}))}); comparator_helper(device_comparator); diff --git a/cpp/src/search/contains_table.cu b/cpp/src/search/contains_table.cu index 5b2db3dbb83..c1cc4659a19 100644 --- a/cpp/src/search/contains_table.cu +++ b/cpp/src/search/contains_table.cu @@ -204,9 +204,8 @@ rmm::device_uvector contains_with_lists_or_nans(table_view const& haystack auto const bitmask_buffer_and_ptr = build_row_bitmask(haystack, stream); auto const row_bitmask_ptr = bitmask_buffer_and_ptr.second; - // Insert only rows that do not have any null at any level. - if (cudf::detail::has_nested_columns(haystack)) { - auto const insert_map = [&](auto const value_comp) { + auto const insert_map = [&](auto const value_comp) { + if (cudf::detail::has_nested_columns(haystack)) { auto const d_eqcomp = strong_index_comparator_adapter{comparator.equal_to( nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, value_comp)}; map.insert_if(haystack_it, @@ -216,11 +215,7 @@ rmm::device_uvector contains_with_lists_or_nans(table_view const& haystack d_hasher, d_eqcomp, stream.value()); - }; - - dispatch_nan_comparator(compare_nans, insert_map); - } else { - auto const insert_map = [&](auto const value_comp) { + } else { auto const d_eqcomp = strong_index_comparator_adapter{comparator.equal_to( nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, value_comp)}; map.insert_if(haystack_it, @@ -230,31 +225,27 @@ rmm::device_uvector contains_with_lists_or_nans(table_view const& haystack d_hasher, d_eqcomp, stream.value()); - }; - - dispatch_nan_comparator(compare_nans, insert_map); - } + } + }; + // Insert only rows that do not have any null at any level. + dispatch_nan_comparator(compare_nans, insert_map); } else { // haystack_doesn't_have_nulls || compare_nulls == null_equality::EQUAL - if (cudf::detail::has_nested_columns(haystack)) { - auto const insert_map = [&](auto const value_comp) { + auto const insert_map = [&](auto const value_comp) { + if (cudf::detail::has_nested_columns(haystack)) { auto const d_eqcomp = strong_index_comparator_adapter{comparator.equal_to( nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, value_comp)}; map.insert( haystack_it, haystack_it + haystack.num_rows(), d_hasher, d_eqcomp, stream.value()); - }; - - dispatch_nan_comparator(compare_nans, insert_map); - } else { - auto const insert_map = [&](auto const value_comp) { + } else { auto const d_eqcomp = strong_index_comparator_adapter{comparator.equal_to( nullate::DYNAMIC{haystack_has_nulls}, compare_nulls, value_comp)}; map.insert( haystack_it, haystack_it + haystack.num_rows(), d_hasher, d_eqcomp, stream.value()); - }; + } + }; - dispatch_nan_comparator(compare_nans, insert_map); - } + dispatch_nan_comparator(compare_nans, insert_map); } } @@ -272,8 +263,9 @@ rmm::device_uvector contains_with_lists_or_nans(table_view const& haystack auto const comparator = cudf::experimental::row::equality::two_table_comparator(haystack, needles, stream); - if (cudf::detail::has_nested_columns(haystack) or cudf::detail::has_nested_columns(needles)) { - auto const check_contains = [&](auto const value_comp) { + + auto const check_contains = [&](auto const value_comp) { + if (cudf::detail::has_nested_columns(haystack) or cudf::detail::has_nested_columns(needles)) { auto const d_eqcomp = comparator.equal_to(nullate::DYNAMIC{has_any_nulls}, compare_nulls, value_comp); map.contains(needles_it, @@ -282,11 +274,7 @@ rmm::device_uvector contains_with_lists_or_nans(table_view const& haystack d_hasher, d_eqcomp, stream.value()); - }; - - dispatch_nan_comparator(compare_nans, check_contains); - } else { - auto const check_contains = [&](auto const value_comp) { + } else { auto const d_eqcomp = comparator.equal_to(nullate::DYNAMIC{has_any_nulls}, compare_nulls, value_comp); map.contains(needles_it, @@ -295,10 +283,10 @@ rmm::device_uvector contains_with_lists_or_nans(table_view const& haystack d_hasher, d_eqcomp, stream.value()); - }; + } + }; - dispatch_nan_comparator(compare_nans, check_contains); - } + dispatch_nan_comparator(compare_nans, check_contains); } return contained; diff --git a/cpp/src/sort/rank.cu b/cpp/src/sort/rank.cu index 8d3ef3a3c1e..c16e892357f 100644 --- a/cpp/src/sort/rank.cu +++ b/cpp/src/sort/rank.cu @@ -55,7 +55,7 @@ struct unique_functor { { } - auto __device__ operator()(size_type index) + auto __device__ operator()(size_type index) const noexcept { return static_cast(index == 0 || not _device_comparator(_permute[index], _permute[index - 1])); diff --git a/cpp/src/transform/one_hot_encode.cu b/cpp/src/transform/one_hot_encode.cu index c63e6d1261f..9defea34215 100644 --- a/cpp/src/transform/one_hot_encode.cu +++ b/cpp/src/transform/one_hot_encode.cu @@ -43,7 +43,7 @@ struct ohe_equality_functor { { } - auto __device__ operator()(size_type i) + auto __device__ operator()(size_type i) const noexcept { auto const element_index = cudf::experimental::row::lhs_index_type{i % _input_size}; auto const category_index = cudf::experimental::row::rhs_index_type{i / _input_size}; From 9d0f7a6fab276b9c6ae2c7951e09f0bc8dcd6ed8 Mon Sep 17 00:00:00 2001 From: divyegala Date: Sat, 11 Feb 2023 15:39:47 -0800 Subject: [PATCH 12/14] address review, mark members of functors as private --- .../cudf/table/experimental/row_operators.cuh | 12 ++++++------ cpp/src/binaryop/compiled/struct_binary_ops.cuh | 1 + cpp/src/groupby/sort/group_rank_scan.cu | 1 + cpp/src/partitioning/partitioning.cu | 9 +++------ cpp/src/reductions/scan/rank_scan.cu | 1 + cpp/src/sort/rank.cu | 1 + cpp/src/transform/one_hot_encode.cu | 1 + 7 files changed, 14 insertions(+), 12 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 6040aea9fce..f9ffbfcdf7b 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -245,7 +245,7 @@ using optional_dremel_view = thrust::optional; * second letter in both words is the first non-equal letter, and `a < b`, thus * `aac < abb`. * - * @note: The operator overloads in sub-class `element_comparator` are templated via the + * @note The operator overloads in sub-class `element_comparator` are templated via the * `type_dispatcher` to help select an overload instance for each column in a table. * So, `cudf::is_nested` will return `true` if the table has nested-type columns, * but it will be a runtime error if template parameter `has_nested_columns != true`. @@ -867,7 +867,7 @@ class self_comparator { * * `F(i,j)` returns true if and only if row `i` compares lexicographically less than row `j`. * - * @note: The operator overloads in sub-class `element_comparator` are templated via the + * @note The operator overloads in sub-class `element_comparator` are templated via the * `type_dispatcher` to help select an overload instance for each column in a table. * So, `cudf::is_nested` will return `true` if the table has nested-type columns, * but it will be a runtime error if template parameter `has_nested_columns != true`. @@ -1029,7 +1029,7 @@ class two_table_comparator { * only if row `i` of the right table compares lexicographically less than row * `j` of the left table. * - * @note: The operator overloads in sub-class `element_comparator` are templated via the + * @note The operator overloads in sub-class `element_comparator` are templated via the * `type_dispatcher` to help select an overload instance for each column in a table. * So, `cudf::is_nested` will return `true` if the table has nested-type columns, * but it will be a runtime error if template parameter `has_nested_columns != true`. @@ -1161,7 +1161,7 @@ struct nan_equal_physical_equality_comparator { * returns false, representing unequal rows. If the rows are compared without mismatched elements, * the rows are equal. * - * @note: The operator overloads in sub-class `element_comparator` are templated via the + * @note The operator overloads in sub-class `element_comparator` are templated via the * `type_dispatcher` to help select an overload instance for each column in a table. * So, `cudf::is_nested` will return `true` if the table has nested-type columns, * but it will be a runtime error if template parameter `has_nested_columns != true`. @@ -1478,7 +1478,7 @@ class self_comparator { * * `F(i,j)` returns true if and only if row `i` compares equal to row `j`. * - * @note: The operator overloads in sub-class `element_comparator` are templated via the + * @note The operator overloads in sub-class `element_comparator` are templated via the * `type_dispatcher` to help select an overload instance for each column in a table. * So, `cudf::is_nested` will return `true` if the table has nested-type columns, * but it will be a runtime error if template parameter `has_nested_columns != true`. @@ -1592,7 +1592,7 @@ class two_table_comparator { * Similarly, `F(rhs_index_type i, lhs_index_type j)` returns true if and only if row `i` of the * right table compares equal to row `j` of the left table. * - * @note: The operator overloads in sub-class `element_comparator` are templated via the + * @note The operator overloads in sub-class `element_comparator` are templated via the * `type_dispatcher` to help select an overload instance for each column in a table. * So, `cudf::is_nested` will return `true` if the table has nested-type columns, * but it will be a runtime error if template parameter `has_nested_columns != true`. diff --git a/cpp/src/binaryop/compiled/struct_binary_ops.cuh b/cpp/src/binaryop/compiled/struct_binary_ops.cuh index 0f273f8f0dd..d167f0fe3c5 100644 --- a/cpp/src/binaryop/compiled/struct_binary_ops.cuh +++ b/cpp/src/binaryop/compiled/struct_binary_ops.cuh @@ -128,6 +128,7 @@ struct struct_equality_functor { return _optional_iter[i].has_value() and (_device_comparator(lhs, rhs) == _preserve_output); } + private: OptionalIteratorType _optional_iter; DeviceComparatorType _device_comparator; bool _is_lhs_scalar; diff --git a/cpp/src/groupby/sort/group_rank_scan.cu b/cpp/src/groupby/sort/group_rank_scan.cu index 5715a4829f4..479ce166724 100644 --- a/cpp/src/groupby/sort/group_rank_scan.cu +++ b/cpp/src/groupby/sort/group_rank_scan.cu @@ -66,6 +66,7 @@ struct unique_identifier { } } + private: size_type const* _labels; size_type const* _offsets; permuted_equal_t _permuted_equal; diff --git a/cpp/src/partitioning/partitioning.cu b/cpp/src/partitioning/partitioning.cu index f9376c3da23..380977f7f3e 100644 --- a/cpp/src/partitioning/partitioning.cu +++ b/cpp/src/partitioning/partitioning.cu @@ -489,12 +489,9 @@ std::pair, std::vector> hash_partition_table( auto row_partition_offset = cudf::detail::make_zeroed_device_uvector_async(num_rows, stream); - // auto const device_input = table_device_view::create(table_to_hash, stream); - // auto const hasher = row_hasher( - // nullate::DYNAMIC{hash_has_nulls}, *device_input, seed); - auto const row_hasher = experimental::row::hash::row_hasher(table_to_hash, stream); - auto const hasher = - row_hasher.device_hasher(nullate::DYNAMIC{hash_has_nulls}, seed); + auto const device_input = table_device_view::create(table_to_hash, stream); + auto const hasher = row_hasher( + nullate::DYNAMIC{hash_has_nulls}, *device_input, seed); // If the number of partitions is a power of two, we can compute the partition // number of each row more efficiently with bitwise operations diff --git a/cpp/src/reductions/scan/rank_scan.cu b/cpp/src/reductions/scan/rank_scan.cu index 8b9d0f0f859..538763099d3 100644 --- a/cpp/src/reductions/scan/rank_scan.cu +++ b/cpp/src/reductions/scan/rank_scan.cu @@ -44,6 +44,7 @@ struct rank_equality_functor { return _resolver(row_index == 0 || !_comparator(row_index, row_index - 1), row_index); } + private: device_comparator_type _comparator; value_resolver _resolver; }; diff --git a/cpp/src/sort/rank.cu b/cpp/src/sort/rank.cu index c16e892357f..fd65e38d467 100644 --- a/cpp/src/sort/rank.cu +++ b/cpp/src/sort/rank.cu @@ -61,6 +61,7 @@ struct unique_functor { not _device_comparator(_permute[index], _permute[index - 1])); } + private: PermutationIteratorType _permute; DeviceComparatorType _device_comparator; }; diff --git a/cpp/src/transform/one_hot_encode.cu b/cpp/src/transform/one_hot_encode.cu index 9defea34215..3f3dd422f9d 100644 --- a/cpp/src/transform/one_hot_encode.cu +++ b/cpp/src/transform/one_hot_encode.cu @@ -50,6 +50,7 @@ struct ohe_equality_functor { return _d_equal(element_index, category_index); } + private: size_type _input_size; DeviceComparatorType _d_equal; }; From fe41be8e0c8dbead605acee50df34a88455712a1 Mon Sep 17 00:00:00 2001 From: divyegala Date: Sat, 11 Feb 2023 15:40:55 -0800 Subject: [PATCH 13/14] removing partitioning --- cpp/src/partitioning/partitioning.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/partitioning/partitioning.cu b/cpp/src/partitioning/partitioning.cu index 380977f7f3e..876c8f136ae 100644 --- a/cpp/src/partitioning/partitioning.cu +++ b/cpp/src/partitioning/partitioning.cu @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include @@ -730,7 +730,7 @@ std::pair, std::vector> hash_partition( return std::pair(empty_like(input), std::vector(num_partitions, 0)); } - if (has_nested_nulls(table_to_hash)) { + if (has_nulls(table_to_hash)) { return hash_partition_table( input, table_to_hash, num_partitions, seed, stream, mr); } else { From 02dd5c53265b680863dbf83adf775667a1898408 Mon Sep 17 00:00:00 2001 From: divyegala Date: Sat, 11 Feb 2023 16:24:09 -0800 Subject: [PATCH 14/14] simplify lists/contains since it already has a nested-type dispatch mechanism --- cpp/src/lists/contains.cu | 28 +++++++--------------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/cpp/src/lists/contains.cu b/cpp/src/lists/contains.cu index 03ac2919bce..05fe82d1713 100644 --- a/cpp/src/lists/contains.cu +++ b/cpp/src/lists/contains.cu @@ -267,29 +267,15 @@ void index_of_nested_types(InputIterator input_it, auto const has_nulls = has_nested_nulls(child_tview) || has_nested_nulls(keys_tview); auto const comparator = cudf::experimental::row::equality::two_table_comparator(child_tview, keys_tview, stream); + auto const d_comp = comparator.equal_to(nullate::DYNAMIC{has_nulls}); - auto const tables_have_nested_columns = - cudf::detail::has_nested_columns(child_tview) or cudf::detail::has_nested_columns(keys_tview); auto const do_search = [=](auto const key_validity_iter) { - if (tables_have_nested_columns) { - auto const d_comp = comparator.equal_to(nullate::DYNAMIC{has_nulls}); - - thrust::transform( - rmm::exec_policy(stream), - input_it, - input_it + num_rows, - output_it, - search_list_nested_types_fn{find_option, key_validity_iter, d_comp, search_key_is_scalar}); - } else { - auto const d_comp = comparator.equal_to(nullate::DYNAMIC{has_nulls}); - - thrust::transform( - rmm::exec_policy(stream), - input_it, - input_it + num_rows, - output_it, - search_list_nested_types_fn{find_option, key_validity_iter, d_comp, search_key_is_scalar}); - } + thrust::transform( + rmm::exec_policy(stream), + input_it, + input_it + num_rows, + output_it, + search_list_nested_types_fn{find_option, key_validity_iter, d_comp, search_key_is_scalar}); }; if constexpr (search_key_is_scalar) {