From 4c5220d644acda22e9f81cba9ff25362832c7f64 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Fri, 19 Nov 2021 22:37:02 +0530 Subject: [PATCH 1/7] add missing stream arg to *::create() --- cpp/src/copying/copy.cu | 2 +- cpp/src/copying/shift.cu | 4 ++-- cpp/src/groupby/hash/groupby.cu | 8 ++++---- cpp/src/groupby/sort/group_count.cu | 2 +- cpp/src/groupby/sort/group_nth_element.cu | 2 +- cpp/src/groupby/sort/group_tdigest.cu | 2 +- cpp/src/lists/extract.cu | 2 +- cpp/src/lists/interleave_columns.cu | 8 ++++---- cpp/src/quantiles/quantile.cu | 2 +- cpp/src/quantiles/tdigest/tdigest.cu | 2 +- cpp/src/reductions/nth_element.cu | 2 +- cpp/src/replace/nans.cu | 8 ++++---- cpp/src/replace/nulls.cu | 10 +++++----- cpp/src/replace/replace.cu | 12 ++++++------ cpp/src/reshape/interleave_columns.cu | 6 +++--- cpp/src/unary/nan_ops.cu | 2 +- 16 files changed, 37 insertions(+), 37 deletions(-) diff --git a/cpp/src/copying/copy.cu b/cpp/src/copying/copy.cu index f4d09c8e0be..1240cb7c0cf 100644 --- a/cpp/src/copying/copy.cu +++ b/cpp/src/copying/copy.cu @@ -307,7 +307,7 @@ std::unique_ptr copy_if_else(Left const& lhs, if (boolean_mask.is_empty()) { return cudf::empty_like(lhs); } - auto bool_mask_device_p = column_device_view::create(boolean_mask); + auto bool_mask_device_p = column_device_view::create(boolean_mask, stream); column_device_view bool_mask_device = *bool_mask_device_p; auto const has_nulls = boolean_mask.has_nulls(); diff --git a/cpp/src/copying/shift.cu b/cpp/src/copying/shift.cu index 0b88545ffa5..dacc1d07447 100644 --- a/cpp/src/copying/shift.cu +++ b/cpp/src/copying/shift.cu @@ -105,10 +105,10 @@ struct shift_functor { using ScalarType = cudf::scalar_type_t; auto& scalar = static_cast(fill_value); - auto device_input = column_device_view::create(input); + auto device_input = column_device_view::create(input, stream); auto output = detail::allocate_like(input, input.size(), mask_allocation_policy::NEVER, stream, mr); - auto device_output = mutable_column_device_view::create(*output); + auto device_output = mutable_column_device_view::create(*output, stream); auto const scalar_is_valid = scalar.is_valid(stream); diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu index e35fa36a289..7c8a86e048d 100644 --- a/cpp/src/groupby/hash/groupby.cu +++ b/cpp/src/groupby/hash/groupby.cu @@ -308,13 +308,13 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final column_view sum_result = sparse_results->get_result(col, *sum_agg); column_view count_result = sparse_results->get_result(col, *count_agg); - auto values_view = column_device_view::create(col); - auto sum_view = column_device_view::create(sum_result); - auto count_view = column_device_view::create(count_result); + auto values_view = column_device_view::create(col, stream); + auto sum_view = column_device_view::create(sum_result, stream); + auto count_view = column_device_view::create(count_result, stream); auto var_result = make_fixed_width_column( cudf::detail::target_type(result_type, agg.kind), col.size(), mask_state::ALL_NULL, stream); - auto var_result_view = mutable_column_device_view::create(var_result->mutable_view()); + auto var_result_view = mutable_column_device_view::create(var_result->mutable_view(), stream); mutable_table_view var_table_view{{var_result->mutable_view()}}; cudf::detail::initialize_with_identity(var_table_view, {agg.kind}, stream); diff --git a/cpp/src/groupby/sort/group_count.cu b/cpp/src/groupby/sort/group_count.cu index 121e4bb889d..6a2ff994b8b 100644 --- a/cpp/src/groupby/sort/group_count.cu +++ b/cpp/src/groupby/sort/group_count.cu @@ -45,7 +45,7 @@ std::unique_ptr group_count_valid(column_view const& values, if (num_groups == 0) { return result; } if (values.nullable()) { - auto values_view = column_device_view::create(values); + auto values_view = column_device_view::create(values, stream); // make_validity_iterator returns a boolean iterator that sums to 1 (1+1=1) // so we need to transform it to cast it to an integer type diff --git a/cpp/src/groupby/sort/group_nth_element.cu b/cpp/src/groupby/sort/group_nth_element.cu index f2c57abf54e..c1fc58beb80 100644 --- a/cpp/src/groupby/sort/group_nth_element.cu +++ b/cpp/src/groupby/sort/group_nth_element.cu @@ -72,7 +72,7 @@ std::unique_ptr group_nth_element(column_view const& values, }); } else { // skip nulls (equivalent to pandas nth(dropna='any')) // Returns index of nth value. - auto values_view = column_device_view::create(values); + auto values_view = column_device_view::create(values, stream); auto bitmask_iterator = thrust::make_transform_iterator(cudf::detail::make_validity_iterator(*values_view), [] __device__(auto b) { return static_cast(b); }); diff --git a/cpp/src/groupby/sort/group_tdigest.cu b/cpp/src/groupby/sort/group_tdigest.cu index 146a6a8c31c..92b45f1dd44 100644 --- a/cpp/src/groupby/sort/group_tdigest.cu +++ b/cpp/src/groupby/sort/group_tdigest.cu @@ -692,7 +692,7 @@ struct typed_group_tdigest { // device column view. handy because the .element() function // automatically handles fixed-point conversions for us - auto d_col = cudf::column_device_view::create(col); + auto d_col = cudf::column_device_view::create(col, stream); // compute min and max columns auto min_col = cudf::make_numeric_column( diff --git a/cpp/src/lists/extract.cu b/cpp/src/lists/extract.cu index 381864e1a68..7c6c612eb25 100644 --- a/cpp/src/lists/extract.cu +++ b/cpp/src/lists/extract.cu @@ -53,7 +53,7 @@ std::unique_ptr make_index_child(column_view const& indices, // `segmented_gather()` on a null index should produce a null row. if (not indices.nullable()) { return std::make_unique(indices, stream); } - auto const d_indices = column_device_view::create(indices); + auto const d_indices = column_device_view::create(indices, stream); // Replace null indices with MAX_SIZE_TYPE, so that gather() returns null for them. auto const null_replaced_iter_begin = cudf::detail::make_null_replacement_iterator(*d_indices, std::numeric_limits::max()); diff --git a/cpp/src/lists/interleave_columns.cu b/cpp/src/lists/interleave_columns.cu index b9b73d98ed2..276f18b7577 100644 --- a/cpp/src/lists/interleave_columns.cu +++ b/cpp/src/lists/interleave_columns.cu @@ -50,7 +50,7 @@ generate_list_offsets_and_validities(table_view const& input, auto const num_cols = input.num_columns(); auto const num_rows = input.num_rows(); auto const num_output_lists = num_rows * num_cols; - auto const table_dv_ptr = table_device_view::create(input); + auto const table_dv_ptr = table_device_view::create(input, stream); // The output offsets column. static_assert(sizeof(offset_type) == sizeof(int32_t)); @@ -217,7 +217,7 @@ struct interleave_list_entries_impl(), data_has_null_mask}; @@ -250,7 +250,7 @@ struct interleave_list_entries_impl( rmm::mr::device_memory_resource* mr) const noexcept { auto const num_cols = input.num_columns(); - auto const table_dv_ptr = table_device_view::create(input); + auto const table_dv_ptr = table_device_view::create(input, stream); // The output child column. auto output = allocate_like(lists_column_view(*input.begin()).child(), @@ -258,7 +258,7 @@ struct interleave_list_entries_impl( mask_allocation_policy::NEVER, stream, mr); - auto output_dv_ptr = mutable_column_device_view::create(*output); + auto output_dv_ptr = mutable_column_device_view::create(*output, stream); // The array of int8_t to store entry validities. auto validities = diff --git a/cpp/src/quantiles/quantile.cu b/cpp/src/quantiles/quantile.cu index 073b318b879..645730fdb2f 100644 --- a/cpp/src/quantiles/quantile.cu +++ b/cpp/src/quantiles/quantile.cu @@ -80,7 +80,7 @@ struct quantile_functor { } auto d_input = column_device_view::create(input, stream); - auto d_output = mutable_column_device_view::create(output->mutable_view()); + auto d_output = mutable_column_device_view::create(output->mutable_view(), stream); auto q_device = cudf::detail::make_device_uvector_sync(q); diff --git a/cpp/src/quantiles/tdigest/tdigest.cu b/cpp/src/quantiles/tdigest/tdigest.cu index 57c221b15ed..a9950cced57 100644 --- a/cpp/src/quantiles/tdigest/tdigest.cu +++ b/cpp/src/quantiles/tdigest/tdigest.cu @@ -199,7 +199,7 @@ std::unique_ptr compute_approx_percentiles(tdigest_column_view const& in weight.begin(), cumulative_weights->mutable_view().begin()); - auto percentiles_cdv = column_device_view::create(percentiles); + auto percentiles_cdv = column_device_view::create(percentiles, stream); // leaf is a column of size input.size() * percentiles.size() auto const num_output_values = input.size() * percentiles.size(); diff --git a/cpp/src/reductions/nth_element.cu b/cpp/src/reductions/nth_element.cu index 001ee8c791f..2b8066a57ee 100644 --- a/cpp/src/reductions/nth_element.cu +++ b/cpp/src/reductions/nth_element.cu @@ -38,7 +38,7 @@ std::unique_ptr cudf::reduction::nth_element(column_view const& co auto valid_count = col.size() - col.null_count(); n = wrap_n(valid_count); CUDF_EXPECTS(n >= 0 and n < valid_count, "Index out of bounds"); - auto dcol = column_device_view::create(col); + auto dcol = column_device_view::create(col, stream); auto bitmask_iterator = thrust::make_transform_iterator(cudf::detail::make_validity_iterator(*dcol), [] __device__(auto b) { return static_cast(b); }); diff --git a/cpp/src/replace/nans.cu b/cpp/src/replace/nans.cu index c1c26573692..fc41f0a2ca7 100644 --- a/cpp/src/replace/nans.cu +++ b/cpp/src/replace/nans.cu @@ -48,7 +48,7 @@ struct replace_nans_functor { if (input.is_empty()) { return cudf::make_empty_column(input.type()); } - auto input_device_view = column_device_view::create(input); + auto input_device_view = column_device_view::create(input, stream); size_type size = input.size(); auto predicate = [dinput = *input_device_view] __device__(auto i) { @@ -89,7 +89,7 @@ std::unique_ptr replace_nans(column_view const& input, return type_dispatcher(input.type(), replace_nans_functor{}, input, - *column_device_view::create(replacement), + *column_device_view::create(replacement, stream), replacement.nullable(), stream, mr); @@ -180,10 +180,10 @@ void normalize_nans_and_zeros(mutable_column_view in_out, rmm::cuda_stream_view column_view input = in_out; // to device. unique_ptr which gets automatically cleaned up when we leave - auto device_in = column_device_view::create(input); + auto device_in = column_device_view::create(input, stream); // from device. unique_ptr which gets automatically cleaned up when we leave. - auto device_out = mutable_column_device_view::create(in_out); + auto device_out = mutable_column_device_view::create(in_out, stream); // invoke the actual kernel. cudf::type_dispatcher( diff --git a/cpp/src/replace/nulls.cu b/cpp/src/replace/nulls.cu index d12f18f4827..93bc6cf5ae5 100644 --- a/cpp/src/replace/nulls.cu +++ b/cpp/src/replace/nulls.cu @@ -180,9 +180,9 @@ struct replace_nulls_column_kernel_forwarder { auto replace = replace_nulls; if (output_view.nullable()) replace = replace_nulls; - auto device_in = cudf::column_device_view::create(input); - auto device_out = cudf::mutable_column_device_view::create(output_view); - auto device_replacement = cudf::column_device_view::create(replacement); + auto device_in = cudf::column_device_view::create(input, stream); + auto device_out = cudf::mutable_column_device_view::create(output_view, stream); + auto device_replacement = cudf::column_device_view::create(replacement, stream); rmm::device_scalar valid_counter(0, stream); cudf::size_type* valid_count = valid_counter.data(); @@ -311,7 +311,7 @@ struct replace_nulls_scalar_kernel_forwarder { using ScalarType = cudf::scalar_type_t; auto& s1 = static_cast(replacement); - auto device_in = cudf::column_device_view::create(input); + auto device_in = cudf::column_device_view::create(input, stream); auto func = replace_nulls_functor{s1.data()}; thrust::transform(rmm::exec_policy(stream), @@ -366,7 +366,7 @@ std::unique_ptr replace_nulls_policy_impl(cudf::column_view const& rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto device_in = cudf::column_device_view::create(input); + auto device_in = cudf::column_device_view::create(input, stream); auto index = thrust::make_counting_iterator(0); auto valid_it = cudf::detail::make_validity_iterator(*device_in); auto in_begin = thrust::make_zip_iterator(thrust::make_tuple(index, valid_it)); diff --git a/cpp/src/replace/replace.cu b/cpp/src/replace/replace.cu index a9c7db048b3..3505fe1f5d7 100644 --- a/cpp/src/replace/replace.cu +++ b/cpp/src/replace/replace.cu @@ -323,10 +323,10 @@ struct replace_kernel_forwarder { auto output_view = output->mutable_view(); auto grid = cudf::detail::grid_1d{output_view.size(), BLOCK_SIZE, 1}; - auto device_in = cudf::column_device_view::create(input_col); - auto device_out = cudf::mutable_column_device_view::create(output_view); - auto device_values_to_replace = cudf::column_device_view::create(values_to_replace); - auto device_replacement_values = cudf::column_device_view::create(replacement_values); + auto device_in = cudf::column_device_view::create(input_col, stream); + auto device_out = cudf::mutable_column_device_view::create(output_view, stream); + auto device_values_to_replace = cudf::column_device_view::create(values_to_replace, stream); + auto device_replacement_values = cudf::column_device_view::create(replacement_values, stream); replace<<>>(*device_in, *device_out, @@ -412,7 +412,7 @@ std::unique_ptr replace_kernel_forwarder::operator() offsets = cudf::strings::detail::make_offsets_child_column( sizes_view.begin(), sizes_view.end(), stream, mr); auto offsets_view = offsets->mutable_view(); - auto device_offsets = cudf::mutable_column_device_view::create(offsets_view); + auto device_offsets = cudf::mutable_column_device_view::create(offsets_view, stream); auto const bytes = cudf::detail::get_value(offsets_view, offsets_view.size() - 1, stream); @@ -422,7 +422,7 @@ std::unique_ptr replace_kernel_forwarder::operator()mutable_view(); - auto device_chars = cudf::mutable_column_device_view::create(output_chars_view); + auto device_chars = cudf::mutable_column_device_view::create(output_chars_view, stream); replace_second<<>>( *device_in, *device_replacement, *device_offsets, *device_chars, *device_indices); diff --git a/cpp/src/reshape/interleave_columns.cu b/cpp/src/reshape/interleave_columns.cu index e97c9f8109c..0e3ead3fd99 100644 --- a/cpp/src/reshape/interleave_columns.cu +++ b/cpp/src/reshape/interleave_columns.cu @@ -111,7 +111,7 @@ struct interleave_columns_impl(0); auto index_end = thrust::make_counting_iterator(output_size); diff --git a/cpp/src/unary/nan_ops.cu b/cpp/src/unary/nan_ops.cu index 1840aebf8f0..c6b9ecefe94 100644 --- a/cpp/src/unary/nan_ops.cu +++ b/cpp/src/unary/nan_ops.cu @@ -34,7 +34,7 @@ struct nan_dispatcher { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto input_device_view = column_device_view::create(input); + auto input_device_view = column_device_view::create(input, stream); if (input.has_nulls()) { auto input_pair_iterator = make_pair_iterator(*input_device_view); From 2ff7d891cafab450c7a01f02309eb5c032ba020c Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Fri, 19 Nov 2021 22:40:11 +0530 Subject: [PATCH 2/7] add missing stream arg to is_valid() --- cpp/src/filling/fill.cu | 2 +- cpp/src/search/search.cu | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/src/filling/fill.cu b/cpp/src/filling/fill.cu index d17c698f91a..50f750e6416 100644 --- a/cpp/src/filling/fill.cu +++ b/cpp/src/filling/fill.cu @@ -49,7 +49,7 @@ void in_place_fill(cudf::mutable_column_view& destination, using ScalarType = cudf::scalar_type_t; auto p_scalar = static_cast(&value); T fill_value = p_scalar->value(stream); - bool is_valid = p_scalar->is_valid(); + bool is_valid = p_scalar->is_valid(stream); cudf::detail::copy_range(thrust::make_constant_iterator(fill_value), thrust::make_constant_iterator(is_valid), destination, diff --git a/cpp/src/search/search.cu b/cpp/src/search/search.cu index 462d0678eab..5477eefc92e 100644 --- a/cpp/src/search/search.cu +++ b/cpp/src/search/search.cu @@ -193,12 +193,12 @@ bool contains_scalar_dispatch::operator()(column_view const& // first, find the value in the dictionary's key set auto index = cudf::dictionary::detail::get_index(dict_col, value, stream); // if found, check the index is actually in the indices column - return index->is_valid() ? cudf::type_dispatcher(dict_col.indices().type(), - contains_scalar_dispatch{}, - dict_col.indices(), - *index, - stream) - : false; + return index->is_valid(stream) ? cudf::type_dispatcher(dict_col.indices().type(), + contains_scalar_dispatch{}, + dict_col.indices(), + *index, + stream) + : false; } } // namespace From c194b067a82a1b9393cb0088489701199b380c49 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Tue, 23 Nov 2021 13:48:16 +0530 Subject: [PATCH 3/7] remove unused stream, mr default arguments --- cpp/src/binaryop/compiled/binary_ops.hpp | 78 ++++++++++----------- cpp/src/dictionary/remove_keys.cu | 25 +++---- cpp/src/io/csv/writer_impl.cu | 7 +- cpp/src/io/utilities/column_buffer.hpp | 13 ++-- cpp/src/join/conditional_join.hpp | 13 ++-- cpp/src/reductions/reductions.cpp | 11 ++- cpp/src/strings/strings_column_factories.cu | 13 ++-- 7 files changed, 72 insertions(+), 88 deletions(-) diff --git a/cpp/src/binaryop/compiled/binary_ops.hpp b/cpp/src/binaryop/compiled/binary_ops.hpp index 26a0f26b59c..146959bd21c 100644 --- a/cpp/src/binaryop/compiled/binary_ops.hpp +++ b/cpp/src/binaryop/compiled/binary_ops.hpp @@ -31,29 +31,26 @@ class mutable_column_device_view; namespace binops { namespace compiled { -std::unique_ptr string_null_min_max( - scalar const& lhs, - column_view const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr string_null_min_max(scalar const& lhs, + column_view const& rhs, + binary_operator op, + data_type output_type, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); -std::unique_ptr string_null_min_max( - column_view const& lhs, - scalar const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr string_null_min_max(column_view const& lhs, + scalar const& rhs, + binary_operator op, + data_type output_type, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); -std::unique_ptr string_null_min_max( - column_view const& lhs, - column_view const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr string_null_min_max(column_view const& lhs, + column_view const& rhs, + binary_operator op, + data_type output_type, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Performs a binary operation between a string scalar and a string @@ -74,13 +71,12 @@ std::unique_ptr string_null_min_max( * @param mr Device memory resource used to allocate the returned column's device memory * @return std::unique_ptr Output column */ -std::unique_ptr binary_operation( - scalar const& lhs, - column_view const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr binary_operation(scalar const& lhs, + column_view const& rhs, + binary_operator op, + data_type output_type, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Performs a binary operation between a string column and a string @@ -101,13 +97,12 @@ std::unique_ptr binary_operation( * @param mr Device memory resource used to allocate the returned column's device memory * @return std::unique_ptr Output column */ -std::unique_ptr binary_operation( - column_view const& lhs, - scalar const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr binary_operation(column_view const& lhs, + scalar const& rhs, + binary_operator op, + data_type output_type, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Performs a binary operation between two string columns. @@ -127,13 +122,12 @@ std::unique_ptr binary_operation( * @param mr Device memory resource used to allocate the returned column's device memory * @return std::unique_ptr Output column */ -std::unique_ptr binary_operation( - column_view const& lhs, - column_view const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr binary_operation(column_view const& lhs, + column_view const& rhs, + binary_operator op, + data_type output_type, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); void binary_operation(mutable_column_view& out, scalar const& lhs, diff --git a/cpp/src/dictionary/remove_keys.cu b/cpp/src/dictionary/remove_keys.cu index 7e2a82a683c..2c59e2897df 100644 --- a/cpp/src/dictionary/remove_keys.cu +++ b/cpp/src/dictionary/remove_keys.cu @@ -53,11 +53,10 @@ namespace { * @param mr Device memory resource used to allocate the returned column's device memory. */ template -std::unique_ptr remove_keys_fn( - dictionary_column_view const& dictionary_column, - KeysKeeper keys_to_keep_fn, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +std::unique_ptr remove_keys_fn(dictionary_column_view const& dictionary_column, + KeysKeeper keys_to_keep_fn, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto const keys_view = dictionary_column.keys(); auto const indices_type = dictionary_column.indices().type(); @@ -145,11 +144,10 @@ std::unique_ptr remove_keys_fn( } // namespace -std::unique_ptr remove_keys( - dictionary_column_view const& dictionary_column, - column_view const& keys_to_remove, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +std::unique_ptr remove_keys(dictionary_column_view const& dictionary_column, + column_view const& keys_to_remove, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(!keys_to_remove.has_nulls(), "keys_to_remove must not have nulls"); auto const keys_view = dictionary_column.keys(); @@ -163,10 +161,9 @@ std::unique_ptr remove_keys( return remove_keys_fn(dictionary_column, key_matcher, stream, mr); } -std::unique_ptr remove_unused_keys( - dictionary_column_view const& dictionary_column, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +std::unique_ptr remove_unused_keys(dictionary_column_view const& dictionary_column, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { // locate the keys to remove auto const keys_size = dictionary_column.keys_size(); diff --git a/cpp/src/io/csv/writer_impl.cu b/cpp/src/io/csv/writer_impl.cu index b9b6fc6cf94..179a5c89c37 100644 --- a/cpp/src/io/csv/writer_impl.cu +++ b/cpp/src/io/csv/writer_impl.cu @@ -137,10 +137,9 @@ struct column_to_strings_fn { (cudf::is_timestamp()) || (cudf::is_duration())); } - explicit column_to_strings_fn( - csv_writer_options const& options, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + explicit column_to_strings_fn(csv_writer_options const& options, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) : options_(options), stream_(stream), mr_(mr) { } diff --git a/cpp/src/io/utilities/column_buffer.hpp b/cpp/src/io/utilities/column_buffer.hpp index 9300bd0f8b2..063b5096fd3 100644 --- a/cpp/src/io/utilities/column_buffer.hpp +++ b/cpp/src/io/utilities/column_buffer.hpp @@ -45,11 +45,10 @@ namespace detail { * * @return `rmm::device_buffer` Device buffer allocation */ -inline rmm::device_buffer create_data( - data_type type, - size_type size, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +inline rmm::device_buffer create_data(data_type type, + size_type size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { std::size_t data_size = size_of(type) * size; @@ -93,9 +92,7 @@ struct column_buffer { // instantiate a column of known type with a specified size. Allows deferred creation for // preprocessing steps such as in the Parquet reader - void create(size_type _size, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + void create(size_type _size, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); auto data() { return _strings ? _strings->data() : _data.data(); } auto data_size() const { return _strings ? _strings->size() : _data.size(); } diff --git a/cpp/src/join/conditional_join.hpp b/cpp/src/join/conditional_join.hpp index 911cbd222a0..941f4445710 100644 --- a/cpp/src/join/conditional_join.hpp +++ b/cpp/src/join/conditional_join.hpp @@ -62,13 +62,12 @@ conditional_join(table_view const& left, * * @return Join output indices vector pair */ -std::size_t compute_conditional_join_output_size( - table_view const& left, - table_view const& right, - ast::expression const& binary_predicate, - join_kind JoinKind, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::size_t compute_conditional_join_output_size(table_view const& left, + table_view const& right, + ast::expression const& binary_predicate, + join_kind JoinKind, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace cudf diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp index 6f9149a47e2..32c53b80079 100644 --- a/cpp/src/reductions/reductions.cpp +++ b/cpp/src/reductions/reductions.cpp @@ -107,12 +107,11 @@ struct reduce_dispatch_functor { } }; -std::unique_ptr reduce( - column_view const& col, - std::unique_ptr const& agg, - data_type output_dtype, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +std::unique_ptr reduce(column_view const& col, + std::unique_ptr const& agg, + data_type output_dtype, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { // Returns default scalar if input column is non-valid. In terms of nested columns, we need to // handcraft the default scalar with input column. diff --git a/cpp/src/strings/strings_column_factories.cu b/cpp/src/strings/strings_column_factories.cu index e7ee8215b3d..f074eed0f99 100644 --- a/cpp/src/strings/strings_column_factories.cu +++ b/cpp/src/strings/strings_column_factories.cu @@ -52,13 +52,12 @@ std::unique_ptr make_strings_column( return cudf::strings::detail::make_strings_column(strings.begin(), strings.end(), stream, mr); } -std::unique_ptr make_strings_column( - device_span chars, - device_span offsets, - size_type null_count, - rmm::device_buffer&& null_mask, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +std::unique_ptr make_strings_column(device_span chars, + device_span offsets, + size_type null_count, + rmm::device_buffer&& null_mask, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); From bd8033c2ba9a15950028aa0ecf69baff4eac779d Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Tue, 23 Nov 2021 13:49:06 +0530 Subject: [PATCH 4/7] Revert "remove unused stream, mr default arguments" This reverts commit c194b067a82a1b9393cb0088489701199b380c49. --- cpp/src/binaryop/compiled/binary_ops.hpp | 78 +++++++++++---------- cpp/src/dictionary/remove_keys.cu | 25 ++++--- cpp/src/io/csv/writer_impl.cu | 7 +- cpp/src/io/utilities/column_buffer.hpp | 13 ++-- cpp/src/join/conditional_join.hpp | 13 ++-- cpp/src/reductions/reductions.cpp | 11 +-- cpp/src/strings/strings_column_factories.cu | 13 ++-- 7 files changed, 88 insertions(+), 72 deletions(-) diff --git a/cpp/src/binaryop/compiled/binary_ops.hpp b/cpp/src/binaryop/compiled/binary_ops.hpp index 146959bd21c..26a0f26b59c 100644 --- a/cpp/src/binaryop/compiled/binary_ops.hpp +++ b/cpp/src/binaryop/compiled/binary_ops.hpp @@ -31,26 +31,29 @@ class mutable_column_device_view; namespace binops { namespace compiled { -std::unique_ptr string_null_min_max(scalar const& lhs, - column_view const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); +std::unique_ptr string_null_min_max( + scalar const& lhs, + column_view const& rhs, + binary_operator op, + data_type output_type, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); -std::unique_ptr string_null_min_max(column_view const& lhs, - scalar const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); +std::unique_ptr string_null_min_max( + column_view const& lhs, + scalar const& rhs, + binary_operator op, + data_type output_type, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); -std::unique_ptr string_null_min_max(column_view const& lhs, - column_view const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); +std::unique_ptr string_null_min_max( + column_view const& lhs, + column_view const& rhs, + binary_operator op, + data_type output_type, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Performs a binary operation between a string scalar and a string @@ -71,12 +74,13 @@ std::unique_ptr string_null_min_max(column_view const& lhs, * @param mr Device memory resource used to allocate the returned column's device memory * @return std::unique_ptr Output column */ -std::unique_ptr binary_operation(scalar const& lhs, - column_view const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); +std::unique_ptr binary_operation( + scalar const& lhs, + column_view const& rhs, + binary_operator op, + data_type output_type, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Performs a binary operation between a string column and a string @@ -97,12 +101,13 @@ std::unique_ptr binary_operation(scalar const& lhs, * @param mr Device memory resource used to allocate the returned column's device memory * @return std::unique_ptr Output column */ -std::unique_ptr binary_operation(column_view const& lhs, - scalar const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); +std::unique_ptr binary_operation( + column_view const& lhs, + scalar const& rhs, + binary_operator op, + data_type output_type, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Performs a binary operation between two string columns. @@ -122,12 +127,13 @@ std::unique_ptr binary_operation(column_view const& lhs, * @param mr Device memory resource used to allocate the returned column's device memory * @return std::unique_ptr Output column */ -std::unique_ptr binary_operation(column_view const& lhs, - column_view const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); +std::unique_ptr binary_operation( + column_view const& lhs, + column_view const& rhs, + binary_operator op, + data_type output_type, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); void binary_operation(mutable_column_view& out, scalar const& lhs, diff --git a/cpp/src/dictionary/remove_keys.cu b/cpp/src/dictionary/remove_keys.cu index 2c59e2897df..7e2a82a683c 100644 --- a/cpp/src/dictionary/remove_keys.cu +++ b/cpp/src/dictionary/remove_keys.cu @@ -53,10 +53,11 @@ namespace { * @param mr Device memory resource used to allocate the returned column's device memory. */ template -std::unique_ptr remove_keys_fn(dictionary_column_view const& dictionary_column, - KeysKeeper keys_to_keep_fn, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr remove_keys_fn( + dictionary_column_view const& dictionary_column, + KeysKeeper keys_to_keep_fn, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { auto const keys_view = dictionary_column.keys(); auto const indices_type = dictionary_column.indices().type(); @@ -144,10 +145,11 @@ std::unique_ptr remove_keys_fn(dictionary_column_view const& dictionary_ } // namespace -std::unique_ptr remove_keys(dictionary_column_view const& dictionary_column, - column_view const& keys_to_remove, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr remove_keys( + dictionary_column_view const& dictionary_column, + column_view const& keys_to_remove, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { CUDF_EXPECTS(!keys_to_remove.has_nulls(), "keys_to_remove must not have nulls"); auto const keys_view = dictionary_column.keys(); @@ -161,9 +163,10 @@ std::unique_ptr remove_keys(dictionary_column_view const& dictionary_col return remove_keys_fn(dictionary_column, key_matcher, stream, mr); } -std::unique_ptr remove_unused_keys(dictionary_column_view const& dictionary_column, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr remove_unused_keys( + dictionary_column_view const& dictionary_column, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { // locate the keys to remove auto const keys_size = dictionary_column.keys_size(); diff --git a/cpp/src/io/csv/writer_impl.cu b/cpp/src/io/csv/writer_impl.cu index 179a5c89c37..b9b6fc6cf94 100644 --- a/cpp/src/io/csv/writer_impl.cu +++ b/cpp/src/io/csv/writer_impl.cu @@ -137,9 +137,10 @@ struct column_to_strings_fn { (cudf::is_timestamp()) || (cudf::is_duration())); } - explicit column_to_strings_fn(csv_writer_options const& options, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + explicit column_to_strings_fn( + csv_writer_options const& options, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) : options_(options), stream_(stream), mr_(mr) { } diff --git a/cpp/src/io/utilities/column_buffer.hpp b/cpp/src/io/utilities/column_buffer.hpp index 063b5096fd3..9300bd0f8b2 100644 --- a/cpp/src/io/utilities/column_buffer.hpp +++ b/cpp/src/io/utilities/column_buffer.hpp @@ -45,10 +45,11 @@ namespace detail { * * @return `rmm::device_buffer` Device buffer allocation */ -inline rmm::device_buffer create_data(data_type type, - size_type size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +inline rmm::device_buffer create_data( + data_type type, + size_type size, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { std::size_t data_size = size_of(type) * size; @@ -92,7 +93,9 @@ struct column_buffer { // instantiate a column of known type with a specified size. Allows deferred creation for // preprocessing steps such as in the Parquet reader - void create(size_type _size, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); + void create(size_type _size, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); auto data() { return _strings ? _strings->data() : _data.data(); } auto data_size() const { return _strings ? _strings->size() : _data.size(); } diff --git a/cpp/src/join/conditional_join.hpp b/cpp/src/join/conditional_join.hpp index 941f4445710..911cbd222a0 100644 --- a/cpp/src/join/conditional_join.hpp +++ b/cpp/src/join/conditional_join.hpp @@ -62,12 +62,13 @@ conditional_join(table_view const& left, * * @return Join output indices vector pair */ -std::size_t compute_conditional_join_output_size(table_view const& left, - table_view const& right, - ast::expression const& binary_predicate, - join_kind JoinKind, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); +std::size_t compute_conditional_join_output_size( + table_view const& left, + table_view const& right, + ast::expression const& binary_predicate, + join_kind JoinKind, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace detail } // namespace cudf diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp index 32c53b80079..6f9149a47e2 100644 --- a/cpp/src/reductions/reductions.cpp +++ b/cpp/src/reductions/reductions.cpp @@ -107,11 +107,12 @@ struct reduce_dispatch_functor { } }; -std::unique_ptr reduce(column_view const& col, - std::unique_ptr const& agg, - data_type output_dtype, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr reduce( + column_view const& col, + std::unique_ptr const& agg, + data_type output_dtype, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { // Returns default scalar if input column is non-valid. In terms of nested columns, we need to // handcraft the default scalar with input column. diff --git a/cpp/src/strings/strings_column_factories.cu b/cpp/src/strings/strings_column_factories.cu index f074eed0f99..e7ee8215b3d 100644 --- a/cpp/src/strings/strings_column_factories.cu +++ b/cpp/src/strings/strings_column_factories.cu @@ -52,12 +52,13 @@ std::unique_ptr make_strings_column( return cudf::strings::detail::make_strings_column(strings.begin(), strings.end(), stream, mr); } -std::unique_ptr make_strings_column(device_span chars, - device_span offsets, - size_type null_count, - rmm::device_buffer&& null_mask, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr make_strings_column( + device_span chars, + device_span offsets, + size_type null_count, + rmm::device_buffer&& null_mask, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { CUDF_FUNC_RANGE(); From cd53d993d03ff4336d46412810fe72d6968e762e Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Tue, 7 Dec 2021 00:33:07 +0530 Subject: [PATCH 5/7] add missing stream, mr at several locations --- cpp/src/copying/scatter.cu | 2 +- cpp/src/copying/segmented_shift.cu | 2 +- cpp/src/groupby/common/utils.hpp | 6 +++-- cpp/src/groupby/hash/groupby.cu | 2 +- cpp/src/groupby/sort/aggregate.cpp | 2 +- cpp/src/groupby/sort/group_scan_util.cuh | 4 ++- cpp/src/groupby/sort/scan.cpp | 2 +- cpp/src/interop/from_arrow.cu | 26 ++++++++++++------- cpp/src/interop/to_arrow.cu | 6 ++--- cpp/src/io/avro/reader_impl.cu | 2 +- cpp/src/labeling/label_bins.cu | 4 +-- .../combine/concatenate_list_elements.cu | 4 ++- cpp/src/lists/copying/copying.cu | 4 ++- cpp/src/lists/copying/gather.cu | 8 ++++-- cpp/src/lists/copying/segmented_gather.cu | 4 ++- cpp/src/lists/drop_list_duplicates.cu | 10 ++++--- cpp/src/lists/segmented_sort.cu | 8 ++++-- cpp/src/merge/merge.cu | 4 ++- cpp/src/quantiles/quantile.cu | 6 ++--- cpp/src/quantiles/tdigest/tdigest.cu | 12 ++++++--- cpp/src/reductions/simple.cuh | 2 +- cpp/src/round/round.cu | 3 ++- cpp/src/strings/split/split_record.cu | 4 ++- cpp/src/structs/utilities.cpp | 2 +- cpp/src/unary/cast_ops.cu | 3 ++- 25 files changed, 85 insertions(+), 47 deletions(-) diff --git a/cpp/src/copying/scatter.cu b/cpp/src/copying/scatter.cu index 7c1d78e2f98..06ef42e4a08 100644 --- a/cpp/src/copying/scatter.cu +++ b/cpp/src/copying/scatter.cu @@ -269,7 +269,7 @@ struct column_scalar_scatterer_impl { // Null mask pushdown inside factory method return make_structs_column( - target.size(), std::move(fields), null_count, std::move(*contents.null_mask)); + target.size(), std::move(fields), null_count, std::move(*contents.null_mask), stream, mr); } }; diff --git a/cpp/src/copying/segmented_shift.cu b/cpp/src/copying/segmented_shift.cu index 62f992012cd..595a46784da 100644 --- a/cpp/src/copying/segmented_shift.cu +++ b/cpp/src/copying/segmented_shift.cu @@ -148,7 +148,7 @@ std::unique_ptr segmented_shift(column_view const& segmented_values, rmm::mr::device_memory_resource* mr) { if (segmented_values.is_empty()) { return empty_like(segmented_values); } - if (offset == 0) { return std::make_unique(segmented_values); }; + if (offset == 0) { return std::make_unique(segmented_values, stream, mr); }; return type_dispatcher(segmented_values.type(), segmented_shift_functor_forwarder{}, diff --git a/cpp/src/groupby/common/utils.hpp b/cpp/src/groupby/common/utils.hpp index e3611eb0e4b..09b85c74f08 100644 --- a/cpp/src/groupby/common/utils.hpp +++ b/cpp/src/groupby/common/utils.hpp @@ -29,7 +29,9 @@ namespace detail { template inline std::vector extract_results(host_span requests, - cudf::detail::result_cache& cache) + cudf::detail::result_cache& cache, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { std::vector results(requests.size()); std::unordered_map>, @@ -45,7 +47,7 @@ inline std::vector extract_results(host_span(it->second)); + results[i].results.emplace_back(std::make_unique(it->second, stream, mr)); } else { CUDF_FAIL("Cannot extract result from the cache"); } diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu index 7c8a86e048d..c0501a2b187 100644 --- a/cpp/src/groupby/hash/groupby.cu +++ b/cpp/src/groupby/hash/groupby.cu @@ -673,7 +673,7 @@ std::pair, std::vector> groupby( groupby_null_templated(keys, requests, &cache, include_null_keys, stream, mr); } - return std::make_pair(std::move(unique_keys), extract_results(requests, cache)); + return std::make_pair(std::move(unique_keys), extract_results(requests, cache, stream, mr)); } } // namespace hash } // namespace detail diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp index d68b701d75f..b3624282c24 100644 --- a/cpp/src/groupby/sort/aggregate.cpp +++ b/cpp/src/groupby/sort/aggregate.cpp @@ -775,7 +775,7 @@ std::pair, std::vector> groupby::sort } } - auto results = detail::extract_results(requests, cache); + auto results = detail::extract_results(requests, cache, stream, mr); return std::make_pair(helper().unique_keys(stream, mr), std::move(results)); } diff --git a/cpp/src/groupby/sort/group_scan_util.cuh b/cpp/src/groupby/sort/group_scan_util.cuh index b565e8dc6d8..57f8147fe06 100644 --- a/cpp/src/groupby/sort/group_scan_util.cuh +++ b/cpp/src/groupby/sort/group_scan_util.cuh @@ -262,7 +262,9 @@ struct group_scan_functor, std::vector> groupby::sort } } - auto results = detail::extract_results(requests, cache); + auto results = detail::extract_results(requests, cache, stream, mr); return std::make_pair(helper().sorted_keys(stream, mr), std::move(results)); } diff --git a/cpp/src/interop/from_arrow.cu b/cpp/src/interop/from_arrow.cu index 59095fef85e..edd3ce2ed07 100644 --- a/cpp/src/interop/from_arrow.cu +++ b/cpp/src/interop/from_arrow.cu @@ -287,11 +287,14 @@ std::unique_ptr dispatch_to_cudf_column::operator()( UNKNOWN_NULL_COUNT, std::move(*get_mask_buffer(array, stream, mr))); - return num_rows == array.length() ? std::move(out_col) - : std::make_unique(cudf::detail::slice( - out_col->view(), - static_cast(array.offset()), - static_cast(array.offset() + array.length()))); + return num_rows == array.length() + ? std::move(out_col) + : std::make_unique( + cudf::detail::slice(out_col->view(), + static_cast(array.offset()), + static_cast(array.offset() + array.length())), + stream, + mr); } template <> @@ -383,11 +386,14 @@ std::unique_ptr dispatch_to_cudf_column::operator()( stream, mr); - return num_rows == array.length() ? std::move(out_col) - : std::make_unique(cudf::detail::slice( - out_col->view(), - static_cast(array.offset()), - static_cast(array.offset() + array.length()))); + return num_rows == array.length() + ? std::move(out_col) + : std::make_unique( + cudf::detail::slice(out_col->view(), + static_cast(array.offset()), + static_cast(array.offset() + array.length())), + stream, + mr); } std::unique_ptr get_column(arrow::Array const& array, diff --git a/cpp/src/interop/to_arrow.cu b/cpp/src/interop/to_arrow.cu index 3271804bf39..e6db5807dde 100644 --- a/cpp/src/interop/to_arrow.cu +++ b/cpp/src/interop/to_arrow.cu @@ -210,7 +210,7 @@ std::shared_ptr dispatch_to_arrow::operator()( std::unique_ptr tmp_column = ((input.offset() != 0) or ((input.num_children() == 2) and (input.child(0).size() - 1 != input.size()))) - ? std::make_unique(input) + ? std::make_unique(input, stream) : nullptr; column_view input_view = (tmp_column != nullptr) ? tmp_column->view() : input; @@ -245,7 +245,7 @@ std::shared_ptr dispatch_to_arrow::operator()( "Number of field names and number of children doesn't match\n"); std::unique_ptr tmp_column = nullptr; - if (input.offset() != 0) { tmp_column = std::make_unique(input); } + if (input.offset() != 0) { tmp_column = std::make_unique(input, stream); } column_view input_view = (tmp_column != nullptr) ? tmp_column->view() : input; auto child_arrays = fetch_child_array(input_view, metadata.children_meta, ar_mr, stream); @@ -280,7 +280,7 @@ std::shared_ptr dispatch_to_arrow::operator()( std::unique_ptr tmp_column = nullptr; if ((input.offset() != 0) or ((input.num_children() == 2) and (input.child(0).size() - 1 != input.size()))) { - tmp_column = std::make_unique(input); + tmp_column = std::make_unique(input, stream); } column_view input_view = (tmp_column != nullptr) ? tmp_column->view() : input; diff --git a/cpp/src/io/avro/reader_impl.cu b/cpp/src/io/avro/reader_impl.cu index 176da2f5cf7..d908e6c8ed5 100644 --- a/cpp/src/io/avro/reader_impl.cu +++ b/cpp/src/io/avro/reader_impl.cu @@ -278,7 +278,7 @@ rmm::device_buffer decompress_data(datasource& source, 0); uncompressed_data_offsets.host_to_device(stream); - thrust::tabulate(rmm::exec_policy(), + thrust::tabulate(rmm::exec_policy(stream), uncompressed_data_ptrs.begin(), uncompressed_data_ptrs.end(), [off = uncompressed_data_offsets.device_ptr(), diff --git a/cpp/src/labeling/label_bins.cu b/cpp/src/labeling/label_bins.cu index 5007c3028ad..774027ed322 100644 --- a/cpp/src/labeling/label_bins.cu +++ b/cpp/src/labeling/label_bins.cu @@ -146,9 +146,9 @@ std::unique_ptr label_bins(column_view const& input, left_begin, left_end, right_begin)); } - const auto mask_and_count = valid_if(output_begin, output_end, filter_null_sentinel()); + auto mask_and_count = valid_if(output_begin, output_end, filter_null_sentinel(), stream, mr); - output->set_null_mask(mask_and_count.first, mask_and_count.second); + output->set_null_mask(std::move(mask_and_count.first), mask_and_count.second); return output; } diff --git a/cpp/src/lists/combine/concatenate_list_elements.cu b/cpp/src/lists/combine/concatenate_list_elements.cu index 4bef312b396..29108a108fe 100644 --- a/cpp/src/lists/combine/concatenate_list_elements.cu +++ b/cpp/src/lists/combine/concatenate_list_elements.cu @@ -70,7 +70,9 @@ std::unique_ptr concatenate_lists_ignore_null(column_view const& input, // The child column of the output lists column is just copied from the input column. auto out_entries = std::make_unique( - lists_column_view(lists_column_view(input).get_sliced_child(stream)).get_sliced_child(stream)); + lists_column_view(lists_column_view(input).get_sliced_child(stream)).get_sliced_child(stream), + stream, + mr); auto [null_mask, null_count] = [&] { if (!build_null_mask) diff --git a/cpp/src/lists/copying/copying.cu b/cpp/src/lists/copying/copying.cu index d4a3d5555a6..e9d183bc073 100644 --- a/cpp/src/lists/copying/copying.cu +++ b/cpp/src/lists/copying/copying.cu @@ -84,7 +84,9 @@ std::unique_ptr copy_slice(lists_column_view const& lists, std::move(offsets), std::move(child), cudf::UNKNOWN_NULL_COUNT, - std::move(null_mask)); + std::move(null_mask), + stream, + mr); } } // namespace detail diff --git a/cpp/src/lists/copying/gather.cu b/cpp/src/lists/copying/gather.cu index 60cf4027621..fe45cdfc338 100644 --- a/cpp/src/lists/copying/gather.cu +++ b/cpp/src/lists/copying/gather.cu @@ -176,7 +176,9 @@ std::unique_ptr gather_list_nested(cudf::lists_column_view const& list, std::move(child_gd.offsets), std::move(child), null_count, - std::move(null_mask)); + std::move(null_mask), + stream, + mr); } // it's a leaf. do a regular gather @@ -187,7 +189,9 @@ std::unique_ptr gather_list_nested(cudf::lists_column_view const& list, std::move(child_gd.offsets), std::move(child), null_count, - std::move(null_mask)); + std::move(null_mask), + stream, + mr); } } // namespace detail diff --git a/cpp/src/lists/copying/segmented_gather.cu b/cpp/src/lists/copying/segmented_gather.cu index 8cbcddc1c58..8b9c80db8ba 100644 --- a/cpp/src/lists/copying/segmented_gather.cu +++ b/cpp/src/lists/copying/segmented_gather.cu @@ -103,7 +103,9 @@ std::unique_ptr segmented_gather(lists_column_view const& value_column, std::move(output_offset), std::move(child), null_count, - std::move(null_mask)); + std::move(null_mask), + stream, + mr); } } // namespace detail diff --git a/cpp/src/lists/drop_list_duplicates.cu b/cpp/src/lists/drop_list_duplicates.cu index 527e834c76c..b86e028192e 100644 --- a/cpp/src/lists/drop_list_duplicates.cu +++ b/cpp/src/lists/drop_list_duplicates.cu @@ -634,17 +634,21 @@ std::pair, std::unique_ptr> drop_list_duplicates // If the values lists column is not given, its corresponding output will be nullptr. auto out_values = values ? make_lists_column(keys.size(), - std::make_unique(output_offsets->view()), + std::make_unique(output_offsets->view(), stream, mr), std::move(unique_entries_and_list_indices[1]), values.value().null_count(), - cudf::detail::copy_bitmask(values.value().parent(), stream, mr)) + cudf::detail::copy_bitmask(values.value().parent(), stream, mr), + stream, + mr) : nullptr; auto out_keys = make_lists_column(keys.size(), std::move(output_offsets), std::move(unique_entries_and_list_indices[0]), keys.null_count(), - cudf::detail::copy_bitmask(keys.parent(), stream, mr)); + cudf::detail::copy_bitmask(keys.parent(), stream, mr), + stream, + mr); return std::pair{std::move(out_keys), std::move(out_values)}; } diff --git a/cpp/src/lists/segmented_sort.cu b/cpp/src/lists/segmented_sort.cu index 088db226c24..b7e2b73329a 100644 --- a/cpp/src/lists/segmented_sort.cu +++ b/cpp/src/lists/segmented_sort.cu @@ -266,7 +266,9 @@ std::unique_ptr sort_lists(lists_column_view const& input, std::move(output_offset), std::move(output_child), input.null_count(), - std::move(null_mask)); + std::move(null_mask), + stream, + mr); } std::unique_ptr stable_sort_lists(lists_column_view const& input, @@ -300,7 +302,9 @@ std::unique_ptr stable_sort_lists(lists_column_view const& input, std::move(output_offset), std::move(sorted_child_table->release().front()), input.null_count(), - cudf::detail::copy_bitmask(input.parent(), stream, mr)); + cudf::detail::copy_bitmask(input.parent(), stream, mr), + stream, + mr); } } // namespace detail diff --git a/cpp/src/merge/merge.cu b/cpp/src/merge/merge.cu index 9a456224635..05bd195e764 100644 --- a/cpp/src/merge/merge.cu +++ b/cpp/src/merge/merge.cu @@ -499,7 +499,9 @@ table_ptr_type merge(std::vector const& tables_to_merge, }); // If there is only one non-empty table_view, return its copy - if (merge_queue.size() == 1) { return std::make_unique(merge_queue.top().view); } + if (merge_queue.size() == 1) { + return std::make_unique(merge_queue.top().view, stream, mr); + } // No inputs have rows, return a table with same columns as the first one if (merge_queue.empty()) { return empty_like(first_table); } diff --git a/cpp/src/quantiles/quantile.cu b/cpp/src/quantiles/quantile.cu index 645730fdb2f..dbb66ec295c 100644 --- a/cpp/src/quantiles/quantile.cu +++ b/cpp/src/quantiles/quantile.cu @@ -82,12 +82,12 @@ struct quantile_functor { auto d_input = column_device_view::create(input, stream); auto d_output = mutable_column_device_view::create(output->mutable_view(), stream); - auto q_device = cudf::detail::make_device_uvector_sync(q); + auto q_device = cudf::detail::make_device_uvector_sync(q, stream); if (!cudf::is_dictionary(input.type())) { auto sorted_data = thrust::make_permutation_iterator(input.data(), ordered_indices); - thrust::transform(rmm::exec_policy(), + thrust::transform(rmm::exec_policy(stream), q_device.begin(), q_device.end(), d_output->template begin(), @@ -97,7 +97,7 @@ struct quantile_functor { } else { auto sorted_data = thrust::make_permutation_iterator( dictionary::detail::make_dictionary_iterator(*d_input), ordered_indices); - thrust::transform(rmm::exec_policy(), + thrust::transform(rmm::exec_policy(stream), q_device.begin(), q_device.end(), d_output->template begin(), diff --git a/cpp/src/quantiles/tdigest/tdigest.cu b/cpp/src/quantiles/tdigest/tdigest.cu index a9950cced57..fa9d8435341 100644 --- a/cpp/src/quantiles/tdigest/tdigest.cu +++ b/cpp/src/quantiles/tdigest/tdigest.cu @@ -212,7 +212,9 @@ std::unique_ptr compute_approx_percentiles(tdigest_column_view const& in thrust::make_counting_iterator(0) + num_output_values, [percentiles = *percentiles_cdv] __device__(size_type i) { return percentiles.is_valid(i % percentiles.size()); - }) + }, + stream, + mr) : std::pair{rmm::device_buffer{}, 0}; }(); @@ -263,8 +265,8 @@ std::unique_ptr make_tdigest_column(size_type num_rows, cudf::make_structs_column(centroids_size, std::move(inner_children), 0, {}, stream, mr); // grouped into lists - auto tdigest = - cudf::make_lists_column(num_rows, std::move(tdigest_offsets), std::move(tdigest_data), 0, {}); + auto tdigest = cudf::make_lists_column( + num_rows, std::move(tdigest_offsets), std::move(tdigest_data), 0, {}, stream, mr); // create the final column std::vector> children; @@ -334,7 +336,9 @@ std::unique_ptr percentile_approx(tdigest_column_view const& input, cudf::make_empty_column(type_id::FLOAT64), input.size(), cudf::detail::create_null_mask( - input.size(), mask_state::ALL_NULL, rmm::cuda_stream_view(stream), mr)); + input.size(), mask_state::ALL_NULL, rmm::cuda_stream_view(stream), mr), + stream, + mr); } // if any of the input digests are empty, nullify the corresponding output rows (values will be diff --git a/cpp/src/reductions/simple.cuh b/cpp/src/reductions/simple.cuh index 7dd54e9250a..f0940de2dfc 100644 --- a/cpp/src/reductions/simple.cuh +++ b/cpp/src/reductions/simple.cuh @@ -122,7 +122,7 @@ std::unique_ptr fixed_point_reduction(column_view const& col, }(); auto const val = static_cast*>(result.get()); - return cudf::make_fixed_point_scalar(val->value(stream), scale); + return cudf::make_fixed_point_scalar(val->value(stream), scale, stream, mr); } /** diff --git a/cpp/src/round/round.cu b/cpp/src/round/round.cu index 36dd2dabd72..ef021ca8a35 100644 --- a/cpp/src/round/round.cu +++ b/cpp/src/round/round.cu @@ -246,7 +246,8 @@ std::unique_ptr round_with(column_view const& input, // if rounding to more precision than fixed_point is capable of, just need to rescale // note: decimal_places has the opposite sign of numeric::scale_type (therefore have to negate) - if (input.type().scale() > -decimal_places) return cudf::detail::cast(input, result_type); + if (input.type().scale() > -decimal_places) + return cudf::detail::cast(input, result_type, stream, mr); auto result = cudf::make_fixed_width_column( result_type, input.size(), copy_bitmask(input, stream, mr), input.null_count(), stream, mr); diff --git a/cpp/src/strings/split/split_record.cu b/cpp/src/strings/split/split_record.cu index a31716ad2a2..929d21a024c 100644 --- a/cpp/src/strings/split/split_record.cu +++ b/cpp/src/strings/split/split_record.cu @@ -255,7 +255,9 @@ std::unique_ptr split_record_fn(strings_column_view const& strings, std::move(offsets), std::move(strings_output), strings.null_count(), - copy_bitmask(strings.parent(), stream, mr)); + copy_bitmask(strings.parent(), stream, mr), + stream, + mr); } template diff --git a/cpp/src/structs/utilities.cpp b/cpp/src/structs/utilities.cpp index d4e2f48feba..43a32c8405a 100644 --- a/cpp/src/structs/utilities.cpp +++ b/cpp/src/structs/utilities.cpp @@ -430,7 +430,7 @@ std::tuple> superimpose_parent auto superimposed_columns = std::vector{}; auto superimposed_nullmasks = std::vector{}; for (auto col : table) { - auto [superimposed_col, null_masks] = superimpose_parent_nulls(col); + auto [superimposed_col, null_masks] = superimpose_parent_nulls(col, stream, mr); superimposed_columns.push_back(superimposed_col); superimposed_nullmasks.insert(superimposed_nullmasks.begin(), std::make_move_iterator(null_masks.begin()), diff --git a/cpp/src/unary/cast_ops.cu b/cpp/src/unary/cast_ops.cu index e852b00796a..7fa45e04e02 100644 --- a/cpp/src/unary/cast_ops.cu +++ b/cpp/src/unary/cast_ops.cu @@ -290,7 +290,8 @@ struct dispatch_unary_cast_to { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - if (input.type() == type) return std::make_unique(input); // TODO add test for this + if (input.type() == type) + return std::make_unique(input, stream, mr); // TODO add test for this return detail::rescale(input, numeric::scale_type{type.scale()}, stream, mr); } From 7dd9bed5582bff74d45f6cb76c21b130380c671d Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Tue, 7 Dec 2021 01:20:57 +0530 Subject: [PATCH 6/7] add missing stream, mr after merge upstream --- cpp/src/reductions/scan/scan_inclusive.cu | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/src/reductions/scan/scan_inclusive.cu b/cpp/src/reductions/scan/scan_inclusive.cu index 2b1ac8aa704..5c2b686fd9c 100644 --- a/cpp/src/reductions/scan/scan_inclusive.cu +++ b/cpp/src/reductions/scan/scan_inclusive.cu @@ -212,7 +212,9 @@ struct scan_functor { return make_structs_column(input.size(), std::move(scanned_children), UNKNOWN_NULL_COUNT, - rmm::device_buffer{0, stream, mr}); + rmm::device_buffer{0, stream, mr}, + stream, + mr); } }; From c5353755ee01e2e3c1e93b837e7e62abd8d16cc3 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Wed, 8 Dec 2021 23:38:10 +0530 Subject: [PATCH 7/7] review comments --- cpp/src/unary/cast_ops.cu | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/cpp/src/unary/cast_ops.cu b/cpp/src/unary/cast_ops.cu index f978e314f08..f41ebacce53 100644 --- a/cpp/src/unary/cast_ops.cu +++ b/cpp/src/unary/cast_ops.cu @@ -177,13 +177,14 @@ std::unique_ptr rescale(column_view input, using namespace numeric; if (input.type().scale() >= scale) { - auto const scalar = make_fixed_point_scalar(0, scale_type{scale}); + auto const scalar = make_fixed_point_scalar(0, scale_type{scale}, rmm::cuda_stream_default); auto const type = cudf::data_type{cudf::type_to_id(), scale}; return detail::binary_operation(input, *scalar, binary_operator::ADD, type, stream, mr); } else { - auto const diff = input.type().scale() - scale; - auto const scalar = make_fixed_point_scalar(std::pow(10, -diff), scale_type{diff}); - auto const type = cudf::data_type{cudf::type_to_id(), scale}; + auto const diff = input.type().scale() - scale; + auto const scalar = + make_fixed_point_scalar(std::pow(10, -diff), scale_type{diff}, rmm::cuda_stream_default); + auto const type = cudf::data_type{cudf::type_to_id(), scale}; return detail::binary_operation(input, *scalar, binary_operator::DIV, type, stream, mr); } }; @@ -290,8 +291,9 @@ struct dispatch_unary_cast_to { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - if (input.type() == type) + if (input.type() == type) { return std::make_unique(input, stream, mr); // TODO add test for this + } return detail::rescale(input, numeric::scale_type{type.scale()}, stream, mr); }