diff --git a/cpp/src/copying/copy.cu b/cpp/src/copying/copy.cu index 55173fd409f..10af2ffb614 100644 --- a/cpp/src/copying/copy.cu +++ b/cpp/src/copying/copy.cu @@ -303,7 +303,7 @@ std::unique_ptr copy_if_else(Left const& lhs, if (boolean_mask.is_empty()) { return cudf::empty_like(lhs); } - auto bool_mask_device_p = column_device_view::create(boolean_mask); + auto bool_mask_device_p = column_device_view::create(boolean_mask, stream); column_device_view bool_mask_device = *bool_mask_device_p; auto const has_nulls = boolean_mask.has_nulls(); diff --git a/cpp/src/copying/scatter.cu b/cpp/src/copying/scatter.cu index 7c1d78e2f98..06ef42e4a08 100644 --- a/cpp/src/copying/scatter.cu +++ b/cpp/src/copying/scatter.cu @@ -269,7 +269,7 @@ struct column_scalar_scatterer_impl { // Null mask pushdown inside factory method return make_structs_column( - target.size(), std::move(fields), null_count, std::move(*contents.null_mask)); + target.size(), std::move(fields), null_count, std::move(*contents.null_mask), stream, mr); } }; diff --git a/cpp/src/copying/segmented_shift.cu b/cpp/src/copying/segmented_shift.cu index b08eaa0862c..6d3a005add0 100644 --- a/cpp/src/copying/segmented_shift.cu +++ b/cpp/src/copying/segmented_shift.cu @@ -145,7 +145,7 @@ std::unique_ptr segmented_shift(column_view const& segmented_values, rmm::mr::device_memory_resource* mr) { if (segmented_values.is_empty()) { return empty_like(segmented_values); } - if (offset == 0) { return std::make_unique(segmented_values); }; + if (offset == 0) { return std::make_unique(segmented_values, stream, mr); }; return type_dispatcher(segmented_values.type(), segmented_shift_functor_forwarder{}, diff --git a/cpp/src/copying/shift.cu b/cpp/src/copying/shift.cu index 0b88545ffa5..dacc1d07447 100644 --- a/cpp/src/copying/shift.cu +++ b/cpp/src/copying/shift.cu @@ -105,10 +105,10 @@ struct shift_functor { using ScalarType = cudf::scalar_type_t; auto& scalar = static_cast(fill_value); - auto device_input = column_device_view::create(input); + auto device_input = column_device_view::create(input, stream); auto output = detail::allocate_like(input, input.size(), mask_allocation_policy::NEVER, stream, mr); - auto device_output = mutable_column_device_view::create(*output); + auto device_output = mutable_column_device_view::create(*output, stream); auto const scalar_is_valid = scalar.is_valid(stream); diff --git a/cpp/src/filling/fill.cu b/cpp/src/filling/fill.cu index d17c698f91a..50f750e6416 100644 --- a/cpp/src/filling/fill.cu +++ b/cpp/src/filling/fill.cu @@ -49,7 +49,7 @@ void in_place_fill(cudf::mutable_column_view& destination, using ScalarType = cudf::scalar_type_t; auto p_scalar = static_cast(&value); T fill_value = p_scalar->value(stream); - bool is_valid = p_scalar->is_valid(); + bool is_valid = p_scalar->is_valid(stream); cudf::detail::copy_range(thrust::make_constant_iterator(fill_value), thrust::make_constant_iterator(is_valid), destination, diff --git a/cpp/src/groupby/common/utils.hpp b/cpp/src/groupby/common/utils.hpp index e3611eb0e4b..09b85c74f08 100644 --- a/cpp/src/groupby/common/utils.hpp +++ b/cpp/src/groupby/common/utils.hpp @@ -29,7 +29,9 @@ namespace detail { template inline std::vector extract_results(host_span requests, - cudf::detail::result_cache& cache) + cudf::detail::result_cache& cache, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { std::vector results(requests.size()); std::unordered_map>, @@ -45,7 +47,7 @@ inline std::vector extract_results(host_span(it->second)); + results[i].results.emplace_back(std::make_unique(it->second, stream, mr)); } else { CUDF_FAIL("Cannot extract result from the cache"); } diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu index 0c3e79ea36c..1474cfd8a19 100644 --- a/cpp/src/groupby/hash/groupby.cu +++ b/cpp/src/groupby/hash/groupby.cu @@ -308,13 +308,13 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final column_view sum_result = sparse_results->get_result(col, *sum_agg); column_view count_result = sparse_results->get_result(col, *count_agg); - auto values_view = column_device_view::create(col); - auto sum_view = column_device_view::create(sum_result); - auto count_view = column_device_view::create(count_result); + auto values_view = column_device_view::create(col, stream); + auto sum_view = column_device_view::create(sum_result, stream); + auto count_view = column_device_view::create(count_result, stream); auto var_result = make_fixed_width_column( cudf::detail::target_type(result_type, agg.kind), col.size(), mask_state::ALL_NULL, stream); - auto var_result_view = mutable_column_device_view::create(var_result->mutable_view()); + auto var_result_view = mutable_column_device_view::create(var_result->mutable_view(), stream); mutable_table_view var_table_view{{var_result->mutable_view()}}; cudf::detail::initialize_with_identity(var_table_view, {agg.kind}, stream); @@ -668,7 +668,7 @@ std::pair, std::vector> groupby( std::unique_ptr unique_keys = groupby(keys, requests, &cache, has_nulls(keys), include_null_keys, stream, mr); - return std::make_pair(std::move(unique_keys), extract_results(requests, cache)); + return std::make_pair(std::move(unique_keys), extract_results(requests, cache, stream, mr)); } } // namespace hash } // namespace detail diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp index d68b701d75f..b3624282c24 100644 --- a/cpp/src/groupby/sort/aggregate.cpp +++ b/cpp/src/groupby/sort/aggregate.cpp @@ -775,7 +775,7 @@ std::pair, std::vector> groupby::sort } } - auto results = detail::extract_results(requests, cache); + auto results = detail::extract_results(requests, cache, stream, mr); return std::make_pair(helper().unique_keys(stream, mr), std::move(results)); } diff --git a/cpp/src/groupby/sort/group_count.cu b/cpp/src/groupby/sort/group_count.cu index 121e4bb889d..6a2ff994b8b 100644 --- a/cpp/src/groupby/sort/group_count.cu +++ b/cpp/src/groupby/sort/group_count.cu @@ -45,7 +45,7 @@ std::unique_ptr group_count_valid(column_view const& values, if (num_groups == 0) { return result; } if (values.nullable()) { - auto values_view = column_device_view::create(values); + auto values_view = column_device_view::create(values, stream); // make_validity_iterator returns a boolean iterator that sums to 1 (1+1=1) // so we need to transform it to cast it to an integer type diff --git a/cpp/src/groupby/sort/group_nth_element.cu b/cpp/src/groupby/sort/group_nth_element.cu index f2c57abf54e..c1fc58beb80 100644 --- a/cpp/src/groupby/sort/group_nth_element.cu +++ b/cpp/src/groupby/sort/group_nth_element.cu @@ -72,7 +72,7 @@ std::unique_ptr group_nth_element(column_view const& values, }); } else { // skip nulls (equivalent to pandas nth(dropna='any')) // Returns index of nth value. - auto values_view = column_device_view::create(values); + auto values_view = column_device_view::create(values, stream); auto bitmask_iterator = thrust::make_transform_iterator(cudf::detail::make_validity_iterator(*values_view), [] __device__(auto b) { return static_cast(b); }); diff --git a/cpp/src/groupby/sort/group_scan_util.cuh b/cpp/src/groupby/sort/group_scan_util.cuh index 08f65536466..2efe14f70ca 100644 --- a/cpp/src/groupby/sort/group_scan_util.cuh +++ b/cpp/src/groupby/sort/group_scan_util.cuh @@ -259,7 +259,9 @@ struct group_scan_functor, std::vector> groupby::sort } } - auto results = detail::extract_results(requests, cache); + auto results = detail::extract_results(requests, cache, stream, mr); return std::make_pair(helper().sorted_keys(stream, mr), std::move(results)); } diff --git a/cpp/src/interop/from_arrow.cu b/cpp/src/interop/from_arrow.cu index 59095fef85e..edd3ce2ed07 100644 --- a/cpp/src/interop/from_arrow.cu +++ b/cpp/src/interop/from_arrow.cu @@ -287,11 +287,14 @@ std::unique_ptr dispatch_to_cudf_column::operator()( UNKNOWN_NULL_COUNT, std::move(*get_mask_buffer(array, stream, mr))); - return num_rows == array.length() ? std::move(out_col) - : std::make_unique(cudf::detail::slice( - out_col->view(), - static_cast(array.offset()), - static_cast(array.offset() + array.length()))); + return num_rows == array.length() + ? std::move(out_col) + : std::make_unique( + cudf::detail::slice(out_col->view(), + static_cast(array.offset()), + static_cast(array.offset() + array.length())), + stream, + mr); } template <> @@ -383,11 +386,14 @@ std::unique_ptr dispatch_to_cudf_column::operator()( stream, mr); - return num_rows == array.length() ? std::move(out_col) - : std::make_unique(cudf::detail::slice( - out_col->view(), - static_cast(array.offset()), - static_cast(array.offset() + array.length()))); + return num_rows == array.length() + ? std::move(out_col) + : std::make_unique( + cudf::detail::slice(out_col->view(), + static_cast(array.offset()), + static_cast(array.offset() + array.length())), + stream, + mr); } std::unique_ptr get_column(arrow::Array const& array, diff --git a/cpp/src/interop/to_arrow.cu b/cpp/src/interop/to_arrow.cu index 3271804bf39..e6db5807dde 100644 --- a/cpp/src/interop/to_arrow.cu +++ b/cpp/src/interop/to_arrow.cu @@ -210,7 +210,7 @@ std::shared_ptr dispatch_to_arrow::operator()( std::unique_ptr tmp_column = ((input.offset() != 0) or ((input.num_children() == 2) and (input.child(0).size() - 1 != input.size()))) - ? std::make_unique(input) + ? std::make_unique(input, stream) : nullptr; column_view input_view = (tmp_column != nullptr) ? tmp_column->view() : input; @@ -245,7 +245,7 @@ std::shared_ptr dispatch_to_arrow::operator()( "Number of field names and number of children doesn't match\n"); std::unique_ptr tmp_column = nullptr; - if (input.offset() != 0) { tmp_column = std::make_unique(input); } + if (input.offset() != 0) { tmp_column = std::make_unique(input, stream); } column_view input_view = (tmp_column != nullptr) ? tmp_column->view() : input; auto child_arrays = fetch_child_array(input_view, metadata.children_meta, ar_mr, stream); @@ -280,7 +280,7 @@ std::shared_ptr dispatch_to_arrow::operator()( std::unique_ptr tmp_column = nullptr; if ((input.offset() != 0) or ((input.num_children() == 2) and (input.child(0).size() - 1 != input.size()))) { - tmp_column = std::make_unique(input); + tmp_column = std::make_unique(input, stream); } column_view input_view = (tmp_column != nullptr) ? tmp_column->view() : input; diff --git a/cpp/src/io/avro/reader_impl.cu b/cpp/src/io/avro/reader_impl.cu index 176da2f5cf7..d908e6c8ed5 100644 --- a/cpp/src/io/avro/reader_impl.cu +++ b/cpp/src/io/avro/reader_impl.cu @@ -278,7 +278,7 @@ rmm::device_buffer decompress_data(datasource& source, 0); uncompressed_data_offsets.host_to_device(stream); - thrust::tabulate(rmm::exec_policy(), + thrust::tabulate(rmm::exec_policy(stream), uncompressed_data_ptrs.begin(), uncompressed_data_ptrs.end(), [off = uncompressed_data_offsets.device_ptr(), diff --git a/cpp/src/io/utilities/file_io_utilities.cpp b/cpp/src/io/utilities/file_io_utilities.cpp index d96bf93d10f..85a5f7e2281 100644 --- a/cpp/src/io/utilities/file_io_utilities.cpp +++ b/cpp/src/io/utilities/file_io_utilities.cpp @@ -75,8 +75,8 @@ class cufile_shim { ~cufile_shim() { - if (driver_close) driver_close(); - if (cf_lib) dlclose(cf_lib); + if (driver_close != nullptr) driver_close(); + if (cf_lib != nullptr) dlclose(cf_lib); } decltype(cuFileHandleRegister)* handle_register = nullptr; @@ -117,7 +117,8 @@ void cufile_shim::modify_cufile_json() const void cufile_shim::load_cufile_lib() { - cf_lib = dlopen("libcufile.so", RTLD_NOW); + cf_lib = dlopen("libcufile.so", RTLD_LAZY | RTLD_LOCAL | RTLD_NODELETE); + CUDF_EXPECTS(cf_lib != nullptr, "Failed to load cuFile library"); driver_open = reinterpret_cast(dlsym(cf_lib, "cuFileDriverOpen")); CUDF_EXPECTS(driver_open != nullptr, "could not find cuFile cuFileDriverOpen symbol"); driver_close = reinterpret_cast(dlsym(cf_lib, "cuFileDriverClose")); diff --git a/cpp/src/labeling/label_bins.cu b/cpp/src/labeling/label_bins.cu index 5007c3028ad..774027ed322 100644 --- a/cpp/src/labeling/label_bins.cu +++ b/cpp/src/labeling/label_bins.cu @@ -146,9 +146,9 @@ std::unique_ptr label_bins(column_view const& input, left_begin, left_end, right_begin)); } - const auto mask_and_count = valid_if(output_begin, output_end, filter_null_sentinel()); + auto mask_and_count = valid_if(output_begin, output_end, filter_null_sentinel(), stream, mr); - output->set_null_mask(mask_and_count.first, mask_and_count.second); + output->set_null_mask(std::move(mask_and_count.first), mask_and_count.second); return output; } diff --git a/cpp/src/lists/combine/concatenate_list_elements.cu b/cpp/src/lists/combine/concatenate_list_elements.cu index 2ddede97ce4..240543db7bb 100644 --- a/cpp/src/lists/combine/concatenate_list_elements.cu +++ b/cpp/src/lists/combine/concatenate_list_elements.cu @@ -70,7 +70,9 @@ std::unique_ptr concatenate_lists_ignore_null(column_view const& input, // The child column of the output lists column is just copied from the input column. auto out_entries = std::make_unique( - lists_column_view(lists_column_view(input).get_sliced_child(stream)).get_sliced_child(stream)); + lists_column_view(lists_column_view(input).get_sliced_child(stream)).get_sliced_child(stream), + stream, + mr); auto [null_mask, null_count] = [&] { if (!build_null_mask) diff --git a/cpp/src/lists/copying/copying.cu b/cpp/src/lists/copying/copying.cu index d4a3d5555a6..e9d183bc073 100644 --- a/cpp/src/lists/copying/copying.cu +++ b/cpp/src/lists/copying/copying.cu @@ -84,7 +84,9 @@ std::unique_ptr copy_slice(lists_column_view const& lists, std::move(offsets), std::move(child), cudf::UNKNOWN_NULL_COUNT, - std::move(null_mask)); + std::move(null_mask), + stream, + mr); } } // namespace detail diff --git a/cpp/src/lists/copying/gather.cu b/cpp/src/lists/copying/gather.cu index 60cf4027621..fe45cdfc338 100644 --- a/cpp/src/lists/copying/gather.cu +++ b/cpp/src/lists/copying/gather.cu @@ -176,7 +176,9 @@ std::unique_ptr gather_list_nested(cudf::lists_column_view const& list, std::move(child_gd.offsets), std::move(child), null_count, - std::move(null_mask)); + std::move(null_mask), + stream, + mr); } // it's a leaf. do a regular gather @@ -187,7 +189,9 @@ std::unique_ptr gather_list_nested(cudf::lists_column_view const& list, std::move(child_gd.offsets), std::move(child), null_count, - std::move(null_mask)); + std::move(null_mask), + stream, + mr); } } // namespace detail diff --git a/cpp/src/lists/drop_list_duplicates.cu b/cpp/src/lists/drop_list_duplicates.cu index 527e834c76c..b86e028192e 100644 --- a/cpp/src/lists/drop_list_duplicates.cu +++ b/cpp/src/lists/drop_list_duplicates.cu @@ -634,17 +634,21 @@ std::pair, std::unique_ptr> drop_list_duplicates // If the values lists column is not given, its corresponding output will be nullptr. auto out_values = values ? make_lists_column(keys.size(), - std::make_unique(output_offsets->view()), + std::make_unique(output_offsets->view(), stream, mr), std::move(unique_entries_and_list_indices[1]), values.value().null_count(), - cudf::detail::copy_bitmask(values.value().parent(), stream, mr)) + cudf::detail::copy_bitmask(values.value().parent(), stream, mr), + stream, + mr) : nullptr; auto out_keys = make_lists_column(keys.size(), std::move(output_offsets), std::move(unique_entries_and_list_indices[0]), keys.null_count(), - cudf::detail::copy_bitmask(keys.parent(), stream, mr)); + cudf::detail::copy_bitmask(keys.parent(), stream, mr), + stream, + mr); return std::pair{std::move(out_keys), std::move(out_values)}; } diff --git a/cpp/src/lists/interleave_columns.cu b/cpp/src/lists/interleave_columns.cu index 220cb25a942..913f2771a0e 100644 --- a/cpp/src/lists/interleave_columns.cu +++ b/cpp/src/lists/interleave_columns.cu @@ -50,7 +50,7 @@ generate_list_offsets_and_validities(table_view const& input, auto const num_cols = input.num_columns(); auto const num_rows = input.num_rows(); auto const num_output_lists = num_rows * num_cols; - auto const table_dv_ptr = table_device_view::create(input); + auto const table_dv_ptr = table_device_view::create(input, stream); // The output offsets column. static_assert(sizeof(offset_type) == sizeof(int32_t)); @@ -217,7 +217,7 @@ struct interleave_list_entries_impl(), data_has_null_mask}; @@ -250,7 +250,7 @@ struct interleave_list_entries_impl( rmm::mr::device_memory_resource* mr) const noexcept { auto const num_cols = input.num_columns(); - auto const table_dv_ptr = table_device_view::create(input); + auto const table_dv_ptr = table_device_view::create(input, stream); // The output child column. auto output = allocate_like(lists_column_view(*input.begin()).child(), @@ -258,7 +258,7 @@ struct interleave_list_entries_impl( mask_allocation_policy::NEVER, stream, mr); - auto output_dv_ptr = mutable_column_device_view::create(*output); + auto output_dv_ptr = mutable_column_device_view::create(*output, stream); // The array of int8_t to store entry validities. auto validities = diff --git a/cpp/src/lists/segmented_sort.cu b/cpp/src/lists/segmented_sort.cu index 088db226c24..b7e2b73329a 100644 --- a/cpp/src/lists/segmented_sort.cu +++ b/cpp/src/lists/segmented_sort.cu @@ -266,7 +266,9 @@ std::unique_ptr sort_lists(lists_column_view const& input, std::move(output_offset), std::move(output_child), input.null_count(), - std::move(null_mask)); + std::move(null_mask), + stream, + mr); } std::unique_ptr stable_sort_lists(lists_column_view const& input, @@ -300,7 +302,9 @@ std::unique_ptr stable_sort_lists(lists_column_view const& input, std::move(output_offset), std::move(sorted_child_table->release().front()), input.null_count(), - cudf::detail::copy_bitmask(input.parent(), stream, mr)); + cudf::detail::copy_bitmask(input.parent(), stream, mr), + stream, + mr); } } // namespace detail diff --git a/cpp/src/merge/merge.cu b/cpp/src/merge/merge.cu index 9a456224635..05bd195e764 100644 --- a/cpp/src/merge/merge.cu +++ b/cpp/src/merge/merge.cu @@ -499,7 +499,9 @@ table_ptr_type merge(std::vector const& tables_to_merge, }); // If there is only one non-empty table_view, return its copy - if (merge_queue.size() == 1) { return std::make_unique(merge_queue.top().view); } + if (merge_queue.size() == 1) { + return std::make_unique(merge_queue.top().view, stream, mr); + } // No inputs have rows, return a table with same columns as the first one if (merge_queue.empty()) { return empty_like(first_table); } diff --git a/cpp/src/quantiles/quantile.cu b/cpp/src/quantiles/quantile.cu index 073b318b879..dbb66ec295c 100644 --- a/cpp/src/quantiles/quantile.cu +++ b/cpp/src/quantiles/quantile.cu @@ -80,14 +80,14 @@ struct quantile_functor { } auto d_input = column_device_view::create(input, stream); - auto d_output = mutable_column_device_view::create(output->mutable_view()); + auto d_output = mutable_column_device_view::create(output->mutable_view(), stream); - auto q_device = cudf::detail::make_device_uvector_sync(q); + auto q_device = cudf::detail::make_device_uvector_sync(q, stream); if (!cudf::is_dictionary(input.type())) { auto sorted_data = thrust::make_permutation_iterator(input.data(), ordered_indices); - thrust::transform(rmm::exec_policy(), + thrust::transform(rmm::exec_policy(stream), q_device.begin(), q_device.end(), d_output->template begin(), @@ -97,7 +97,7 @@ struct quantile_functor { } else { auto sorted_data = thrust::make_permutation_iterator( dictionary::detail::make_dictionary_iterator(*d_input), ordered_indices); - thrust::transform(rmm::exec_policy(), + thrust::transform(rmm::exec_policy(stream), q_device.begin(), q_device.end(), d_output->template begin(), diff --git a/cpp/src/quantiles/tdigest/tdigest.cu b/cpp/src/quantiles/tdigest/tdigest.cu index 18e7d02d086..fbdd2e9896b 100644 --- a/cpp/src/quantiles/tdigest/tdigest.cu +++ b/cpp/src/quantiles/tdigest/tdigest.cu @@ -199,7 +199,7 @@ std::unique_ptr compute_approx_percentiles(tdigest_column_view const& in weight.begin(), cumulative_weights->mutable_view().begin()); - auto percentiles_cdv = column_device_view::create(percentiles); + auto percentiles_cdv = column_device_view::create(percentiles, stream); // leaf is a column of size input.size() * percentiles.size() auto const num_output_values = input.size() * percentiles.size(); @@ -212,7 +212,9 @@ std::unique_ptr compute_approx_percentiles(tdigest_column_view const& in thrust::make_counting_iterator(0) + num_output_values, [percentiles = *percentiles_cdv] __device__(size_type i) { return percentiles.is_valid(i % percentiles.size()); - }) + }, + stream, + mr) : std::pair{rmm::device_buffer{}, 0}; }(); @@ -263,8 +265,8 @@ std::unique_ptr make_tdigest_column(size_type num_rows, cudf::make_structs_column(centroids_size, std::move(inner_children), 0, {}, stream, mr); // grouped into lists - auto tdigest = - cudf::make_lists_column(num_rows, std::move(tdigest_offsets), std::move(tdigest_data), 0, {}); + auto tdigest = cudf::make_lists_column( + num_rows, std::move(tdigest_offsets), std::move(tdigest_data), 0, {}, stream, mr); // create the final column std::vector> children; @@ -334,7 +336,9 @@ std::unique_ptr percentile_approx(tdigest_column_view const& input, cudf::make_empty_column(type_id::FLOAT64), input.size(), cudf::detail::create_null_mask( - input.size(), mask_state::ALL_NULL, rmm::cuda_stream_view(stream), mr)); + input.size(), mask_state::ALL_NULL, rmm::cuda_stream_view(stream), mr), + stream, + mr); } // if any of the input digests are empty, nullify the corresponding output rows (values will be diff --git a/cpp/src/reductions/nth_element.cu b/cpp/src/reductions/nth_element.cu index 001ee8c791f..2b8066a57ee 100644 --- a/cpp/src/reductions/nth_element.cu +++ b/cpp/src/reductions/nth_element.cu @@ -38,7 +38,7 @@ std::unique_ptr cudf::reduction::nth_element(column_view const& co auto valid_count = col.size() - col.null_count(); n = wrap_n(valid_count); CUDF_EXPECTS(n >= 0 and n < valid_count, "Index out of bounds"); - auto dcol = column_device_view::create(col); + auto dcol = column_device_view::create(col, stream); auto bitmask_iterator = thrust::make_transform_iterator(cudf::detail::make_validity_iterator(*dcol), [] __device__(auto b) { return static_cast(b); }); diff --git a/cpp/src/reductions/scan/scan_inclusive.cu b/cpp/src/reductions/scan/scan_inclusive.cu index 2b1ac8aa704..5c2b686fd9c 100644 --- a/cpp/src/reductions/scan/scan_inclusive.cu +++ b/cpp/src/reductions/scan/scan_inclusive.cu @@ -212,7 +212,9 @@ struct scan_functor { return make_structs_column(input.size(), std::move(scanned_children), UNKNOWN_NULL_COUNT, - rmm::device_buffer{0, stream, mr}); + rmm::device_buffer{0, stream, mr}, + stream, + mr); } }; diff --git a/cpp/src/reductions/simple.cuh b/cpp/src/reductions/simple.cuh index e5633341ffa..642531434ae 100644 --- a/cpp/src/reductions/simple.cuh +++ b/cpp/src/reductions/simple.cuh @@ -122,7 +122,7 @@ std::unique_ptr fixed_point_reduction(column_view const& col, }(); auto const val = static_cast*>(result.get()); - return cudf::make_fixed_point_scalar(val->value(stream), scale); + return cudf::make_fixed_point_scalar(val->value(stream), scale, stream, mr); } /** diff --git a/cpp/src/replace/nans.cu b/cpp/src/replace/nans.cu index 7b47f8df28d..537b2f3d092 100644 --- a/cpp/src/replace/nans.cu +++ b/cpp/src/replace/nans.cu @@ -48,7 +48,7 @@ struct replace_nans_functor { if (input.is_empty()) { return cudf::make_empty_column(input.type()); } - auto input_device_view = column_device_view::create(input); + auto input_device_view = column_device_view::create(input, stream); size_type size = input.size(); auto predicate = [dinput = *input_device_view] __device__(auto i) { @@ -89,7 +89,7 @@ std::unique_ptr replace_nans(column_view const& input, return type_dispatcher(input.type(), replace_nans_functor{}, input, - *column_device_view::create(replacement), + *column_device_view::create(replacement, stream), replacement.nullable(), stream, mr); @@ -180,10 +180,10 @@ void normalize_nans_and_zeros(mutable_column_view in_out, rmm::cuda_stream_view column_view input = in_out; // to device. unique_ptr which gets automatically cleaned up when we leave - auto device_in = column_device_view::create(input); + auto device_in = column_device_view::create(input, stream); // from device. unique_ptr which gets automatically cleaned up when we leave. - auto device_out = mutable_column_device_view::create(in_out); + auto device_out = mutable_column_device_view::create(in_out, stream); // invoke the actual kernel. cudf::type_dispatcher( diff --git a/cpp/src/replace/nulls.cu b/cpp/src/replace/nulls.cu index d12f18f4827..93bc6cf5ae5 100644 --- a/cpp/src/replace/nulls.cu +++ b/cpp/src/replace/nulls.cu @@ -180,9 +180,9 @@ struct replace_nulls_column_kernel_forwarder { auto replace = replace_nulls; if (output_view.nullable()) replace = replace_nulls; - auto device_in = cudf::column_device_view::create(input); - auto device_out = cudf::mutable_column_device_view::create(output_view); - auto device_replacement = cudf::column_device_view::create(replacement); + auto device_in = cudf::column_device_view::create(input, stream); + auto device_out = cudf::mutable_column_device_view::create(output_view, stream); + auto device_replacement = cudf::column_device_view::create(replacement, stream); rmm::device_scalar valid_counter(0, stream); cudf::size_type* valid_count = valid_counter.data(); @@ -311,7 +311,7 @@ struct replace_nulls_scalar_kernel_forwarder { using ScalarType = cudf::scalar_type_t; auto& s1 = static_cast(replacement); - auto device_in = cudf::column_device_view::create(input); + auto device_in = cudf::column_device_view::create(input, stream); auto func = replace_nulls_functor{s1.data()}; thrust::transform(rmm::exec_policy(stream), @@ -366,7 +366,7 @@ std::unique_ptr replace_nulls_policy_impl(cudf::column_view const& rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto device_in = cudf::column_device_view::create(input); + auto device_in = cudf::column_device_view::create(input, stream); auto index = thrust::make_counting_iterator(0); auto valid_it = cudf::detail::make_validity_iterator(*device_in); auto in_begin = thrust::make_zip_iterator(thrust::make_tuple(index, valid_it)); diff --git a/cpp/src/replace/replace.cu b/cpp/src/replace/replace.cu index a9c7db048b3..3505fe1f5d7 100644 --- a/cpp/src/replace/replace.cu +++ b/cpp/src/replace/replace.cu @@ -323,10 +323,10 @@ struct replace_kernel_forwarder { auto output_view = output->mutable_view(); auto grid = cudf::detail::grid_1d{output_view.size(), BLOCK_SIZE, 1}; - auto device_in = cudf::column_device_view::create(input_col); - auto device_out = cudf::mutable_column_device_view::create(output_view); - auto device_values_to_replace = cudf::column_device_view::create(values_to_replace); - auto device_replacement_values = cudf::column_device_view::create(replacement_values); + auto device_in = cudf::column_device_view::create(input_col, stream); + auto device_out = cudf::mutable_column_device_view::create(output_view, stream); + auto device_values_to_replace = cudf::column_device_view::create(values_to_replace, stream); + auto device_replacement_values = cudf::column_device_view::create(replacement_values, stream); replace<<>>(*device_in, *device_out, @@ -412,7 +412,7 @@ std::unique_ptr replace_kernel_forwarder::operator() offsets = cudf::strings::detail::make_offsets_child_column( sizes_view.begin(), sizes_view.end(), stream, mr); auto offsets_view = offsets->mutable_view(); - auto device_offsets = cudf::mutable_column_device_view::create(offsets_view); + auto device_offsets = cudf::mutable_column_device_view::create(offsets_view, stream); auto const bytes = cudf::detail::get_value(offsets_view, offsets_view.size() - 1, stream); @@ -422,7 +422,7 @@ std::unique_ptr replace_kernel_forwarder::operator()mutable_view(); - auto device_chars = cudf::mutable_column_device_view::create(output_chars_view); + auto device_chars = cudf::mutable_column_device_view::create(output_chars_view, stream); replace_second<<>>( *device_in, *device_replacement, *device_offsets, *device_chars, *device_indices); diff --git a/cpp/src/reshape/interleave_columns.cu b/cpp/src/reshape/interleave_columns.cu index e97c9f8109c..0e3ead3fd99 100644 --- a/cpp/src/reshape/interleave_columns.cu +++ b/cpp/src/reshape/interleave_columns.cu @@ -111,7 +111,7 @@ struct interleave_columns_impl(0); auto index_end = thrust::make_counting_iterator(output_size); diff --git a/cpp/src/round/round.cu b/cpp/src/round/round.cu index 36dd2dabd72..ef021ca8a35 100644 --- a/cpp/src/round/round.cu +++ b/cpp/src/round/round.cu @@ -246,7 +246,8 @@ std::unique_ptr round_with(column_view const& input, // if rounding to more precision than fixed_point is capable of, just need to rescale // note: decimal_places has the opposite sign of numeric::scale_type (therefore have to negate) - if (input.type().scale() > -decimal_places) return cudf::detail::cast(input, result_type); + if (input.type().scale() > -decimal_places) + return cudf::detail::cast(input, result_type, stream, mr); auto result = cudf::make_fixed_width_column( result_type, input.size(), copy_bitmask(input, stream, mr), input.null_count(), stream, mr); diff --git a/cpp/src/search/search.cu b/cpp/src/search/search.cu index 9a677d7907a..241b3c595f1 100644 --- a/cpp/src/search/search.cu +++ b/cpp/src/search/search.cu @@ -189,12 +189,12 @@ bool contains_scalar_dispatch::operator()(column_view const& // first, find the value in the dictionary's key set auto index = cudf::dictionary::detail::get_index(dict_col, value, stream); // if found, check the index is actually in the indices column - return index->is_valid() ? cudf::type_dispatcher(dict_col.indices().type(), - contains_scalar_dispatch{}, - dict_col.indices(), - *index, - stream) - : false; + return index->is_valid(stream) ? cudf::type_dispatcher(dict_col.indices().type(), + contains_scalar_dispatch{}, + dict_col.indices(), + *index, + stream) + : false; } } // namespace diff --git a/cpp/src/strings/split/split_record.cu b/cpp/src/strings/split/split_record.cu index a31716ad2a2..929d21a024c 100644 --- a/cpp/src/strings/split/split_record.cu +++ b/cpp/src/strings/split/split_record.cu @@ -255,7 +255,9 @@ std::unique_ptr split_record_fn(strings_column_view const& strings, std::move(offsets), std::move(strings_output), strings.null_count(), - copy_bitmask(strings.parent(), stream, mr)); + copy_bitmask(strings.parent(), stream, mr), + stream, + mr); } template diff --git a/cpp/src/structs/utilities.cpp b/cpp/src/structs/utilities.cpp index d4e2f48feba..43a32c8405a 100644 --- a/cpp/src/structs/utilities.cpp +++ b/cpp/src/structs/utilities.cpp @@ -430,7 +430,7 @@ std::tuple> superimpose_parent auto superimposed_columns = std::vector{}; auto superimposed_nullmasks = std::vector{}; for (auto col : table) { - auto [superimposed_col, null_masks] = superimpose_parent_nulls(col); + auto [superimposed_col, null_masks] = superimpose_parent_nulls(col, stream, mr); superimposed_columns.push_back(superimposed_col); superimposed_nullmasks.insert(superimposed_nullmasks.begin(), std::make_move_iterator(null_masks.begin()), diff --git a/cpp/src/unary/cast_ops.cu b/cpp/src/unary/cast_ops.cu index 131fde11cf8..f41ebacce53 100644 --- a/cpp/src/unary/cast_ops.cu +++ b/cpp/src/unary/cast_ops.cu @@ -177,13 +177,14 @@ std::unique_ptr rescale(column_view input, using namespace numeric; if (input.type().scale() >= scale) { - auto const scalar = make_fixed_point_scalar(0, scale_type{scale}); + auto const scalar = make_fixed_point_scalar(0, scale_type{scale}, rmm::cuda_stream_default); auto const type = cudf::data_type{cudf::type_to_id(), scale}; return detail::binary_operation(input, *scalar, binary_operator::ADD, type, stream, mr); } else { - auto const diff = input.type().scale() - scale; - auto const scalar = make_fixed_point_scalar(std::pow(10, -diff), scale_type{diff}); - auto const type = cudf::data_type{cudf::type_to_id(), scale}; + auto const diff = input.type().scale() - scale; + auto const scalar = + make_fixed_point_scalar(std::pow(10, -diff), scale_type{diff}, rmm::cuda_stream_default); + auto const type = cudf::data_type{cudf::type_to_id(), scale}; return detail::binary_operation(input, *scalar, binary_operator::DIV, type, stream, mr); } }; @@ -290,7 +291,9 @@ struct dispatch_unary_cast_to { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - if (input.type() == type) return std::make_unique(input); // TODO add test for this + if (input.type() == type) { + return std::make_unique(input, stream, mr); // TODO add test for this + } return detail::rescale(input, numeric::scale_type{type.scale()}, stream, mr); } diff --git a/cpp/src/unary/nan_ops.cu b/cpp/src/unary/nan_ops.cu index 1840aebf8f0..c6b9ecefe94 100644 --- a/cpp/src/unary/nan_ops.cu +++ b/cpp/src/unary/nan_ops.cu @@ -34,7 +34,7 @@ struct nan_dispatcher { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto input_device_view = column_device_view::create(input); + auto input_device_view = column_device_view::create(input, stream); if (input.has_nulls()) { auto input_pair_iterator = make_pair_iterator(*input_device_view);