Skip to content

Commit

Permalink
Replace remaining instances of rmm::cuda_stream_default with cudf::default_stream_value (#11082)
Browse files Browse the repository at this point in the history

Closes #10864.
Also closes #9614.

This PR is a follow-up to #10877.  It replaces all of the remaining instances of `rmm::cuda_stream_default` with `cudf::default_stream_value`.

There are many replacements and added includes, along with some reformatting due to clang-format, but, as with #10877, there should be no noticeable functional change here.

Authors:
  - Jim Brennan (https://github.com/jbrennan333)

Approvers:
  - Robert (Bobby) Evans (https://github.com/revans2)
  - https://github.com/nvdbaranec
  - Nghia Truong (https://github.com/ttnghia)
  - Karthikeyan (https://github.com/karthikeyann)

URL: #11082
  • Loading branch information
jbrennan333 authored Jun 22, 2022
1 parent b250ac7 commit f4f3428
Show file tree
Hide file tree
Showing 312 changed files with 1,430 additions and 1,116 deletions.
7 changes: 4 additions & 3 deletions cpp/benchmarks/column/concatenate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

#include <cudf/concatenate.hpp>
#include <cudf/table/table.hpp>
#include <cudf/utilities/default_stream.hpp>

#include <thrust/iterator/constant_iterator.h>
#include <thrust/iterator/counting_iterator.h>
Expand All @@ -48,7 +49,7 @@ static void BM_concatenate(benchmark::State& state)
CUDF_CHECK_CUDA(0);

for (auto _ : state) {
cuda_event_timer raii(state, true, rmm::cuda_stream_default);
cuda_event_timer raii(state, true, cudf::default_stream_value);
auto result = cudf::concatenate(column_views);
}

Expand Down Expand Up @@ -90,7 +91,7 @@ static void BM_concatenate_tables(benchmark::State& state)
CUDF_CHECK_CUDA(0);

for (auto _ : state) {
cuda_event_timer raii(state, true, rmm::cuda_stream_default);
cuda_event_timer raii(state, true, cudf::default_stream_value);
auto result = cudf::concatenate(table_views);
}

Expand Down Expand Up @@ -149,7 +150,7 @@ static void BM_concatenate_strings(benchmark::State& state)
CUDF_CHECK_CUDA(0);

for (auto _ : state) {
cuda_event_timer raii(state, true, rmm::cuda_stream_default);
cuda_event_timer raii(state, true, cudf::default_stream_value);
auto result = cudf::concatenate(column_views);
}

Expand Down
24 changes: 12 additions & 12 deletions cpp/benchmarks/common/generate_input.cu
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@
#include <cudf/scalar/scalar_factories.hpp>
#include <cudf/table/table.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/error.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_buffer.hpp>
#include <rmm/device_uvector.hpp>

Expand Down Expand Up @@ -206,7 +206,7 @@ struct random_value_fn<T, std::enable_if_t<cudf::is_chrono<T>()>> {
} else {
// Don't need a random seconds generator for sub-second intervals
seconds_gen = [range_s](thrust::minstd_rand&, size_t size) {
rmm::device_uvector<int64_t> result(size, rmm::cuda_stream_default);
rmm::device_uvector<int64_t> result(size, cudf::default_stream_value);
thrust::fill(thrust::device, result.begin(), result.end(), range_s.second.count());
return result;
};
Expand All @@ -224,7 +224,7 @@ struct random_value_fn<T, std::enable_if_t<cudf::is_chrono<T>()>> {
{
auto const sec = seconds_gen(engine, size);
auto const ns = nanoseconds_gen(engine, size);
rmm::device_uvector<T> result(size, rmm::cuda_stream_default);
rmm::device_uvector<T> result(size, cudf::default_stream_value);
thrust::transform(
thrust::device,
sec.begin(),
Expand Down Expand Up @@ -268,7 +268,7 @@ struct random_value_fn<T, std::enable_if_t<cudf::is_fixed_point<T>()>> {
scale = numeric::scale_type{scale_dist(engine_scale)};
}
auto const ints = dist(engine, size);
rmm::device_uvector<T> result(size, rmm::cuda_stream_default);
rmm::device_uvector<T> result(size, cudf::default_stream_value);
// Clamp the generated random value to the specified range
thrust::transform(thrust::device,
ints.begin(),
Expand Down Expand Up @@ -313,7 +313,7 @@ struct random_value_fn<T, typename std::enable_if_t<std::is_same_v<T, bool>>> {
random_value_fn(distribution_params<bool> const& desc)
: dist{[valid_prob = desc.probability_true](thrust::minstd_rand& engine,
size_t size) -> rmm::device_uvector<bool> {
rmm::device_uvector<bool> result(size, rmm::cuda_stream_default);
rmm::device_uvector<bool> result(size, cudf::default_stream_value);
thrust::tabulate(
thrust::device, result.begin(), result.end(), bool_generator(engine, valid_prob));
return result;
Expand Down Expand Up @@ -365,7 +365,7 @@ rmm::device_uvector<cudf::size_type> sample_indices_with_run_length(cudf::size_t
return samples_indices[sample_idx];
});
rmm::device_uvector<cudf::size_type> repeated_sample_indices(num_rows,
rmm::cuda_stream_default);
cudf::default_stream_value);
thrust::copy(thrust::device,
avg_repeated_sample_indices_iterator,
avg_repeated_sample_indices_iterator + num_rows,
Expand Down Expand Up @@ -403,8 +403,8 @@ std::unique_ptr<cudf::column> create_random_column(data_profile const& profile,

// Distribution for picking elements from the array of samples
auto const avg_run_len = profile.get_avg_run_length();
rmm::device_uvector<T> data(0, rmm::cuda_stream_default);
rmm::device_uvector<bool> null_mask(0, rmm::cuda_stream_default);
rmm::device_uvector<T> data(0, cudf::default_stream_value);
rmm::device_uvector<bool> null_mask(0, cudf::default_stream_value);

if (cardinality == 0) {
data = value_dist(engine, num_rows);
Expand All @@ -413,8 +413,8 @@ std::unique_ptr<cudf::column> create_random_column(data_profile const& profile,
// generate n samples and gather.
auto const sample_indices =
sample_indices_with_run_length(avg_run_len, cardinality, num_rows, engine);
data = rmm::device_uvector<T>(num_rows, rmm::cuda_stream_default);
null_mask = rmm::device_uvector<bool>(num_rows, rmm::cuda_stream_default);
data = rmm::device_uvector<T>(num_rows, cudf::default_stream_value);
null_mask = rmm::device_uvector<bool>(num_rows, cudf::default_stream_value);
thrust::gather(
thrust::device, sample_indices.begin(), sample_indices.end(), samples.begin(), data.begin());
thrust::gather(thrust::device,
Expand Down Expand Up @@ -493,12 +493,12 @@ std::unique_ptr<cudf::column> create_random_utf8_string_column(data_profile cons
auto valid_lengths = thrust::make_transform_iterator(
thrust::make_zip_iterator(thrust::make_tuple(lengths.begin(), null_mask.begin())),
valid_or_zero{});
rmm::device_uvector<cudf::size_type> offsets(num_rows + 1, rmm::cuda_stream_default);
rmm::device_uvector<cudf::size_type> offsets(num_rows + 1, cudf::default_stream_value);
thrust::exclusive_scan(
thrust::device, valid_lengths, valid_lengths + lengths.size(), offsets.begin());
// offsets are ready.
auto chars_length = *thrust::device_pointer_cast(offsets.end() - 1);
rmm::device_uvector<char> chars(chars_length, rmm::cuda_stream_default);
rmm::device_uvector<char> chars(chars_length, cudf::default_stream_value);
thrust::for_each_n(thrust::device,
thrust::make_zip_iterator(offsets.begin(), offsets.begin() + 1),
num_rows,
Expand Down
8 changes: 5 additions & 3 deletions cpp/benchmarks/common/random_distribution_factory.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

#include "generate_input.hpp"

#include <cudf/utilities/default_stream.hpp>

#include <rmm/device_uvector.hpp>

#include <thrust/execution_policy.h>
Expand Down Expand Up @@ -146,7 +148,7 @@ distribution_fn<T> make_distribution(distribution_id dist_id, T lower_bound, T u
case distribution_id::NORMAL:
return [lower_bound, upper_bound, dist = make_normal_dist(lower_bound, upper_bound)](
thrust::minstd_rand& engine, size_t size) -> rmm::device_uvector<T> {
rmm::device_uvector<T> result(size, rmm::cuda_stream_default);
rmm::device_uvector<T> result(size, cudf::default_stream_value);
thrust::tabulate(thrust::device,
result.begin(),
result.end(),
Expand All @@ -156,7 +158,7 @@ distribution_fn<T> make_distribution(distribution_id dist_id, T lower_bound, T u
case distribution_id::UNIFORM:
return [lower_bound, upper_bound, dist = make_uniform_dist(lower_bound, upper_bound)](
thrust::minstd_rand& engine, size_t size) -> rmm::device_uvector<T> {
rmm::device_uvector<T> result(size, rmm::cuda_stream_default);
rmm::device_uvector<T> result(size, cudf::default_stream_value);
thrust::tabulate(thrust::device,
result.begin(),
result.end(),
Expand All @@ -167,7 +169,7 @@ distribution_fn<T> make_distribution(distribution_id dist_id, T lower_bound, T u
// kind of exponential distribution from lower_bound to upper_bound.
return [lower_bound, upper_bound, dist = geometric_distribution<T>(lower_bound, upper_bound)](
thrust::minstd_rand& engine, size_t size) -> rmm::device_uvector<T> {
rmm::device_uvector<T> result(size, rmm::cuda_stream_default);
rmm::device_uvector<T> result(size, cudf::default_stream_value);
thrust::tabulate(thrust::device,
result.begin(),
result.end(),
Expand Down
3 changes: 2 additions & 1 deletion cpp/benchmarks/copying/copy_if_else.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/copying.hpp>
#include <cudf/utilities/default_stream.hpp>

#include <rmm/device_buffer.hpp>

Expand All @@ -44,7 +45,7 @@ static void BM_copy_if_else(benchmark::State& state, bool nulls)
cudf::column_view lhs(input->view().column(0));

for (auto _ : state) {
cuda_event_timer raii(state, true, rmm::cuda_stream_default);
cuda_event_timer raii(state, true, cudf::default_stream_value);
cudf::copy_if_else(lhs, rhs, decision);
}
}
Expand Down
3 changes: 2 additions & 1 deletion cpp/benchmarks/copying/shift.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,12 @@

#include <cudf/copying.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/default_stream.hpp>

template <typename T, typename ScalarType = cudf::scalar_type_t<T>>
std::unique_ptr<cudf::scalar> make_scalar(
T value = 0,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::cuda_stream_view stream = cudf::default_stream_value,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
auto s = new ScalarType(value, true, stream, mr);
Expand Down
3 changes: 2 additions & 1 deletion cpp/benchmarks/groupby/group_struct_keys.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

#include <cudf/detail/aggregation/aggregation.hpp>
#include <cudf/groupby.hpp>
#include <cudf/utilities/default_stream.hpp>

#include <nvbench/nvbench.cuh>

Expand Down Expand Up @@ -87,7 +88,7 @@ void bench_groupby_struct_keys(nvbench::state& state)
requests[0].aggregations.push_back(cudf::make_min_aggregation<cudf::groupby_aggregation>());

// Set up nvbench default stream
auto stream = rmm::cuda_stream_default;
auto stream = cudf::default_stream_value;
state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));

state.exec(nvbench::exec_tag::sync,
Expand Down
3 changes: 2 additions & 1 deletion cpp/benchmarks/hashing/hash.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

#include <cudf/hashing.hpp>
#include <cudf/table/table.hpp>
#include <cudf/utilities/default_stream.hpp>

// Benchmark class for the hash benchmarks; derives publicly from
// cudf::benchmark and declares no members of its own.
class HashBenchmark : public cudf::benchmark {
};
Expand All @@ -34,7 +35,7 @@ static void BM_hash(benchmark::State& state, cudf::hash_id hid, contains_nulls h
data->get_column(0).set_null_mask(rmm::device_buffer{}, 0);

for (auto _ : state) {
cuda_event_timer raii(state, true, rmm::cuda_stream_default);
cuda_event_timer raii(state, true, cudf::default_stream_value);
cudf::hash(data->view(), hid);
}
}
Expand Down
5 changes: 2 additions & 3 deletions cpp/benchmarks/io/text/multibyte_split.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@
#include <cudf/scalar/scalar_factories.hpp>
#include <cudf/strings/combine.hpp>
#include <cudf/types.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <cudf/utilities/default_stream.hpp>

#include <thrust/host_vector.h>
#include <thrust/transform.h>
Expand Down Expand Up @@ -106,7 +105,7 @@ static void BM_multibyte_split(benchmark::State& state)
device_input.data(),
device_input.size() * sizeof(char),
cudaMemcpyDeviceToHost,
rmm::cuda_stream_default);
cudf::default_stream_value);

auto temp_file_name = random_file_in_dir(temp_dir.path());

Expand Down
3 changes: 2 additions & 1 deletion cpp/benchmarks/iterator/iterator.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <cudf/detail/iterator.cuh>
#include <cudf/detail/utilities/device_operators.cuh>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/utilities/default_stream.hpp>

#include <rmm/device_uvector.hpp>

Expand Down Expand Up @@ -54,7 +55,7 @@ inline auto reduce_by_cub(OutputIterator result, InputIterator d_in, int num_ite
nullptr, temp_storage_bytes, d_in, result, num_items, cudf::DeviceSum{}, init);

// Allocate temporary storage
rmm::device_buffer d_temp_storage(temp_storage_bytes, rmm::cuda_stream_default);
rmm::device_buffer d_temp_storage(temp_storage_bytes, cudf::default_stream_value);

// Run reduction
cub::DeviceReduce::Reduce(
Expand Down
4 changes: 2 additions & 2 deletions cpp/benchmarks/join/generate_input_tables.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@
#pragma once

#include <cudf/detail/utilities/device_atomics.cuh>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/error.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/distance.h>
Expand Down Expand Up @@ -154,7 +154,7 @@ void generate_input_tables(key_type* const build_tbl,

const int num_states =
num_sms * std::max(num_blocks_init_build_tbl, num_blocks_init_probe_tbl) * block_size;
rmm::device_uvector<curandState> devStates(num_states, rmm::cuda_stream_default);
rmm::device_uvector<curandState> devStates(num_states, cudf::default_stream_value);

init_curand<<<(num_states - 1) / block_size + 1, block_size>>>(devStates.data(), num_states);

Expand Down
5 changes: 3 additions & 2 deletions cpp/benchmarks/join/join_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include <cudf/join.hpp>
#include <cudf/scalar/scalar_factories.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/error.hpp>

#include <nvbench/nvbench.cuh>
Expand Down Expand Up @@ -125,7 +126,7 @@ static void BM_join(state_type& state, Join JoinFunc)
// Benchmark the inner join operation
if constexpr (std::is_same_v<state_type, benchmark::State> and (not is_conditional)) {
for (auto _ : state) {
cuda_event_timer raii(state, true, rmm::cuda_stream_default);
cuda_event_timer raii(state, true, cudf::default_stream_value);

auto result = JoinFunc(
probe_table, build_table, columns_to_join, columns_to_join, cudf::null_equality::UNEQUAL);
Expand All @@ -152,7 +153,7 @@ static void BM_join(state_type& state, Join JoinFunc)
cudf::ast::operation(cudf::ast::ast_operator::EQUAL, col_ref_left_0, col_ref_right_0);

for (auto _ : state) {
cuda_event_timer raii(state, true, rmm::cuda_stream_default);
cuda_event_timer raii(state, true, cudf::default_stream_value);

auto result =
JoinFunc(probe_table, build_table, left_zero_eq_right_zero, cudf::null_equality::UNEQUAL);
Expand Down
4 changes: 2 additions & 2 deletions cpp/benchmarks/lists/copying/scatter_lists.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
#include <cudf/copying.hpp>
#include <cudf/null_mask.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/default_stream.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>

Expand All @@ -40,7 +40,7 @@ class ScatterLists : public cudf::benchmark {
template <class TypeParam, bool coalesce>
void BM_lists_scatter(::benchmark::State& state)
{
auto stream = rmm::cuda_stream_default;
auto stream = cudf::default_stream_value;
auto mr = rmm::mr::get_current_device_resource();

const size_type base_size{(size_type)state.range(0)};
Expand Down
3 changes: 2 additions & 1 deletion cpp/benchmarks/quantiles/quantiles.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/quantiles.hpp>
#include <cudf/utilities/default_stream.hpp>

#include <thrust/execution_policy.h>
#include <thrust/tabulate.h>
Expand Down Expand Up @@ -50,7 +51,7 @@ static void BM_quantiles(benchmark::State& state, bool nulls)
thrust::seq, q.begin(), q.end(), [n_quantiles](auto i) { return i * (1.0f / n_quantiles); });

for (auto _ : state) {
cuda_event_timer raii(state, true, rmm::cuda_stream_default);
cuda_event_timer raii(state, true, cudf::default_stream_value);

auto result = cudf::quantiles(input, q);
// auto result = (stable) ? cudf::stable_sorted_order(input) : cudf::sorted_order(input);
Expand Down
3 changes: 2 additions & 1 deletion cpp/benchmarks/sort/rank.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

#include <cudf/column/column_view.hpp>
#include <cudf/sorting.hpp>
#include <cudf/utilities/default_stream.hpp>

// Benchmark class for the rank benchmarks; derives publicly from
// cudf::benchmark and declares no members of its own.
class Rank : public cudf::benchmark {
};
Expand All @@ -38,7 +39,7 @@ static void BM_rank(benchmark::State& state, bool nulls)
cudf::column_view input{keys_table->get_column(0)};

for (auto _ : state) {
cuda_event_timer raii(state, true, rmm::cuda_stream_default);
cuda_event_timer raii(state, true, cudf::default_stream_value);

auto result = cudf::rank(input,
cudf::rank_method::FIRST,
Expand Down
3 changes: 2 additions & 1 deletion cpp/benchmarks/sort/sort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/sorting.hpp>
#include <cudf/utilities/default_stream.hpp>

template <bool stable>
class Sort : public cudf::benchmark {
Expand All @@ -41,7 +42,7 @@ static void BM_sort(benchmark::State& state, bool nulls)
cudf::table_view input{*input_table};

for (auto _ : state) {
cuda_event_timer raii(state, true, rmm::cuda_stream_default);
cuda_event_timer raii(state, true, cudf::default_stream_value);

auto result = (stable) ? cudf::stable_sorted_order(input) : cudf::sorted_order(input);
}
Expand Down
3 changes: 2 additions & 1 deletion cpp/benchmarks/sort/sort_strings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

#include <cudf/sorting.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/default_stream.hpp>

// Benchmark class for the string-sort benchmarks; derives publicly from
// cudf::benchmark and declares no members of its own.
class Sort : public cudf::benchmark {
};
Expand All @@ -31,7 +32,7 @@ static void BM_sort(benchmark::State& state)
auto const table = create_random_table({cudf::type_id::STRING}, row_count{n_rows});

for (auto _ : state) {
cuda_event_timer raii(state, true, rmm::cuda_stream_default);
cuda_event_timer raii(state, true, cudf::default_stream_value);
cudf::sort(table->view());
}
}
Expand Down
Loading

0 comments on commit f4f3428

Please sign in to comment.