Skip to content

Commit

Permalink
Replace raw streams with rmm::cuda_stream_view (part 3) (#6744)
Browse files Browse the repository at this point in the history
Converting libcudf to use `rmm::cuda_stream_view` will require a LOT of changes, so I'm splitting it into multiple PRs to ease reviewing. This is the third PR in the series. This series of PRs will

- Replace usage of `cudaStream_t` with `rmm::cuda_stream_view`
- Replace usage of `0` or `nullptr` as a stream identifier with `rmm::cuda_stream_default`
- Ensure all APIs always order the stream parameter before the memory resource parameter. #5119

Contributes to #6645 and #5119

Depends on #6646 and #6648 so this PR will look much bigger until they are merged.

This third PR converts:

 - remaining dictionary functionality
 - cuio
 - lists
 - scalar
 - strings
 - groupby
 - join
 - contiguous_split
 - get_element
 - datetime_ops
 - extract
 - merge
 - partitioning
 - minmax reduction
 - scan
 - byte_cast
 - clamp
 - interleave_columns
 - is_sorted
 - groupby
 - rank
 - tests
 - concurrent map classes
  • Loading branch information
harrism authored Nov 24, 2020
1 parent fd72e5f commit 8cc23bd
Show file tree
Hide file tree
Showing 235 changed files with 3,179 additions and 2,719 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@
- PR #6612 Update JNI to new RMM cuda_stream_view API
- PR #6646 Replace `cudaStream_t` with `rmm::cuda_stream_view` (part 1)
- PR #6648 Replace `cudaStream_t` with `rmm::cuda_stream_view` (part 2)
- PR #6744 Replace `cudaStream_t` with `rmm::cuda_stream_view` (part 3)
- PR #6579 Update scatter APIs to use reference wrapper / const scalar
- PR #6614 Add support for conversion to Pandas nullable dtypes and fix related issue in `cudf.to_json`
- PR #6622 Update `to_pandas` api docs
Expand Down
13 changes: 8 additions & 5 deletions cpp/benchmarks/synchronization/synchronization.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION.
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -15,13 +15,15 @@
*/

#include "synchronization.hpp"

#include <cudf/utilities/error.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_buffer.hpp>

cuda_event_timer::cuda_event_timer(benchmark::State& state,
bool flush_l2_cache,
cudaStream_t stream)
rmm::cuda_stream_view stream)
: p_state(&state), stream(stream)
{
// flush all of L2$
Expand All @@ -35,18 +37,19 @@ cuda_event_timer::cuda_event_timer(benchmark::State& state,
if (l2_cache_bytes > 0) {
const int memset_value = 0;
rmm::device_buffer l2_cache_buffer(l2_cache_bytes, stream);
CUDA_TRY(cudaMemsetAsync(l2_cache_buffer.data(), memset_value, l2_cache_bytes, stream));
CUDA_TRY(
cudaMemsetAsync(l2_cache_buffer.data(), memset_value, l2_cache_bytes, stream.value()));
}
}

CUDA_TRY(cudaEventCreate(&start));
CUDA_TRY(cudaEventCreate(&stop));
CUDA_TRY(cudaEventRecord(start, stream));
CUDA_TRY(cudaEventRecord(start, stream.value()));
}

cuda_event_timer::~cuda_event_timer()
{
CUDA_TRY(cudaEventRecord(stop, stream));
CUDA_TRY(cudaEventRecord(stop, stream.value()));
CUDA_TRY(cudaEventSynchronize(stop));

float milliseconds = 0.0f;
Expand Down
15 changes: 10 additions & 5 deletions cpp/benchmarks/synchronization/synchronization.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION.
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -33,7 +33,7 @@
for (auto _ : state){
cudaStream_t stream = 0;
rmm::cuda_stream_view stream{}; // default stream, could be another stream
// Create (Construct) an object of this class. You HAVE to pass in the
// benchmark::State object you are using. It measures the time from its
Expand All @@ -44,7 +44,7 @@
cuda_event_timer raii(state, true, stream); // flush_l2_cache = true
// Now perform the operations that is to be benchmarked
sample_kernel<<<1, 256, 0, stream>>>(); // Possibly launching a CUDA kernel
sample_kernel<<<1, 256, 0, stream.value()>>>(); // Possibly launching a CUDA kernel
}
}
Expand All @@ -61,8 +61,11 @@

// Google Benchmark library
#include <benchmark/benchmark.h>

#include <cudf/types.hpp>

#include <rmm/cuda_stream_view.hpp>

#include <driver_types.h>

class cuda_event_timer {
Expand All @@ -77,7 +80,9 @@ class cuda_event_timer {
* every iteration.
* @param[in] stream_ The CUDA stream we are measuring time on.
**/
cuda_event_timer(benchmark::State& state, bool flush_l2_cache, cudaStream_t stream_ = 0);
cuda_event_timer(benchmark::State& state,
bool flush_l2_cache,
rmm::cuda_stream_view stream = rmm::cuda_stream_default);

// The user must provide a benchmark::State object to set
// the timer so we disable the default c'tor.
Expand All @@ -91,7 +96,7 @@ class cuda_event_timer {
private:
cudaEvent_t start;
cudaEvent_t stop;
cudaStream_t stream;
rmm::cuda_stream_view stream;
benchmark::State* p_state;
};

Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/detail/copy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ std::unique_ptr<table> sample(
*/
std::unique_ptr<scalar> get_element(column_view const& input,
size_type index,
cudaStream_t stream,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);
} // namespace detail
} // namespace cudf
2 changes: 1 addition & 1 deletion cpp/include/cudf/detail/copy_range.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ void copy_range(SourceValueIterator source_value_begin,
nullptr);
}

CHECK_CUDA(stream);
CHECK_CUDA(stream.value());
}

/**
Expand Down
5 changes: 2 additions & 3 deletions cpp/include/cudf/detail/scatter.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -171,10 +171,9 @@ struct column_scatterer_impl<dictionary32, MapIterator> {
"scatter dictionary keys must be the same type");

// first combine keys so both dictionaries have the same set
auto target_matched = dictionary::detail::add_keys(target, source.keys(), mr, stream.value());
auto target_matched = dictionary::detail::add_keys(target, source.keys(), stream, mr);
auto const target_view = dictionary_column_view(target_matched->view());
auto source_matched = dictionary::detail::set_keys(
source, target_view.keys(), rmm::mr::get_current_device_resource(), stream.value());
auto source_matched = dictionary::detail::set_keys(source, target_view.keys(), stream);
auto const source_view = dictionary_column_view(source_matched->view());

// now build the new indices by doing a scatter on just the matched indices
Expand Down
14 changes: 8 additions & 6 deletions cpp/include/cudf/dictionary/detail/encode.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
#include <cudf/column/column_view.hpp>
#include <cudf/dictionary/dictionary_column_view.hpp>

#include <rmm/cuda_stream_view.hpp>

namespace cudf {
namespace dictionary {
namespace detail {
Expand All @@ -44,15 +46,15 @@ namespace detail {
*
* @param column The column to dictionary encode.
* @param indices_type The integer type to use for the indices.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return Returns a dictionary column.
*/
std::unique_ptr<column> encode(
column_view const& column,
data_type indices_type = data_type{type_id::UINT32},
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(),
cudaStream_t stream = 0);
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Create a column by gathering the keys from the provided
Expand All @@ -65,14 +67,14 @@ std::unique_ptr<column> encode(
* ```
*
* @param dictionary_column Existing dictionary column.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New column with type matching the dictionary_column's keys.
*/
std::unique_ptr<column> decode(
dictionary_column_view const& dictionary_column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(),
cudaStream_t stream = 0);
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Return minimal integer type for the given number of elements.
Expand Down
8 changes: 5 additions & 3 deletions cpp/include/cudf/dictionary/detail/merge.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
#include <cudf/detail/merge.cuh>
#include <cudf/dictionary/dictionary_column_view.hpp>

#include <rmm/cuda_stream_view.hpp>

namespace cudf {
namespace dictionary {
namespace detail {
Expand All @@ -33,15 +35,15 @@ namespace detail {
* @param lcol First column.
* @param rcol Second column.
* @param row_order Indexes for each column.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New dictionary column.
*/
std::unique_ptr<column> merge(dictionary_column_view const& lcol,
dictionary_column_view const& rcol,
cudf::detail::index_vector const& row_order,
rmm::mr::device_memory_resource* mr,
cudaStream_t stream);
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

} // namespace detail
} // namespace dictionary
Expand Down
26 changes: 14 additions & 12 deletions cpp/include/cudf/dictionary/detail/update_keys.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
#include <cudf/dictionary/dictionary_column_view.hpp>
#include <cudf/table/table_view.hpp>

#include <rmm/cuda_stream_view.hpp>

namespace cudf {
namespace dictionary {
namespace detail {
Expand All @@ -31,8 +33,8 @@ namespace detail {
std::unique_ptr<column> add_keys(
dictionary_column_view const& dictionary_column,
column_view const& new_keys,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(),
cudaStream_t stream = 0);
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @copydoc cudf::dictionary::remove_keys(dictionary_column_view const&,column_view
Expand All @@ -43,8 +45,8 @@ std::unique_ptr<column> add_keys(
std::unique_ptr<column> remove_keys(
dictionary_column_view const& dictionary_column,
column_view const& keys_to_remove,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(),
cudaStream_t stream = 0);
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @copydoc cudf::dictionary::remove_unused_keys(dictionary_column_view
Expand All @@ -54,8 +56,8 @@ std::unique_ptr<column> remove_keys(
*/
std::unique_ptr<column> remove_unused_keys(
dictionary_column_view const& dictionary_column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(),
cudaStream_t stream = 0);
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @copydoc cudf::dictionary::set_keys(dictionary_column_view
Expand All @@ -66,8 +68,8 @@ std::unique_ptr<column> remove_unused_keys(
std::unique_ptr<column> set_keys(
dictionary_column_view const& dictionary_column,
column_view const& keys,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(),
cudaStream_t stream = 0);
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Create new dictionaries that have keys merged from the input dictionaries.
Expand All @@ -82,8 +84,8 @@ std::unique_ptr<column> set_keys(
*/
std::vector<std::unique_ptr<column>> match_dictionaries(
std::vector<dictionary_column_view> input,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(),
cudaStream_t stream = 0);
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Create new dictionaries that have keys merged from dictionary columns
Expand All @@ -106,8 +108,8 @@ std::vector<std::unique_ptr<column>> match_dictionaries(
*/
std::pair<std::vector<std::unique_ptr<column>>, std::vector<table_view>> match_dictionaries(
std::vector<table_view> tables,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(),
cudaStream_t stream = 0);
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

} // namespace detail
} // namespace dictionary
Expand Down
14 changes: 8 additions & 6 deletions cpp/include/cudf/dictionary/dictionary_factories.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
#include <cudf/column/column.hpp>
#include <cudf/column/column_view.hpp>

#include <rmm/cuda_stream_view.hpp>

namespace cudf {
/**
* @addtogroup column_factories Factories
Expand Down Expand Up @@ -54,15 +56,15 @@ namespace cudf {
*
* @param keys_column Column of unique, ordered values to use as the new dictionary column's keys.
* @param indices_column Indices to use for the new dictionary column.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New dictionary column.
*/
std::unique_ptr<column> make_dictionary_column(
column_view const& keys_column,
column_view const& indices_column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(),
cudaStream_t stream = 0);
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Construct a dictionary column by taking ownership of the provided keys
Expand Down Expand Up @@ -106,15 +108,15 @@ std::unique_ptr<column> make_dictionary_column(std::unique_ptr<column> keys_colu
*
* @param keys Column of unique, ordered values to use as the new dictionary column's keys.
* @param indices Indices values and null-mask to use for the new dictionary column.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New dictionary column.
*/
std::unique_ptr<column> make_dictionary_column(
std::unique_ptr<column> keys_column,
std::unique_ptr<column> indices_column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(),
cudaStream_t stream = 0);
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of group
} // namespace cudf
8 changes: 5 additions & 3 deletions cpp/include/cudf/groupby.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION.
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -20,6 +20,8 @@
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>

#include <rmm/cuda_stream_view.hpp>

#include <utility>
#include <vector>

Expand Down Expand Up @@ -222,13 +224,13 @@ class groupby {
*/
std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> dispatch_aggregation(
std::vector<aggregation_request> const& requests,
cudaStream_t stream,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

// Sort-based groupby
std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> sort_aggregate(
std::vector<aggregation_request> const& requests,
cudaStream_t stream,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);
};
/** @} */
Expand Down
4 changes: 2 additions & 2 deletions cpp/include/cudf/io/avro.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@

#include "types.hpp"

#include <rmm/mr/device/per_device_resource.hpp>

#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>

#include <rmm/mr/device/per_device_resource.hpp>

#include <memory>
#include <string>
#include <vector>
Expand Down
1 change: 0 additions & 1 deletion cpp/include/cudf/io/csv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
#pragma once

#include <cudf/io/types.hpp>

#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/error.hpp>
Expand Down
Loading

0 comments on commit 8cc23bd

Please sign in to comment.