Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace remaining uses of device_vector #8343

Merged
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions cpp/docs/DEVELOPER_GUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -454,8 +454,12 @@ int host_value = int_scalar.value();
Allocates a specified number of elements of the specified type. If no initialization value is
provided, all elements are default initialized (this incurs a kernel launch).

**Note**: `rmm::device_vector<T>` is not yet updated to use `device_memory_resource`s, but support
is forthcoming. Likewise, `device_vector` operations cannot be stream ordered.
**Note**: We have removed all usage of `rmm::device_vector` and `thrust::device_vector` from
libcudf, and you should not use them in new code in libcudf without careful consideration. Instead,
use `rmm::device_uvector` along with the utility factories in `device_factories.hpp`. These
utilities enable creation of `uvector`s from host-side vectors and creation of zero-initialized
`uvector`s, so that they are as convenient to use as `device_vector`. Avoiding `device_vector` has
a number of benefits, as described in the following section on `rmm::device_uvector`.

#### `rmm::device_uvector<T>`

Expand All @@ -464,7 +468,9 @@ differences:
- As an optimization, elements are uninitialized and no synchronization occurs at construction.
This limits the types `T` to trivially copyable types.
- All operations are stream ordered (i.e., they accept a `cuda_stream_view` specifying the stream
on which the operation is performed).
on which the operation is performed). This improves safety when using non-default streams.
- `device_uvector.hpp` does not include any `__device__` code, unlike `thrust/device_vector.hpp`,
which means `device_uvector`s can be used in `.cpp` files, rather than just in `.cu` files.

```c++
cuda_stream s;
Expand Down
10 changes: 7 additions & 3 deletions cpp/src/quantiles/quantile.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,15 @@
#include <cudf/detail/null_mask.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/sorting.hpp>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/dictionary/detail/iterator.cuh>
#include <cudf/dictionary/dictionary_column_view.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/error.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>

#include <memory>
#include <vector>
Expand Down Expand Up @@ -76,12 +78,13 @@ struct quantile_functor {
auto d_input = column_device_view::create(input, stream);
auto d_output = mutable_column_device_view::create(output->mutable_view());

rmm::device_vector<double> q_device{q};
auto q_device = cudf::detail::make_device_uvector_sync(q);

if (!cudf::is_dictionary(input.type())) {
auto sorted_data =
thrust::make_permutation_iterator(input.data<StorageType>(), ordered_indices);
thrust::transform(q_device.begin(),
thrust::transform(rmm::exec_policy(),
q_device.begin(),
q_device.end(),
d_output->template begin<StorageResult>(),
[sorted_data, interp = interp, size = size] __device__(double q) {
Expand All @@ -90,7 +93,8 @@ struct quantile_functor {
} else {
auto sorted_data = thrust::make_permutation_iterator(
dictionary::detail::make_dictionary_iterator<T>(*d_input), ordered_indices);
thrust::transform(q_device.begin(),
thrust::transform(rmm::exec_policy(),
q_device.begin(),
q_device.end(),
d_output->template begin<StorageResult>(),
[sorted_data, interp = interp, size = size] __device__(double q) {
Expand Down
2 changes: 1 addition & 1 deletion cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ ConfigureTest(INTEROP_TEST

###################################################################################################
# - io tests --------------------------------------------------------------------------------------
ConfigureTest(DECOMPRESSION_TEST io/comp/decomp_test.cu)
ConfigureTest(DECOMPRESSION_TEST io/comp/decomp_test.cpp)

ConfigureTest(CSV_TEST io/csv_test.cpp)
ConfigureTest(ORC_TEST io/orc_test.cpp)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,13 @@

#include <cudf_test/base_fixture.hpp>

#include <vector>
#include <cudf/utilities/span.hpp>
harrism marked this conversation as resolved.
Show resolved Hide resolved

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_buffer.hpp>
#include <rmm/device_vector.hpp>
#include <rmm/device_uvector.hpp>

#include <vector>

/**
* @brief Base test fixture for decompression
Expand All @@ -36,9 +39,6 @@ struct DecompressTest : public cudf::test::BaseFixture {
ASSERT_CUDA_SUCCEEDED(cudaMallocHost((void**)&inf_args, sizeof(cudf::io::gpu_inflate_input_s)));
ASSERT_CUDA_SUCCEEDED(
cudaMallocHost((void**)&inf_stat, sizeof(cudf::io::gpu_inflate_status_s)));

d_inf_args.resize(1);
d_inf_stat.resize(1);
}

void TearDown() override
Expand All @@ -64,19 +64,22 @@ struct DecompressTest : public cudf::test::BaseFixture {
inf_args->dstDevice = static_cast<uint8_t*>(dst.data());
inf_args->srcSize = src.size();
inf_args->dstSize = dst.size();
ASSERT_CUDA_SUCCEEDED(cudaMemcpyAsync(d_inf_args.data().get(),
rmm::device_uvector<cudf::io::gpu_inflate_input_s> d_inf_args(1, rmm::cuda_stream_default);
rmm::device_uvector<cudf::io::gpu_inflate_status_s> d_inf_stat(1, rmm::cuda_stream_default);
ASSERT_CUDA_SUCCEEDED(cudaMemcpyAsync(d_inf_args.data(),
inf_args,
sizeof(cudf::io::gpu_inflate_input_s),
cudaMemcpyHostToDevice,
0));
ASSERT_CUDA_SUCCEEDED(cudaMemcpyAsync(d_inf_stat.data().get(),
ASSERT_CUDA_SUCCEEDED(cudaMemcpyAsync(d_inf_stat.data(),
inf_stat,
sizeof(cudf::io::gpu_inflate_status_s),
cudaMemcpyHostToDevice,
0));
ASSERT_CUDA_SUCCEEDED(static_cast<Decompressor*>(this)->dispatch());
ASSERT_CUDA_SUCCEEDED(
static_cast<Decompressor*>(this)->dispatch(d_inf_args.data(), d_inf_stat.data()));
ASSERT_CUDA_SUCCEEDED(cudaMemcpyAsync(inf_stat,
d_inf_stat.data().get(),
d_inf_stat.data(),
sizeof(cudf::io::gpu_inflate_status_s),
cudaMemcpyDeviceToHost,
0));
Expand All @@ -87,40 +90,40 @@ struct DecompressTest : public cudf::test::BaseFixture {

cudf::io::gpu_inflate_input_s* inf_args = nullptr;
cudf::io::gpu_inflate_status_s* inf_stat = nullptr;
rmm::device_vector<cudf::io::gpu_inflate_input_s> d_inf_args;
rmm::device_vector<cudf::io::gpu_inflate_status_s> d_inf_stat;
};

/**
* @brief Derived fixture for GZIP decompression
*/
struct GzipDecompressTest : public DecompressTest<GzipDecompressTest> {
  // Launches the GZIP inflate kernel on a single input/status descriptor pair.
  // The device-side argument/status buffers are owned by the caller (the base
  // fixture's Decompress()), which passes raw device pointers here.
  cudaError_t dispatch(cudf::io::gpu_inflate_input_s* d_inf_args,
                       cudf::io::gpu_inflate_status_s* d_inf_stat)
  {
    return cudf::io::gpuinflate(d_inf_args, d_inf_stat, 1, 1);
  }
};

/**
* @brief Derived fixture for Snappy decompression
*/
struct SnappyDecompressTest : public DecompressTest<SnappyDecompressTest> {
  // Launches the Snappy decompression kernel on a single input/status
  // descriptor pair supplied as raw device pointers by the base fixture.
  cudaError_t dispatch(cudf::io::gpu_inflate_input_s* d_inf_args,
                       cudf::io::gpu_inflate_status_s* d_inf_stat)
  {
    return cudf::io::gpu_unsnap(d_inf_args, d_inf_stat, 1);
  }
};

/**
* @brief Derived fixture for Brotli decompression
*/
struct BrotliDecompressTest : public DecompressTest<BrotliDecompressTest> {
  // Allocates the Brotli scratch workspace, then launches the Brotli
  // decompression kernel on a single input/status descriptor pair supplied as
  // raw device pointers by the base fixture. The scratch buffer is freed by
  // RAII when this function returns.
  cudaError_t dispatch(cudf::io::gpu_inflate_input_s* d_inf_args,
                       cudf::io::gpu_inflate_status_s* d_inf_stat)
  {
    rmm::device_buffer d_scratch(cudf::io::get_gpu_debrotli_scratch_size(1));

    return cudf::io::gpu_debrotli(d_inf_args, d_inf_stat, d_scratch.data(), d_scratch.size(), 1);
  }
};

Expand Down