diff --git a/cpp/docs/DEVELOPER_GUIDE.md b/cpp/docs/DEVELOPER_GUIDE.md index 0f6e110ffd0..f2873e31c5b 100644 --- a/cpp/docs/DEVELOPER_GUIDE.md +++ b/cpp/docs/DEVELOPER_GUIDE.md @@ -458,8 +458,12 @@ int host_value = int_scalar.value(); Allocates a specified number of elements of the specified type. If no initialization value is provided, all elements are default initialized (this incurs a kernel launch). -**Note**: `rmm::device_vector` is not yet updated to use `device_memory_resource`s, but support -is forthcoming. Likewise, `device_vector` operations cannot be stream ordered. +**Note**: We have removed all usage of `rmm::device_vector` and `thrust::device_vector` from +libcudf, and you should not use it in new code in libcudf without careful consideration. Instead, +use `rmm::device_uvector` along with the utility factories in `device_factories.hpp`. These +utilities enable creation of `uvector`s from host-side vectors, or creating zero-initialized +`uvector`s, so that they are as convenient to use as `device_vector`. Avoiding `device_vector` has +a number of benefits, as described in the folling section on `rmm::device_uvector`. #### `rmm::device_uvector` @@ -468,7 +472,9 @@ differences: - As an optimization, elements are uninitialized and no synchronization occurs at construction. This limits the types `T` to trivially copyable types. - All operations are stream ordered (i.e., they accept a `cuda_stream_view` specifying the stream -on which the operation is performed). +on which the operation is performed). This improves safety when using non-default streams. +- `device_uvector.hpp` does not include any `__device__` code, unlike `thrust/device_vector.hpp`, + which means `device_uvector`s can be used in `.cpp` files, rather than just in `.cu` files. ```c++ cuda_stream s; diff --git a/cpp/src/quantiles/quantile.cu b/cpp/src/quantiles/quantile.cu index 519feea3d7c..25bf4a436ad 100644 --- a/cpp/src/quantiles/quantile.cu +++ b/cpp/src/quantiles/quantile.cu @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -28,6 +29,7 @@ #include #include +#include #include #include @@ -76,12 +78,13 @@ struct quantile_functor { auto d_input = column_device_view::create(input, stream); auto d_output = mutable_column_device_view::create(output->mutable_view()); - rmm::device_vector q_device{q}; + auto q_device = cudf::detail::make_device_uvector_sync(q); if (!cudf::is_dictionary(input.type())) { auto sorted_data = thrust::make_permutation_iterator(input.data(), ordered_indices); - thrust::transform(q_device.begin(), + thrust::transform(rmm::exec_policy(), + q_device.begin(), q_device.end(), d_output->template begin(), [sorted_data, interp = interp, size = size] __device__(double q) { @@ -90,7 +93,8 @@ struct quantile_functor { } else { auto sorted_data = thrust::make_permutation_iterator( dictionary::detail::make_dictionary_iterator(*d_input), ordered_indices); - thrust::transform(q_device.begin(), + thrust::transform(rmm::exec_policy(), + q_device.begin(), q_device.end(), d_output->template begin(), [sorted_data, interp = interp, size = size] __device__(double q) { diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index ddeea40df77..6a8e36e6cf6 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -180,7 +180,7 @@ ConfigureTest(INTEROP_TEST ################################################################################################### # - io tests -------------------------------------------------------------------------------------- -ConfigureTest(DECOMPRESSION_TEST io/comp/decomp_test.cu) +ConfigureTest(DECOMPRESSION_TEST io/comp/decomp_test.cpp) ConfigureTest(CSV_TEST io/csv_test.cpp) ConfigureTest(ORC_TEST io/orc_test.cpp) diff --git a/cpp/tests/io/comp/decomp_test.cu b/cpp/tests/io/comp/decomp_test.cpp similarity index 82% rename from cpp/tests/io/comp/decomp_test.cu rename to cpp/tests/io/comp/decomp_test.cpp index a2e2fee8242..8247ced4629 100644 --- a/cpp/tests/io/comp/decomp_test.cu +++ b/cpp/tests/io/comp/decomp_test.cpp @@ -18,10 +18,11 @@ #include -#include - +#include #include -#include +#include + +#include /** * @brief Base test fixture for decompression @@ -36,9 +37,6 @@ struct DecompressTest : public cudf::test::BaseFixture { ASSERT_CUDA_SUCCEEDED(cudaMallocHost((void**)&inf_args, sizeof(cudf::io::gpu_inflate_input_s))); ASSERT_CUDA_SUCCEEDED( cudaMallocHost((void**)&inf_stat, sizeof(cudf::io::gpu_inflate_status_s))); - - d_inf_args.resize(1); - d_inf_stat.resize(1); } void TearDown() override @@ -64,19 +62,22 @@ struct DecompressTest : public cudf::test::BaseFixture { inf_args->dstDevice = static_cast(dst.data()); inf_args->srcSize = src.size(); inf_args->dstSize = dst.size(); - ASSERT_CUDA_SUCCEEDED(cudaMemcpyAsync(d_inf_args.data().get(), + rmm::device_uvector d_inf_args(1, rmm::cuda_stream_default); + rmm::device_uvector d_inf_stat(1, rmm::cuda_stream_default); + ASSERT_CUDA_SUCCEEDED(cudaMemcpyAsync(d_inf_args.data(), inf_args, sizeof(cudf::io::gpu_inflate_input_s), cudaMemcpyHostToDevice, 0)); - ASSERT_CUDA_SUCCEEDED(cudaMemcpyAsync(d_inf_stat.data().get(), + ASSERT_CUDA_SUCCEEDED(cudaMemcpyAsync(d_inf_stat.data(), inf_stat, sizeof(cudf::io::gpu_inflate_status_s), cudaMemcpyHostToDevice, 0)); - ASSERT_CUDA_SUCCEEDED(static_cast(this)->dispatch()); + ASSERT_CUDA_SUCCEEDED( + static_cast(this)->dispatch(d_inf_args.data(), d_inf_stat.data())); ASSERT_CUDA_SUCCEEDED(cudaMemcpyAsync(inf_stat, - d_inf_stat.data().get(), + d_inf_stat.data(), sizeof(cudf::io::gpu_inflate_status_s), cudaMemcpyDeviceToHost, 0)); @@ -87,17 +88,16 @@ struct DecompressTest : public cudf::test::BaseFixture { cudf::io::gpu_inflate_input_s* inf_args = nullptr; cudf::io::gpu_inflate_status_s* inf_stat = nullptr; - rmm::device_vector d_inf_args; - rmm::device_vector d_inf_stat; }; /** * @brief Derived fixture for GZIP decompression */ struct GzipDecompressTest : public DecompressTest { - cudaError_t dispatch() + cudaError_t dispatch(cudf::io::gpu_inflate_input_s* d_inf_args, + cudf::io::gpu_inflate_status_s* d_inf_stat) { - return cudf::io::gpuinflate(d_inf_args.data().get(), d_inf_stat.data().get(), 1, 1); + return cudf::io::gpuinflate(d_inf_args, d_inf_stat, 1, 1); } }; @@ -105,9 +105,10 @@ struct GzipDecompressTest : public DecompressTest { * @brief Derived fixture for Snappy decompression */ struct SnappyDecompressTest : public DecompressTest { - cudaError_t dispatch() + cudaError_t dispatch(cudf::io::gpu_inflate_input_s* d_inf_args, + cudf::io::gpu_inflate_status_s* d_inf_stat) { - return cudf::io::gpu_unsnap(d_inf_args.data().get(), d_inf_stat.data().get(), 1); + return cudf::io::gpu_unsnap(d_inf_args, d_inf_stat, 1); } }; @@ -115,13 +116,13 @@ struct SnappyDecompressTest : public DecompressTest { * @brief Derived fixture for Brotli decompression */ struct BrotliDecompressTest : public DecompressTest { - cudaError_t dispatch() + cudaError_t dispatch(cudf::io::gpu_inflate_input_s* d_inf_args, + cudf::io::gpu_inflate_status_s* d_inf_stat) { rmm::device_buffer d_scratch{cudf::io::get_gpu_debrotli_scratch_size(1), rmm::cuda_stream_default}; - return cudf::io::gpu_debrotli( - d_inf_args.data().get(), d_inf_stat.data().get(), d_scratch.data(), d_scratch.size(), 1); + return cudf::io::gpu_debrotli(d_inf_args, d_inf_stat, d_scratch.data(), d_scratch.size(), 1); } };