diff --git a/cpp/src/io/avro/avro_gpu.h b/cpp/src/io/avro/avro_gpu.h index c87ac8afb13..3811132435b 100644 --- a/cpp/src/io/avro/avro_gpu.h +++ b/cpp/src/io/avro/avro_gpu.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -47,17 +47,17 @@ struct schemadesc_s { * @param[in] max_rows Maximum number of rows to load * @param[in] first_row Crop all rows below first_row * @param[in] min_row_size Minimum size in bytes of a row - * @param[in] stream CUDA stream to use, default 0 + * @param[in] stream CUDA stream to use */ void DecodeAvroColumnData(cudf::device_span blocks, schemadesc_s* schema, cudf::device_span global_dictionary, uint8_t const* avro_data, uint32_t schema_len, - size_t max_rows = ~0, - size_t first_row = 0, - uint32_t min_row_size = 0, - rmm::cuda_stream_view stream = rmm::cuda_stream_default); + size_t max_rows, + size_t first_row, + uint32_t min_row_size, + rmm::cuda_stream_view stream); } // namespace gpu } // namespace avro diff --git a/cpp/src/io/avro/reader_impl.cu b/cpp/src/io/avro/reader_impl.cu index d908e6c8ed5..0fa5680c5d2 100644 --- a/cpp/src/io/avro/reader_impl.cu +++ b/cpp/src/io/avro/reader_impl.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -159,8 +159,8 @@ rmm::device_buffer decompress_data(datasource& source, if (meta.codec == "deflate") { size_t uncompressed_data_size = 0; - auto inflate_in = hostdevice_vector(meta.block_list.size()); - auto inflate_out = hostdevice_vector(meta.block_list.size()); + auto inflate_in = hostdevice_vector(meta.block_list.size(), stream); + auto inflate_out = hostdevice_vector(meta.block_list.size(), stream); // Guess an initial maximum uncompressed block size uint32_t initial_blk_len = (meta.max_block_size * 2 + 0xfff) & ~0xfff; @@ -343,7 +343,7 @@ std::vector decode_data(metadata& meta, } // Build gpu schema - auto schema_desc = hostdevice_vector(meta.schema.size()); + auto schema_desc = hostdevice_vector(meta.schema.size(), stream); uint32_t min_row_data_size = 0; int skip_field_cnt = 0; diff --git a/cpp/src/io/comp/gpuinflate.h b/cpp/src/io/comp/gpuinflate.h index a37d282997e..3ca9c9eee10 100644 --- a/cpp/src/io/comp/gpuinflate.h +++ b/cpp/src/io/comp/gpuinflate.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, NVIDIA CORPORATION. + * Copyright (c) 2018-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -49,26 +49,26 @@ struct gpu_inflate_status_s { * * @param[in] inputs List of input argument structures * @param[out] outputs List of output status structures - * @param[in] count Number of input/output structures, default 1 - * @param[in] parse_hdr Whether or not to parse GZIP header, default false - * @param[in] stream CUDA stream to use, default 0 + * @param[in] count Number of input/output structures + * @param[in] parse_hdr Whether or not to parse GZIP header + * @param[in] stream CUDA stream to use */ cudaError_t gpuinflate(gpu_inflate_input_s* inputs, gpu_inflate_status_s* outputs, - int count = 1, - int parse_hdr = 0, - rmm::cuda_stream_view stream = rmm::cuda_stream_default); + int count, + int parse_hdr, + rmm::cuda_stream_view stream); /** * @brief Interface for copying uncompressed byte blocks * * @param[in] inputs List of input argument structures - * @param[in] count Number of input structures, default 1 - * @param[in] stream CUDA stream to use, default 0 + * @param[in] count Number of input structures + * @param[in] stream CUDA stream to use */ cudaError_t gpu_copy_uncompressed_blocks(gpu_inflate_input_s* inputs, - int count = 1, - rmm::cuda_stream_view stream = rmm::cuda_stream_default); + int count, + rmm::cuda_stream_view stream); /** * @brief Interface for decompressing Snappy-compressed data @@ -78,13 +78,13 @@ cudaError_t gpu_copy_uncompressed_blocks(gpu_inflate_input_s* inputs, * * @param[in] inputs List of input argument structures * @param[out] outputs List of output status structures - * @param[in] count Number of input/output structures, default 1 - * @param[in] stream CUDA stream to use, default 0 + * @param[in] count Number of input/output structures + * @param[in] stream CUDA stream to use */ cudaError_t gpu_unsnap(gpu_inflate_input_s* inputs, gpu_inflate_status_s* outputs, - int count = 1, - rmm::cuda_stream_view stream = rmm::cuda_stream_default); + int count, + rmm::cuda_stream_view stream); /** * @brief Computes the size of temporary memory for Brotli decompression @@ -105,15 +105,15 @@ size_t get_gpu_debrotli_scratch_size(int max_num_inputs = 0); * @param[out] outputs List of output status structures * @param[in] scratch Temporary memory for intermediate work * @param[in] scratch_size Size in bytes of the temporary memory - * @param[in] count Number of input/output structures, default 1 - * @param[in] stream CUDA stream to use, default 0 + * @param[in] count Number of input/output structures + * @param[in] stream CUDA stream to use */ cudaError_t gpu_debrotli(gpu_inflate_input_s* inputs, gpu_inflate_status_s* outputs, void* scratch, size_t scratch_size, - int count = 1, - rmm::cuda_stream_view stream = rmm::cuda_stream_default); + int count, + rmm::cuda_stream_view stream); /** * @brief Interface for compressing data with Snappy @@ -123,13 +123,13 @@ cudaError_t gpu_debrotli(gpu_inflate_input_s* inputs, * * @param[in] inputs List of input argument structures * @param[out] outputs List of output status structures - * @param[in] count Number of input/output structures, default 1 - * @param[in] stream CUDA stream to use, default 0 + * @param[in] count Number of input/output structures + * @param[in] stream CUDA stream to use */ cudaError_t gpu_snap(gpu_inflate_input_s* inputs, gpu_inflate_status_s* outputs, - int count = 1, - rmm::cuda_stream_view stream = rmm::cuda_stream_default); + int count, + rmm::cuda_stream_view stream); } // namespace io } // namespace cudf diff --git a/cpp/src/io/csv/reader_impl.cu b/cpp/src/io/csv/reader_impl.cu index 7f032b6987c..0e50bb46232 100644 --- a/cpp/src/io/csv/reader_impl.cu +++ b/cpp/src/io/csv/reader_impl.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -237,7 +237,7 @@ std::pair, selected_rows_offsets> load_data_and_gather size_t buffer_size = std::min(max_chunk_bytes, data.size()); size_t max_blocks = std::max((buffer_size / cudf::io::csv::gpu::rowofs_block_bytes) + 1, 2); - hostdevice_vector row_ctx(max_blocks); + hostdevice_vector row_ctx(max_blocks, stream); size_t buffer_pos = std::min(range_begin - std::min(range_begin, sizeof(char)), data.size()); size_t pos = std::min(range_begin, data.size()); size_t header_rows = (reader_opts.get_header() >= 0) ? reader_opts.get_header() + 1 : 0; diff --git a/cpp/src/io/csv/writer_impl.cu b/cpp/src/io/csv/writer_impl.cu index b9b6fc6cf94..1b66df860a3 100644 --- a/cpp/src/io/csv/writer_impl.cu +++ b/cpp/src/io/csv/writer_impl.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -137,10 +137,9 @@ struct column_to_strings_fn { (cudf::is_timestamp()) || (cudf::is_duration())); } - explicit column_to_strings_fn( - csv_writer_options const& options, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + explicit column_to_strings_fn(csv_writer_options const& options, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) : options_(options), stream_(stream), mr_(mr) { } diff --git a/cpp/src/io/orc/timezone.cuh b/cpp/src/io/orc/timezone.cuh index 77c2bd4ffa0..e15144f9ea5 100644 --- a/cpp/src/io/orc/timezone.cuh +++ b/cpp/src/io/orc/timezone.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -107,10 +107,13 @@ inline __device__ int32_t get_gmt_offset(cudf::device_span ttimes return get_gmt_offset_impl(ttimes.begin(), offsets.begin(), ttimes.size(), ts); } -struct timezone_table { +class timezone_table { int32_t gmt_offset = 0; rmm::device_uvector ttimes; rmm::device_uvector offsets; + + public: + // Safe to use the default stream, device_uvectors will not change after they are created empty timezone_table() : ttimes{0, rmm::cuda_stream_default}, offsets{0, rmm::cuda_stream_default} {} timezone_table(int32_t gmt_offset, rmm::device_uvector&& ttimes, diff --git a/cpp/src/io/orc/writer_impl.hpp b/cpp/src/io/orc/writer_impl.hpp index 80c22b09927..d989721334e 100644 --- a/cpp/src/io/orc/writer_impl.hpp +++ b/cpp/src/io/orc/writer_impl.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -349,7 +349,7 @@ class writer::impl { private: rmm::mr::device_memory_resource* _mr = nullptr; // Cuda stream to be used - rmm::cuda_stream_view stream = rmm::cuda_stream_default; + rmm::cuda_stream_view stream; stripe_size_limits max_stripe_size; size_type row_index_stride; diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index 53bb11c8b70..b77eeac68f5 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, NVIDIA CORPORATION. + * Copyright (c) 2018-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -479,7 +479,7 @@ struct dremel_data { dremel_data get_dremel_data(column_view h_col, rmm::device_uvector const& d_nullability, std::vector const& nullability, - rmm::cuda_stream_view stream = rmm::cuda_stream_default); + rmm::cuda_stream_view stream); /** * @brief Launches kernel for initializing encoder page fragments diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 69d480edf85..7e9a8feaffb 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -1427,8 +1427,8 @@ void reader::impl::decode_page_data(hostdevice_vector& chu // In order to reduce the number of allocations of hostdevice_vector, we allocate a single vector // to store all per-chunk pointers to nested data/nullmask. `chunk_offsets[i]` will store the // offset into `chunk_nested_data`/`chunk_nested_valids` for the array of pointers for chunk `i` - auto chunk_nested_valids = hostdevice_vector(sum_max_depths); - auto chunk_nested_data = hostdevice_vector(sum_max_depths); + auto chunk_nested_valids = hostdevice_vector(sum_max_depths, stream); + auto chunk_nested_data = hostdevice_vector(sum_max_depths, stream); auto chunk_offsets = std::vector(); // Update chunks with pointers to column data. diff --git a/cpp/src/io/parquet/writer_impl.hpp b/cpp/src/io/parquet/writer_impl.hpp index 1cefb91c904..5168b61aae2 100644 --- a/cpp/src/io/parquet/writer_impl.hpp +++ b/cpp/src/io/parquet/writer_impl.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -206,7 +206,7 @@ class writer::impl { // TODO : figure out if we want to keep this. It is currently unused. rmm::mr::device_memory_resource* _mr = nullptr; // Cuda stream to be used - rmm::cuda_stream_view stream = rmm::cuda_stream_default; + rmm::cuda_stream_view stream; size_t max_row_group_size = default_row_group_size_bytes; size_type max_row_group_rows = default_row_group_size_rows; diff --git a/cpp/src/io/utilities/column_buffer.hpp b/cpp/src/io/utilities/column_buffer.hpp index 9300bd0f8b2..17df49009c2 100644 --- a/cpp/src/io/utilities/column_buffer.hpp +++ b/cpp/src/io/utilities/column_buffer.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -45,11 +45,10 @@ namespace detail { * * @return `rmm::device_buffer` Device buffer allocation */ -inline rmm::device_buffer create_data( - data_type type, - size_type size, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +inline rmm::device_buffer create_data(data_type type, + size_type size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { std::size_t data_size = size_of(type) * size; @@ -75,9 +74,9 @@ struct column_buffer { // construct with a known size. allocates memory column_buffer(data_type _type, size_type _size, - bool _is_nullable = true, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + bool _is_nullable, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) : type(_type), is_nullable(_is_nullable) { create(_size, stream, mr); @@ -93,9 +92,7 @@ struct column_buffer { // instantiate a column of known type with a specified size. Allows deferred creation for // preprocessing steps such as in the Parquet reader - void create(size_type _size, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + void create(size_type _size, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); auto data() { return _strings ? _strings->data() : _data.data(); } auto data_size() const { return _strings ? _strings->size() : _data.size(); } @@ -134,11 +131,10 @@ struct column_buffer { * * @return `std::unique_ptr` Column from the existing device data */ -std::unique_ptr make_column( - column_buffer& buffer, - column_name_info* schema_info = nullptr, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr make_column(column_buffer& buffer, + column_name_info* schema_info, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Creates an equivalent empty column from an existing set of device memory buffers. @@ -155,11 +151,10 @@ std::unique_ptr make_column( * * @return `std::unique_ptr` Column from the existing device data */ -std::unique_ptr empty_like( - column_buffer& buffer, - column_name_info* schema_info = nullptr, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +std::unique_ptr empty_like(column_buffer& buffer, + column_name_info* schema_info, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace io diff --git a/cpp/src/io/utilities/hostdevice_vector.hpp b/cpp/src/io/utilities/hostdevice_vector.hpp index a7f9aec7bb4..cbf914b8da6 100644 --- a/cpp/src/io/utilities/hostdevice_vector.hpp +++ b/cpp/src/io/utilities/hostdevice_vector.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -45,15 +45,12 @@ class hostdevice_vector { return *this; } - explicit hostdevice_vector(size_t max_size, - rmm::cuda_stream_view stream = rmm::cuda_stream_default) + explicit hostdevice_vector(size_t max_size, rmm::cuda_stream_view stream) : hostdevice_vector(max_size, max_size, stream) { } - explicit hostdevice_vector(size_t initial_size, - size_t max_size, - rmm::cuda_stream_view stream = rmm::cuda_stream_default) + explicit hostdevice_vector(size_t initial_size, size_t max_size, rmm::cuda_stream_view stream) : num_elements(initial_size), max_elements(max_size) { if (max_elements != 0) { @@ -148,9 +145,7 @@ namespace detail { template class hostdevice_2dvector { public: - hostdevice_2dvector(size_t rows, - size_t columns, - rmm::cuda_stream_view stream = rmm::cuda_stream_default) + hostdevice_2dvector(size_t rows, size_t columns, rmm::cuda_stream_view stream) : _size{rows, columns}, _data{rows * columns, stream} { } diff --git a/cpp/tests/io/comp/decomp_test.cpp b/cpp/tests/io/comp/decomp_test.cpp index 8247ced4629..dd00b201df9 100644 --- a/cpp/tests/io/comp/decomp_test.cpp +++ b/cpp/tests/io/comp/decomp_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -97,7 +97,7 @@ struct GzipDecompressTest : public DecompressTest { cudaError_t dispatch(cudf::io::gpu_inflate_input_s* d_inf_args, cudf::io::gpu_inflate_status_s* d_inf_stat) { - return cudf::io::gpuinflate(d_inf_args, d_inf_stat, 1, 1); + return cudf::io::gpuinflate(d_inf_args, d_inf_stat, 1, 1, rmm::cuda_stream_default); } }; @@ -108,7 +108,7 @@ struct SnappyDecompressTest : public DecompressTest { cudaError_t dispatch(cudf::io::gpu_inflate_input_s* d_inf_args, cudf::io::gpu_inflate_status_s* d_inf_stat) { - return cudf::io::gpu_unsnap(d_inf_args, d_inf_stat, 1); + return cudf::io::gpu_unsnap(d_inf_args, d_inf_stat, 1, rmm::cuda_stream_default); } }; @@ -122,7 +122,8 @@ struct BrotliDecompressTest : public DecompressTest { rmm::device_buffer d_scratch{cudf::io::get_gpu_debrotli_scratch_size(1), rmm::cuda_stream_default}; - return cudf::io::gpu_debrotli(d_inf_args, d_inf_stat, d_scratch.data(), d_scratch.size(), 1); + return cudf::io::gpu_debrotli( + d_inf_args, d_inf_stat, d_scratch.data(), d_scratch.size(), 1, rmm::cuda_stream_default); } }; diff --git a/cpp/tests/utilities_tests/span_tests.cu b/cpp/tests/utilities_tests/span_tests.cu index a9a5151e7c3..044ac3e60f7 100644 --- a/cpp/tests/utilities_tests/span_tests.cu +++ b/cpp/tests/utilities_tests/span_tests.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -248,9 +248,9 @@ class MdSpanTest : public cudf::test::BaseFixture { TEST(MdSpanTest, CanDetermineEmptiness) { - auto const vector = hostdevice_2dvector(1, 2); - auto const no_rows_vector = hostdevice_2dvector(0, 2); - auto const no_columns_vector = hostdevice_2dvector(1, 0); + auto const vector = hostdevice_2dvector(1, 2, rmm::cuda_stream_default); + auto const no_rows_vector = hostdevice_2dvector(0, 2, rmm::cuda_stream_default); + auto const no_columns_vector = hostdevice_2dvector(1, 0, rmm::cuda_stream_default); EXPECT_FALSE(host_2dspan{vector}.is_empty()); EXPECT_FALSE(device_2dspan{vector}.is_empty()); @@ -271,7 +271,7 @@ __global__ void readwrite_kernel(device_2dspan result) TEST(MdSpanTest, DeviceReadWrite) { - auto vector = hostdevice_2dvector(11, 23); + auto vector = hostdevice_2dvector(11, 23, rmm::cuda_stream_default); readwrite_kernel<<<1, 1>>>(vector); readwrite_kernel<<<1, 1>>>(vector); @@ -281,7 +281,7 @@ TEST(MdSpanTest, DeviceReadWrite) TEST(MdSpanTest, HostReadWrite) { - auto vector = hostdevice_2dvector(11, 23); + auto vector = hostdevice_2dvector(11, 23, rmm::cuda_stream_default); auto span = host_2dspan{vector}; span[5][6] = 5; if (span[5][6] == 5) { span[5][6] *= 6; } @@ -291,7 +291,7 @@ TEST(MdSpanTest, HostReadWrite) TEST(MdSpanTest, CanGetSize) { - auto const vector = hostdevice_2dvector(1, 2); + auto const vector = hostdevice_2dvector(1, 2, rmm::cuda_stream_default); EXPECT_EQ(host_2dspan{vector}.size(), vector.size()); EXPECT_EQ(device_2dspan{vector}.size(), vector.size()); @@ -299,7 +299,7 @@ TEST(MdSpanTest, CanGetSize) TEST(MdSpanTest, CanGetCount) { - auto const vector = hostdevice_2dvector(11, 23); + auto const vector = hostdevice_2dvector(11, 23, rmm::cuda_stream_default); EXPECT_EQ(host_2dspan{vector}.count(), 11ul * 23); EXPECT_EQ(device_2dspan{vector}.count(), 11ul * 23);