From 27a4aa8faebd77692c12844380a720cb97beb2d7 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Mon, 23 Jan 2023 09:55:11 -0500 Subject: [PATCH] Guard CUDA runtime APIs with error checking (#12531) This PR adds missing error checking for CUDA runtime APIs in libcudf. Authors: - Yunsong Wang (https://github.com/PointKernel) Approvers: - Bradley Dice (https://github.com/bdice) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/12531 --- cpp/benchmarks/io/json/nested_json.cpp | 4 +-- cpp/benchmarks/io/text/multibyte_split.cpp | 4 +-- cpp/benchmarks/string/json.cu | 4 +-- .../developer_guide/DEVELOPER_GUIDE.md | 2 +- cpp/src/io/json/reader_impl.cu | 20 ++++++------ cpp/src/io/orc/writer_impl.cu | 32 ++++++++++--------- cpp/src/io/parquet/reader_impl.cpp | 10 +++--- cpp/src/io/parquet/reader_impl_preprocess.cu | 27 ++++++++-------- cpp/src/io/text/multibyte_split.cu | 26 +++++++-------- cpp/src/io/utilities/file_io_utilities.cpp | 10 +++--- cpp/src/lists/dremel.cu | 5 +-- cpp/tests/io/comp/decomp_test.cpp | 4 +-- cpp/tests/io/fst/logical_stack_test.cu | 22 ++++++------- cpp/tests/io/text/data_chunk_source_test.cpp | 2 +- cpp/tests/utilities/tdigest_utilities.cu | 8 ++--- 15 files changed, 92 insertions(+), 88 deletions(-) diff --git a/cpp/benchmarks/io/json/nested_json.cpp b/cpp/benchmarks/io/json/nested_json.cpp index 5b938d71989..2abae88dca3 100644 --- a/cpp/benchmarks/io/json/nested_json.cpp +++ b/cpp/benchmarks/io/json/nested_json.cpp @@ -148,8 +148,8 @@ auto make_test_json_data(cudf::size_type string_size, rmm::cuda_stream_view stre auto d_scalar = cudf::strings::repeat_string(d_string_scalar, repeat_times); auto data = const_cast(d_scalar->data()); - cudaMemsetAsync(data, '[', 1, stream.value()); - cudaMemsetAsync(data + d_scalar->size() - 1, ']', 1, stream.value()); + CUDF_CUDA_TRY(cudaMemsetAsync(data, '[', 1, stream.value())); + CUDF_CUDA_TRY(cudaMemsetAsync(data + d_scalar->size() - 1, ']', 1, stream.value())); return d_scalar; } diff --git a/cpp/benchmarks/io/text/multibyte_split.cpp b/cpp/benchmarks/io/text/multibyte_split.cpp index 9d93c01bcaa..c3b7c585055 100644 --- a/cpp/benchmarks/io/text/multibyte_split.cpp +++ b/cpp/benchmarks/io/text/multibyte_split.cpp @@ -148,8 +148,8 @@ static void bench_multibyte_split(nvbench::state& state, } if (source_type == data_chunk_source_type::host_pinned) { host_pinned_input.resize(static_cast(device_input.size())); - cudaMemcpy( - host_pinned_input.data(), device_input.data(), host_pinned_input.size(), cudaMemcpyDefault); + CUDF_CUDA_TRY(cudaMemcpy( + host_pinned_input.data(), device_input.data(), host_pinned_input.size(), cudaMemcpyDefault)); } auto source = [&] { diff --git a/cpp/benchmarks/string/json.cu b/cpp/benchmarks/string/json.cu index 20251aae1b0..7a963ba604b 100644 --- a/cpp/benchmarks/string/json.cu +++ b/cpp/benchmarks/string/json.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -198,7 +198,7 @@ void BM_case(benchmark::State& state, std::string query_arg) for (auto _ : state) { cuda_event_timer raii(state, true); auto result = cudf::strings::get_json_object(scv, json_path); - cudaStreamSynchronize(0); + CUDF_CUDA_TRY(cudaStreamSynchronize(0)); } // this isn't strictly 100% accurate. a given query isn't necessarily diff --git a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md index 3c085984a0e..44e71dd91e1 100644 --- a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md +++ b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md @@ -886,7 +886,7 @@ thrown exception includes a description of the CUDA error code in its `what()` m Example: ```c++ -CUDA_TRY( cudaMemcpy(&dst, &src, num_bytes) ); +CUDF_CUDA_TRY( cudaMemcpy(&dst, &src, num_bytes) ); ``` ## Compile-Time Conditions diff --git a/cpp/src/io/json/reader_impl.cu b/cpp/src/io/json/reader_impl.cu index 5dc647c29a7..bb35fb3576e 100644 --- a/cpp/src/io/json/reader_impl.cu +++ b/cpp/src/io/json/reader_impl.cu @@ -166,18 +166,18 @@ std::vector create_key_strings(char const* h_data, { auto const num_cols = sorted_info.num_rows(); std::vector h_offsets(num_cols); - cudaMemcpyAsync(h_offsets.data(), - sorted_info.column(0).data(), - sizeof(uint64_t) * num_cols, - cudaMemcpyDefault, - stream.value()); + CUDF_CUDA_TRY(cudaMemcpyAsync(h_offsets.data(), + sorted_info.column(0).data(), + sizeof(uint64_t) * num_cols, + cudaMemcpyDefault, + stream.value())); std::vector h_lens(num_cols); - cudaMemcpyAsync(h_lens.data(), - sorted_info.column(1).data(), - sizeof(uint16_t) * num_cols, - cudaMemcpyDefault, - stream.value()); + CUDF_CUDA_TRY(cudaMemcpyAsync(h_lens.data(), + sorted_info.column(1).data(), + sizeof(uint16_t) * num_cols, + cudaMemcpyDefault, + stream.value())); std::vector names(num_cols); std::transform(h_offsets.cbegin(), diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index d3f155b3618..a6effeefc6c 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -1322,16 +1322,18 @@ writer::impl::encoded_footer_statistics writer::impl::finish_statistic_blobs( auto const chunk_bytes = stripes_per_col * sizeof(statistics_chunk); auto const merge_bytes = stripes_per_col * sizeof(statistics_merge_group); for (size_t col = 0; col < num_columns; ++col) { - cudaMemcpyAsync(stat_chunks.data() + (num_stripes * col) + num_entries_seen, - per_chunk_stats.stripe_stat_chunks[i].data() + col * stripes_per_col, - chunk_bytes, - cudaMemcpyDefault, - stream); - cudaMemcpyAsync(stats_merge.device_ptr() + (num_stripes * col) + num_entries_seen, - per_chunk_stats.stripe_stat_merge[i].device_ptr() + col * stripes_per_col, - merge_bytes, - cudaMemcpyDefault, - stream); + CUDF_CUDA_TRY( + cudaMemcpyAsync(stat_chunks.data() + (num_stripes * col) + num_entries_seen, + per_chunk_stats.stripe_stat_chunks[i].data() + col * stripes_per_col, + chunk_bytes, + cudaMemcpyDefault, + stream.value())); + CUDF_CUDA_TRY( + cudaMemcpyAsync(stats_merge.device_ptr() + (num_stripes * col) + num_entries_seen, + per_chunk_stats.stripe_stat_merge[i].device_ptr() + col * stripes_per_col, + merge_bytes, + cudaMemcpyDefault, + stream.value())); } num_entries_seen += stripes_per_col; } @@ -1346,11 +1348,11 @@ writer::impl::encoded_footer_statistics writer::impl::finish_statistic_blobs( } auto d_file_stats_merge = stats_merge.device_ptr(num_stripe_blobs); - cudaMemcpyAsync(d_file_stats_merge, - file_stats_merge.data(), - num_file_blobs * sizeof(statistics_merge_group), - cudaMemcpyDefault, - stream); + CUDF_CUDA_TRY(cudaMemcpyAsync(d_file_stats_merge, + file_stats_merge.data(), + num_file_blobs * sizeof(statistics_merge_group), + cudaMemcpyDefault, + stream.value())); auto file_stat_chunks = stat_chunks.data() + num_stripe_blobs; detail::merge_group_statistics( diff --git a/cpp/src/io/parquet/reader_impl.cpp b/cpp/src/io/parquet/reader_impl.cpp index fb2a34bbcf2..045e5d55ccd 100644 --- a/cpp/src/io/parquet/reader_impl.cpp +++ b/cpp/src/io/parquet/reader_impl.cpp @@ -150,11 +150,11 @@ void reader::impl::decode_page_data(size_t skip_rows, size_t num_rows) // the final offset for a list at level N is the size of it's child int offset = child.type.id() == type_id::LIST ? child.size - 1 : child.size; - cudaMemcpyAsync(static_cast(out_buf.data()) + (out_buf.size - 1), - &offset, - sizeof(offset), - cudaMemcpyDefault, - _stream.value()); + CUDF_CUDA_TRY(cudaMemcpyAsync(static_cast(out_buf.data()) + (out_buf.size - 1), + &offset, + sizeof(offset), + cudaMemcpyDefault, + _stream.value())); out_buf.user_data |= PARQUET_COLUMN_BUFFER_FLAG_LIST_TERMINATED; } } diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index 934ea98c7bb..651830292ea 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -817,10 +817,11 @@ void print_cumulative_page_info(hostdevice_vector& pages, std::vector schemas(pages.size()); std::vector h_page_index(pages.size()); - cudaMemcpy(h_page_index.data(), page_index.data(), sizeof(int) * pages.size(), cudaMemcpyDefault); + CUDF_CUDA_TRY(cudaMemcpy( + h_page_index.data(), page_index.data(), sizeof(int) * pages.size(), cudaMemcpyDefault)); std::vector h_cinfo(pages.size()); - cudaMemcpy( - h_cinfo.data(), c_info.data(), sizeof(cumulative_row_info) * pages.size(), cudaMemcpyDefault); + CUDF_CUDA_TRY(cudaMemcpy( + h_cinfo.data(), c_info.data(), sizeof(cumulative_row_info) * pages.size(), cudaMemcpyDefault)); auto schema_iter = cudf::detail::make_counting_transform_iterator( 0, [&](size_type i) { return pages[h_page_index[i]].src_col_schema; }); thrust::copy(thrust::seq, schema_iter, schema_iter + pages.size(), schemas.begin()); @@ -1107,11 +1108,11 @@ std::vector compute_splits(hostdevice_vector h_c_info_sorted(c_info_sorted.size()); - cudaMemcpy(h_c_info_sorted.data(), - c_info_sorted.data(), - sizeof(cumulative_row_info) * c_info_sorted.size(), - cudaMemcpyDefault); + // std::vector h_c_info_sorted(c_info_sorted.size()); + // CUDF_CUDA_TRY(cudaMemcpy(h_c_info_sorted.data(), + // c_info_sorted.data(), + // sizeof(cumulative_row_info) * c_info_sorted.size(), + // cudaMemcpyDefault)); // print_cumulative_row_info(h_c_info_sorted, "raw"); // generate key offsets (offsets to the start of each partition of keys). worst case is 1 page per @@ -1147,11 +1148,11 @@ std::vector compute_splits(hostdevice_vector h_aggregated_info(aggregated_info.size()); - cudaMemcpyAsync(h_aggregated_info.data(), - aggregated_info.data(), - sizeof(cumulative_row_info) * c_info.size(), - cudaMemcpyDefault, - stream); + CUDF_CUDA_TRY(cudaMemcpyAsync(h_aggregated_info.data(), + aggregated_info.data(), + sizeof(cumulative_row_info) * c_info.size(), + cudaMemcpyDefault, + stream.value())); stream.synchronize(); return find_splits(h_aggregated_info, num_rows, chunk_read_limit); diff --git a/cpp/src/io/text/multibyte_split.cu b/cpp/src/io/text/multibyte_split.cu index 1177be6b63f..a0ba3e3ee35 100644 --- a/cpp/src/io/text/multibyte_split.cu +++ b/cpp/src/io/text/multibyte_split.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -312,23 +312,23 @@ namespace detail { void fork_stream(std::vector streams, rmm::cuda_stream_view stream) { cudaEvent_t event; - cudaEventCreate(&event); - cudaEventRecord(event, stream); + CUDF_CUDA_TRY(cudaEventCreate(&event)); + CUDF_CUDA_TRY(cudaEventRecord(event, stream)); for (uint32_t i = 0; i < streams.size(); i++) { - cudaStreamWaitEvent(streams[i], event, 0); + CUDF_CUDA_TRY(cudaStreamWaitEvent(streams[i], event, 0)); } - cudaEventDestroy(event); + CUDF_CUDA_TRY(cudaEventDestroy(event)); } void join_stream(std::vector streams, rmm::cuda_stream_view stream) { cudaEvent_t event; - cudaEventCreate(&event); + CUDF_CUDA_TRY(cudaEventCreate(&event)); for (uint32_t i = 0; i < streams.size(); i++) { - cudaEventRecord(event, streams[i]); - cudaStreamWaitEvent(stream, event, 0); + CUDF_CUDA_TRY(cudaEventRecord(event, streams[i])); + CUDF_CUDA_TRY(cudaStreamWaitEvent(stream, event, 0)); } - cudaEventDestroy(event); + CUDF_CUDA_TRY(cudaEventDestroy(event)); } std::vector get_streams(int32_t count, rmm::cuda_stream_pool& stream_pool) @@ -417,7 +417,7 @@ std::unique_ptr multibyte_split(cudf::io::text::data_chunk_source fork_stream(streams, stream); cudaEvent_t last_launch_event; - cudaEventCreate(&last_launch_event); + CUDF_CUDA_TRY(cudaEventCreate(&last_launch_event)); auto& read_stream = streams[0]; auto& scan_stream = streams[1]; @@ -451,7 +451,7 @@ std::unique_ptr multibyte_split(cudf::io::text::data_chunk_source tile_multistates, tile_offsets); - cudaStreamWaitEvent(scan_stream.value(), last_launch_event); + CUDF_CUDA_TRY(cudaStreamWaitEvent(scan_stream.value(), last_launch_event)); if (delimiter.size() == 1) { // the single-byte case allows for a much more efficient kernel, so we special-case it @@ -525,7 +525,7 @@ std::unique_ptr multibyte_split(cudf::io::text::data_chunk_source char_storage.advance_output(output_size, scan_stream); } - cudaEventRecord(last_launch_event, scan_stream.value()); + CUDF_CUDA_TRY(cudaEventRecord(last_launch_event, scan_stream.value())); std::swap(read_stream, scan_stream); base_tile_idx += tiles_in_launch; @@ -533,7 +533,7 @@ std::unique_ptr multibyte_split(cudf::io::text::data_chunk_source chunk = std::move(next_chunk); } - cudaEventDestroy(last_launch_event); + CUDF_CUDA_TRY(cudaEventDestroy(last_launch_event)); join_stream(streams, stream); diff --git a/cpp/src/io/utilities/file_io_utilities.cpp b/cpp/src/io/utilities/file_io_utilities.cpp index 2484a36143a..f1fb50f5340 100644 --- a/cpp/src/io/utilities/file_io_utilities.cpp +++ b/cpp/src/io/utilities/file_io_utilities.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -202,11 +202,11 @@ std::future cufile_input_impl::read_async(size_t offset, rmm::cuda_stream_view stream) { int device; - cudaGetDevice(&device); + CUDF_CUDA_TRY(cudaGetDevice(&device)); auto read_slice = [device, gds_read = shim->read, file_handle = cf_file.handle()]( void* dst, size_t size, size_t offset) -> ssize_t { - cudaSetDevice(device); + CUDF_CUDA_TRY(cudaSetDevice(device)); auto read_size = gds_read(file_handle, dst, size, offset, 0); CUDF_EXPECTS(read_size != -1, "cuFile error reading from a file"); return read_size; @@ -234,11 +234,11 @@ cufile_output_impl::cufile_output_impl(std::string const& filepath) std::future cufile_output_impl::write_async(void const* data, size_t offset, size_t size) { int device; - cudaGetDevice(&device); + CUDF_CUDA_TRY(cudaGetDevice(&device)); auto write_slice = [device, gds_write = shim->write, file_handle = cf_file.handle()]( void const* src, size_t size, size_t offset) -> void { - cudaSetDevice(device); + CUDF_CUDA_TRY(cudaSetDevice(device)); auto write_size = gds_write(file_handle, src, size, offset, 0); CUDF_EXPECTS(write_size != -1 and write_size == static_cast(size), "cuFile error writing to a file"); diff --git a/cpp/src/lists/dremel.cu b/cpp/src/lists/dremel.cu index 66134138a5c..26988622aee 100644 --- a/cpp/src/lists/dremel.cu +++ b/cpp/src/lists/dremel.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -126,7 +126,8 @@ dremel_data get_dremel_data(column_view h_col, std::unique_ptr empty_list_offset_col; if (has_empty_list_offsets) { empty_list_offset_col = make_fixed_width_column(data_type(type_id::INT32), 1); - cudaMemsetAsync(empty_list_offset_col->mutable_view().head(), 0, sizeof(size_type), stream); + CUDF_CUDA_TRY(cudaMemsetAsync( + empty_list_offset_col->mutable_view().head(), 0, sizeof(size_type), stream.value())); std::function normalize_col = [&](column_view const& col) { auto children = [&]() -> std::vector { if (col.type().id() == type_id::LIST) { diff --git a/cpp/tests/io/comp/decomp_test.cpp b/cpp/tests/io/comp/decomp_test.cpp index 93b53d80e44..5c75e399062 100644 --- a/cpp/tests/io/comp/decomp_test.cpp +++ b/cpp/tests/io/comp/decomp_test.cpp @@ -64,8 +64,8 @@ struct DecompressTest : public cudf::test::BaseFixture { inf_stat.host_to_device(stream); static_cast(this)->dispatch(inf_in, inf_out, inf_stat); - cudaMemcpyAsync( - decompressed->data(), dst.data(), dst.size(), cudaMemcpyDefault, stream.value()); + CUDF_CUDA_TRY(cudaMemcpyAsync( + decompressed->data(), dst.data(), dst.size(), cudaMemcpyDefault, stream.value())); inf_stat.device_to_host(stream, true); ASSERT_EQ(inf_stat[0].status, cudf::io::compression_status::SUCCESS); } diff --git a/cpp/tests/io/fst/logical_stack_test.cu b/cpp/tests/io/fst/logical_stack_test.cu index f224df4157c..187a041e321 100644 --- a/cpp/tests/io/fst/logical_stack_test.cu +++ b/cpp/tests/io/fst/logical_stack_test.cu @@ -204,17 +204,17 @@ TEST_F(LogicalStackTest, GroundTruth) hostdevice_vector top_of_stack_gpu{string_size, stream_view}; cudf::device_span d_stack_op_idx_span{d_stack_op_indexes}; - cudaMemcpyAsync(d_stack_ops.data(), - stack_symbols.data(), - stack_symbols.size() * sizeof(SymbolT), - cudaMemcpyDefault, - stream.value()); - - cudaMemcpyAsync(d_stack_op_indexes.data(), - stack_op_indexes.data(), - stack_op_indexes.size() * sizeof(SymbolOffsetT), - cudaMemcpyDefault, - stream.value()); + CUDF_CUDA_TRY(cudaMemcpyAsync(d_stack_ops.data(), + stack_symbols.data(), + stack_symbols.size() * sizeof(SymbolT), + cudaMemcpyDefault, + stream.value())); + + CUDF_CUDA_TRY(cudaMemcpyAsync(d_stack_op_indexes.data(), + stack_op_indexes.data(), + stack_op_indexes.size() * sizeof(SymbolOffsetT), + cudaMemcpyDefault, + stream.value())); // Run algorithm fst::sparse_stack_op_to_top_of_stack(d_stack_ops.data(), diff --git a/cpp/tests/io/text/data_chunk_source_test.cpp b/cpp/tests/io/text/data_chunk_source_test.cpp index 5f192e61ff7..b9733697eb8 100644 --- a/cpp/tests/io/text/data_chunk_source_test.cpp +++ b/cpp/tests/io/text/data_chunk_source_test.cpp @@ -36,7 +36,7 @@ struct DataChunkSourceTest : public BaseFixture { std::string chunk_to_host(const cudf::io::text::device_data_chunk& chunk) { std::string result(chunk.size(), '\0'); - cudaMemcpy(result.data(), chunk.data(), chunk.size(), cudaMemcpyDefault); + CUDF_CUDA_TRY(cudaMemcpy(result.data(), chunk.data(), chunk.size(), cudaMemcpyDefault)); return result; } diff --git a/cpp/tests/utilities/tdigest_utilities.cu b/cpp/tests/utilities/tdigest_utilities.cu index ff2428f0038..15998e32bd0 100644 --- a/cpp/tests/utilities/tdigest_utilities.cu +++ b/cpp/tests/utilities/tdigest_utilities.cu @@ -113,10 +113,10 @@ std::unique_ptr make_expected_tdigest_column(std::vector h_offsets{0, tdigest.mean.size()}; auto offsets = cudf::make_fixed_width_column(data_type{type_id::INT32}, 2, mask_state::UNALLOCATED); - cudaMemcpy(offsets->mutable_view().begin(), - h_offsets.data(), - sizeof(offset_type) * 2, - cudaMemcpyDefault); + CUDF_CUDA_TRY(cudaMemcpy(offsets->mutable_view().begin(), + h_offsets.data(), + sizeof(offset_type) * 2, + cudaMemcpyDefault)); auto list = cudf::make_lists_column(1, std::move(offsets), std::move(tdigests), 0, {});