Skip to content

Commit

Permalink
Guard CUDA runtime APIs with error checking (#12531)
Browse files Browse the repository at this point in the history
This PR adds missing error checking for CUDA runtime APIs in libcudf.

Authors:
  - Yunsong Wang (https://github.com/PointKernel)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Nghia Truong (https://github.com/ttnghia)

URL: #12531
  • Loading branch information
PointKernel authored Jan 23, 2023
1 parent 37fa3c2 commit 27a4aa8
Show file tree
Hide file tree
Showing 15 changed files with 92 additions and 88 deletions.
4 changes: 2 additions & 2 deletions cpp/benchmarks/io/json/nested_json.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,8 @@ auto make_test_json_data(cudf::size_type string_size, rmm::cuda_stream_view stre
auto d_scalar = cudf::strings::repeat_string(d_string_scalar, repeat_times);

auto data = const_cast<char*>(d_scalar->data());
cudaMemsetAsync(data, '[', 1, stream.value());
cudaMemsetAsync(data + d_scalar->size() - 1, ']', 1, stream.value());
CUDF_CUDA_TRY(cudaMemsetAsync(data, '[', 1, stream.value()));
CUDF_CUDA_TRY(cudaMemsetAsync(data + d_scalar->size() - 1, ']', 1, stream.value()));

return d_scalar;
}
Expand Down
4 changes: 2 additions & 2 deletions cpp/benchmarks/io/text/multibyte_split.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,8 @@ static void bench_multibyte_split(nvbench::state& state,
}
if (source_type == data_chunk_source_type::host_pinned) {
host_pinned_input.resize(static_cast<std::size_t>(device_input.size()));
cudaMemcpy(
host_pinned_input.data(), device_input.data(), host_pinned_input.size(), cudaMemcpyDefault);
CUDF_CUDA_TRY(cudaMemcpy(
host_pinned_input.data(), device_input.data(), host_pinned_input.size(), cudaMemcpyDefault));
}

auto source = [&] {
Expand Down
4 changes: 2 additions & 2 deletions cpp/benchmarks/string/json.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -198,7 +198,7 @@ void BM_case(benchmark::State& state, std::string query_arg)
for (auto _ : state) {
cuda_event_timer raii(state, true);
auto result = cudf::strings::get_json_object(scv, json_path);
cudaStreamSynchronize(0);
CUDF_CUDA_TRY(cudaStreamSynchronize(0));
}

// this isn't strictly 100% accurate. a given query isn't necessarily
Expand Down
2 changes: 1 addition & 1 deletion cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -886,7 +886,7 @@ thrown exception includes a description of the CUDA error code in its `what()` message
Example:

```c++
CUDA_TRY( cudaMemcpy(&dst, &src, num_bytes) );
CUDF_CUDA_TRY( cudaMemcpy(&dst, &src, num_bytes) );
```
## Compile-Time Conditions
Expand Down
20 changes: 10 additions & 10 deletions cpp/src/io/json/reader_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -166,18 +166,18 @@ std::vector<std::string> create_key_strings(char const* h_data,
{
auto const num_cols = sorted_info.num_rows();
std::vector<uint64_t> h_offsets(num_cols);
cudaMemcpyAsync(h_offsets.data(),
sorted_info.column(0).data<uint64_t>(),
sizeof(uint64_t) * num_cols,
cudaMemcpyDefault,
stream.value());
CUDF_CUDA_TRY(cudaMemcpyAsync(h_offsets.data(),
sorted_info.column(0).data<uint64_t>(),
sizeof(uint64_t) * num_cols,
cudaMemcpyDefault,
stream.value()));

std::vector<uint16_t> h_lens(num_cols);
cudaMemcpyAsync(h_lens.data(),
sorted_info.column(1).data<uint16_t>(),
sizeof(uint16_t) * num_cols,
cudaMemcpyDefault,
stream.value());
CUDF_CUDA_TRY(cudaMemcpyAsync(h_lens.data(),
sorted_info.column(1).data<uint16_t>(),
sizeof(uint16_t) * num_cols,
cudaMemcpyDefault,
stream.value()));

std::vector<std::string> names(num_cols);
std::transform(h_offsets.cbegin(),
Expand Down
32 changes: 17 additions & 15 deletions cpp/src/io/orc/writer_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1322,16 +1322,18 @@ writer::impl::encoded_footer_statistics writer::impl::finish_statistic_blobs(
auto const chunk_bytes = stripes_per_col * sizeof(statistics_chunk);
auto const merge_bytes = stripes_per_col * sizeof(statistics_merge_group);
for (size_t col = 0; col < num_columns; ++col) {
cudaMemcpyAsync(stat_chunks.data() + (num_stripes * col) + num_entries_seen,
per_chunk_stats.stripe_stat_chunks[i].data() + col * stripes_per_col,
chunk_bytes,
cudaMemcpyDefault,
stream);
cudaMemcpyAsync(stats_merge.device_ptr() + (num_stripes * col) + num_entries_seen,
per_chunk_stats.stripe_stat_merge[i].device_ptr() + col * stripes_per_col,
merge_bytes,
cudaMemcpyDefault,
stream);
CUDF_CUDA_TRY(
cudaMemcpyAsync(stat_chunks.data() + (num_stripes * col) + num_entries_seen,
per_chunk_stats.stripe_stat_chunks[i].data() + col * stripes_per_col,
chunk_bytes,
cudaMemcpyDefault,
stream.value()));
CUDF_CUDA_TRY(
cudaMemcpyAsync(stats_merge.device_ptr() + (num_stripes * col) + num_entries_seen,
per_chunk_stats.stripe_stat_merge[i].device_ptr() + col * stripes_per_col,
merge_bytes,
cudaMemcpyDefault,
stream.value()));
}
num_entries_seen += stripes_per_col;
}
Expand All @@ -1346,11 +1348,11 @@ writer::impl::encoded_footer_statistics writer::impl::finish_statistic_blobs(
}

auto d_file_stats_merge = stats_merge.device_ptr(num_stripe_blobs);
cudaMemcpyAsync(d_file_stats_merge,
file_stats_merge.data(),
num_file_blobs * sizeof(statistics_merge_group),
cudaMemcpyDefault,
stream);
CUDF_CUDA_TRY(cudaMemcpyAsync(d_file_stats_merge,
file_stats_merge.data(),
num_file_blobs * sizeof(statistics_merge_group),
cudaMemcpyDefault,
stream.value()));

auto file_stat_chunks = stat_chunks.data() + num_stripe_blobs;
detail::merge_group_statistics<detail::io_file_format::ORC>(
Expand Down
10 changes: 5 additions & 5 deletions cpp/src/io/parquet/reader_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,11 +150,11 @@ void reader::impl::decode_page_data(size_t skip_rows, size_t num_rows)

// the final offset for a list at level N is the size of its child
int offset = child.type.id() == type_id::LIST ? child.size - 1 : child.size;
cudaMemcpyAsync(static_cast<int32_t*>(out_buf.data()) + (out_buf.size - 1),
&offset,
sizeof(offset),
cudaMemcpyDefault,
_stream.value());
CUDF_CUDA_TRY(cudaMemcpyAsync(static_cast<int32_t*>(out_buf.data()) + (out_buf.size - 1),
&offset,
sizeof(offset),
cudaMemcpyDefault,
_stream.value()));
out_buf.user_data |= PARQUET_COLUMN_BUFFER_FLAG_LIST_TERMINATED;
}
}
Expand Down
27 changes: 14 additions & 13 deletions cpp/src/io/parquet/reader_impl_preprocess.cu
Original file line number Diff line number Diff line change
Expand Up @@ -817,10 +817,11 @@ void print_cumulative_page_info(hostdevice_vector<gpu::PageInfo>& pages,

std::vector<int> schemas(pages.size());
std::vector<int> h_page_index(pages.size());
cudaMemcpy(h_page_index.data(), page_index.data(), sizeof(int) * pages.size(), cudaMemcpyDefault);
CUDF_CUDA_TRY(cudaMemcpy(
h_page_index.data(), page_index.data(), sizeof(int) * pages.size(), cudaMemcpyDefault));
std::vector<cumulative_row_info> h_cinfo(pages.size());
cudaMemcpy(
h_cinfo.data(), c_info.data(), sizeof(cumulative_row_info) * pages.size(), cudaMemcpyDefault);
CUDF_CUDA_TRY(cudaMemcpy(
h_cinfo.data(), c_info.data(), sizeof(cumulative_row_info) * pages.size(), cudaMemcpyDefault));
auto schema_iter = cudf::detail::make_counting_transform_iterator(
0, [&](size_type i) { return pages[h_page_index[i]].src_col_schema; });
thrust::copy(thrust::seq, schema_iter, schema_iter + pages.size(), schemas.begin());
Expand Down Expand Up @@ -1107,11 +1108,11 @@ std::vector<gpu::chunk_read_info> compute_splits(hostdevice_vector<gpu::PageInfo
return a.row_count < b.row_count;
});

std::vector<cumulative_row_info> h_c_info_sorted(c_info_sorted.size());
cudaMemcpy(h_c_info_sorted.data(),
c_info_sorted.data(),
sizeof(cumulative_row_info) * c_info_sorted.size(),
cudaMemcpyDefault);
// std::vector<cumulative_row_info> h_c_info_sorted(c_info_sorted.size());
// CUDF_CUDA_TRY(cudaMemcpy(h_c_info_sorted.data(),
// c_info_sorted.data(),
// sizeof(cumulative_row_info) * c_info_sorted.size(),
// cudaMemcpyDefault));
// print_cumulative_row_info(h_c_info_sorted, "raw");

// generate key offsets (offsets to the start of each partition of keys). worst case is 1 page per
Expand Down Expand Up @@ -1147,11 +1148,11 @@ std::vector<gpu::chunk_read_info> compute_splits(hostdevice_vector<gpu::PageInfo

// bring back to the cpu
std::vector<cumulative_row_info> h_aggregated_info(aggregated_info.size());
cudaMemcpyAsync(h_aggregated_info.data(),
aggregated_info.data(),
sizeof(cumulative_row_info) * c_info.size(),
cudaMemcpyDefault,
stream);
CUDF_CUDA_TRY(cudaMemcpyAsync(h_aggregated_info.data(),
aggregated_info.data(),
sizeof(cumulative_row_info) * c_info.size(),
cudaMemcpyDefault,
stream.value()));
stream.synchronize();

return find_splits(h_aggregated_info, num_rows, chunk_read_limit);
Expand Down
26 changes: 13 additions & 13 deletions cpp/src/io/text/multibyte_split.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -312,23 +312,23 @@ namespace detail {
// Fan work out from `stream` onto each stream in `streams`: record an event on
// `stream` and make every target stream wait on it, so work later enqueued on
// the forked streams is ordered after everything already enqueued on `stream`.
// Note: the diff rendering had merged the pre- and post-change lines; this is
// the post-change version, with every CUDA runtime call guarded by
// CUDF_CUDA_TRY so failures surface as exceptions instead of being ignored.
void fork_stream(std::vector<rmm::cuda_stream_view> streams, rmm::cuda_stream_view stream)
{
  cudaEvent_t event;
  CUDF_CUDA_TRY(cudaEventCreate(&event));
  CUDF_CUDA_TRY(cudaEventRecord(event, stream));
  for (uint32_t i = 0; i < streams.size(); i++) {
    CUDF_CUDA_TRY(cudaStreamWaitEvent(streams[i], event, 0));
  }
  // Destroying the event is safe even while streams still wait on it: CUDA
  // defers resource release until all captured work has completed.
  CUDF_CUDA_TRY(cudaEventDestroy(event));
}

// Join previously forked streams back into `stream`: for each forked stream,
// record an event capturing its pending work and make `stream` wait on it.
// Reusing a single event across iterations is valid because cudaEventRecord
// overwrites the previous capture and the wait is enqueued before the next
// record. Note: the diff rendering had merged the pre- and post-change lines;
// this is the post-change version, with every CUDA runtime call guarded by
// CUDF_CUDA_TRY so failures surface as exceptions instead of being ignored.
void join_stream(std::vector<rmm::cuda_stream_view> streams, rmm::cuda_stream_view stream)
{
  cudaEvent_t event;
  CUDF_CUDA_TRY(cudaEventCreate(&event));
  for (uint32_t i = 0; i < streams.size(); i++) {
    CUDF_CUDA_TRY(cudaEventRecord(event, streams[i]));
    CUDF_CUDA_TRY(cudaStreamWaitEvent(stream, event, 0));
  }
  CUDF_CUDA_TRY(cudaEventDestroy(event));
}

std::vector<rmm::cuda_stream_view> get_streams(int32_t count, rmm::cuda_stream_pool& stream_pool)
Expand Down Expand Up @@ -417,7 +417,7 @@ std::unique_ptr<cudf::column> multibyte_split(cudf::io::text::data_chunk_source
fork_stream(streams, stream);

cudaEvent_t last_launch_event;
cudaEventCreate(&last_launch_event);
CUDF_CUDA_TRY(cudaEventCreate(&last_launch_event));

auto& read_stream = streams[0];
auto& scan_stream = streams[1];
Expand Down Expand Up @@ -451,7 +451,7 @@ std::unique_ptr<cudf::column> multibyte_split(cudf::io::text::data_chunk_source
tile_multistates,
tile_offsets);

cudaStreamWaitEvent(scan_stream.value(), last_launch_event);
CUDF_CUDA_TRY(cudaStreamWaitEvent(scan_stream.value(), last_launch_event));

if (delimiter.size() == 1) {
// the single-byte case allows for a much more efficient kernel, so we special-case it
Expand Down Expand Up @@ -525,15 +525,15 @@ std::unique_ptr<cudf::column> multibyte_split(cudf::io::text::data_chunk_source
char_storage.advance_output(output_size, scan_stream);
}

cudaEventRecord(last_launch_event, scan_stream.value());
CUDF_CUDA_TRY(cudaEventRecord(last_launch_event, scan_stream.value()));

std::swap(read_stream, scan_stream);
base_tile_idx += tiles_in_launch;
chunk_offset += chunk->size();
chunk = std::move(next_chunk);
}

cudaEventDestroy(last_launch_event);
CUDF_CUDA_TRY(cudaEventDestroy(last_launch_event));

join_stream(streams, stream);

Expand Down
10 changes: 5 additions & 5 deletions cpp/src/io/utilities/file_io_utilities.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -202,11 +202,11 @@ std::future<size_t> cufile_input_impl::read_async(size_t offset,
rmm::cuda_stream_view stream)
{
int device;
cudaGetDevice(&device);
CUDF_CUDA_TRY(cudaGetDevice(&device));

auto read_slice = [device, gds_read = shim->read, file_handle = cf_file.handle()](
void* dst, size_t size, size_t offset) -> ssize_t {
cudaSetDevice(device);
CUDF_CUDA_TRY(cudaSetDevice(device));
auto read_size = gds_read(file_handle, dst, size, offset, 0);
CUDF_EXPECTS(read_size != -1, "cuFile error reading from a file");
return read_size;
Expand Down Expand Up @@ -234,11 +234,11 @@ cufile_output_impl::cufile_output_impl(std::string const& filepath)
std::future<void> cufile_output_impl::write_async(void const* data, size_t offset, size_t size)
{
int device;
cudaGetDevice(&device);
CUDF_CUDA_TRY(cudaGetDevice(&device));

auto write_slice = [device, gds_write = shim->write, file_handle = cf_file.handle()](
void const* src, size_t size, size_t offset) -> void {
cudaSetDevice(device);
CUDF_CUDA_TRY(cudaSetDevice(device));
auto write_size = gds_write(file_handle, src, size, offset, 0);
CUDF_EXPECTS(write_size != -1 and write_size == static_cast<decltype(write_size)>(size),
"cuFile error writing to a file");
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/lists/dremel.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -126,7 +126,8 @@ dremel_data get_dremel_data(column_view h_col,
std::unique_ptr<column> empty_list_offset_col;
if (has_empty_list_offsets) {
empty_list_offset_col = make_fixed_width_column(data_type(type_id::INT32), 1);
cudaMemsetAsync(empty_list_offset_col->mutable_view().head(), 0, sizeof(size_type), stream);
CUDF_CUDA_TRY(cudaMemsetAsync(
empty_list_offset_col->mutable_view().head(), 0, sizeof(size_type), stream.value()));
std::function<column_view(column_view const&)> normalize_col = [&](column_view const& col) {
auto children = [&]() -> std::vector<column_view> {
if (col.type().id() == type_id::LIST) {
Expand Down
4 changes: 2 additions & 2 deletions cpp/tests/io/comp/decomp_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@ struct DecompressTest : public cudf::test::BaseFixture {
inf_stat.host_to_device(stream);

static_cast<Decompressor*>(this)->dispatch(inf_in, inf_out, inf_stat);
cudaMemcpyAsync(
decompressed->data(), dst.data(), dst.size(), cudaMemcpyDefault, stream.value());
CUDF_CUDA_TRY(cudaMemcpyAsync(
decompressed->data(), dst.data(), dst.size(), cudaMemcpyDefault, stream.value()));
inf_stat.device_to_host(stream, true);
ASSERT_EQ(inf_stat[0].status, cudf::io::compression_status::SUCCESS);
}
Expand Down
22 changes: 11 additions & 11 deletions cpp/tests/io/fst/logical_stack_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -204,17 +204,17 @@ TEST_F(LogicalStackTest, GroundTruth)
hostdevice_vector<SymbolT> top_of_stack_gpu{string_size, stream_view};
cudf::device_span<SymbolOffsetT> d_stack_op_idx_span{d_stack_op_indexes};

cudaMemcpyAsync(d_stack_ops.data(),
stack_symbols.data(),
stack_symbols.size() * sizeof(SymbolT),
cudaMemcpyDefault,
stream.value());

cudaMemcpyAsync(d_stack_op_indexes.data(),
stack_op_indexes.data(),
stack_op_indexes.size() * sizeof(SymbolOffsetT),
cudaMemcpyDefault,
stream.value());
CUDF_CUDA_TRY(cudaMemcpyAsync(d_stack_ops.data(),
stack_symbols.data(),
stack_symbols.size() * sizeof(SymbolT),
cudaMemcpyDefault,
stream.value()));

CUDF_CUDA_TRY(cudaMemcpyAsync(d_stack_op_indexes.data(),
stack_op_indexes.data(),
stack_op_indexes.size() * sizeof(SymbolOffsetT),
cudaMemcpyDefault,
stream.value()));

// Run algorithm
fst::sparse_stack_op_to_top_of_stack<StackLevelT>(d_stack_ops.data(),
Expand Down
2 changes: 1 addition & 1 deletion cpp/tests/io/text/data_chunk_source_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ struct DataChunkSourceTest : public BaseFixture {
// Copies a device-resident data chunk into a host std::string for comparison
// in tests. Uses the synchronous cudaMemcpy (with cudaMemcpyDefault letting
// the driver infer direction), so the result is complete on return. Note: the
// diff rendering had merged the pre- and post-change lines; this is the
// post-change version, with the copy guarded by CUDF_CUDA_TRY so a failed
// copy throws instead of silently returning a zero-filled string.
std::string chunk_to_host(const cudf::io::text::device_data_chunk& chunk)
{
  std::string result(chunk.size(), '\0');
  CUDF_CUDA_TRY(cudaMemcpy(result.data(), chunk.data(), chunk.size(), cudaMemcpyDefault));
  return result;
}

Expand Down
8 changes: 4 additions & 4 deletions cpp/tests/utilities/tdigest_utilities.cu
Original file line number Diff line number Diff line change
Expand Up @@ -113,10 +113,10 @@ std::unique_ptr<column> make_expected_tdigest_column(std::vector<expected_tdiges
std::vector<offset_type> h_offsets{0, tdigest.mean.size()};
auto offsets =
cudf::make_fixed_width_column(data_type{type_id::INT32}, 2, mask_state::UNALLOCATED);
cudaMemcpy(offsets->mutable_view().begin<offset_type>(),
h_offsets.data(),
sizeof(offset_type) * 2,
cudaMemcpyDefault);
CUDF_CUDA_TRY(cudaMemcpy(offsets->mutable_view().begin<offset_type>(),
h_offsets.data(),
sizeof(offset_type) * 2,
cudaMemcpyDefault));

auto list = cudf::make_lists_column(1, std::move(offsets), std::move(tdigests), 0, {});

Expand Down

0 comments on commit 27a4aa8

Please sign in to comment.