Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Guard CUDA runtime APIs with error checking #12531

Merged
merged 13 commits into from
Jan 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cpp/benchmarks/io/json/nested_json.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,8 @@ auto make_test_json_data(cudf::size_type string_size, rmm::cuda_stream_view stre
auto d_scalar = cudf::strings::repeat_string(d_string_scalar, repeat_times);

auto data = const_cast<char*>(d_scalar->data());
cudaMemsetAsync(data, '[', 1, stream.value());
cudaMemsetAsync(data + d_scalar->size() - 1, ']', 1, stream.value());
CUDF_CUDA_TRY(cudaMemsetAsync(data, '[', 1, stream.value()));
CUDF_CUDA_TRY(cudaMemsetAsync(data + d_scalar->size() - 1, ']', 1, stream.value()));

return d_scalar;
}
Expand Down
4 changes: 2 additions & 2 deletions cpp/benchmarks/io/text/multibyte_split.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,8 @@ static void bench_multibyte_split(nvbench::state& state,
}
if (source_type == data_chunk_source_type::host_pinned) {
host_pinned_input.resize(static_cast<std::size_t>(device_input.size()));
cudaMemcpy(
host_pinned_input.data(), device_input.data(), host_pinned_input.size(), cudaMemcpyDefault);
CUDF_CUDA_TRY(cudaMemcpy(
host_pinned_input.data(), device_input.data(), host_pinned_input.size(), cudaMemcpyDefault));
}

auto source = [&] {
Expand Down
4 changes: 2 additions & 2 deletions cpp/benchmarks/string/json.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -198,7 +198,7 @@ void BM_case(benchmark::State& state, std::string query_arg)
for (auto _ : state) {
cuda_event_timer raii(state, true);
auto result = cudf::strings::get_json_object(scv, json_path);
cudaStreamSynchronize(0);
CUDF_CUDA_TRY(cudaStreamSynchronize(0));
}

// this isn't strictly 100% accurate. a given query isn't necessarily
Expand Down
2 changes: 1 addition & 1 deletion cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -886,7 +886,7 @@ thrown exception includes a description of the CUDA error code in its `what()` m
Example:

```c++
CUDA_TRY( cudaMemcpy(&dst, &src, num_bytes) );
CUDF_CUDA_TRY( cudaMemcpy(&dst, &src, num_bytes) );
```

## Compile-Time Conditions
Expand Down
20 changes: 10 additions & 10 deletions cpp/src/io/json/reader_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -166,18 +166,18 @@ std::vector<std::string> create_key_strings(char const* h_data,
{
auto const num_cols = sorted_info.num_rows();
std::vector<uint64_t> h_offsets(num_cols);
cudaMemcpyAsync(h_offsets.data(),
sorted_info.column(0).data<uint64_t>(),
sizeof(uint64_t) * num_cols,
cudaMemcpyDefault,
stream.value());
CUDF_CUDA_TRY(cudaMemcpyAsync(h_offsets.data(),
sorted_info.column(0).data<uint64_t>(),
sizeof(uint64_t) * num_cols,
cudaMemcpyDefault,
stream.value()));

std::vector<uint16_t> h_lens(num_cols);
cudaMemcpyAsync(h_lens.data(),
sorted_info.column(1).data<uint16_t>(),
sizeof(uint16_t) * num_cols,
cudaMemcpyDefault,
stream.value());
CUDF_CUDA_TRY(cudaMemcpyAsync(h_lens.data(),
sorted_info.column(1).data<uint16_t>(),
sizeof(uint16_t) * num_cols,
cudaMemcpyDefault,
stream.value()));

std::vector<std::string> names(num_cols);
std::transform(h_offsets.cbegin(),
Expand Down
32 changes: 17 additions & 15 deletions cpp/src/io/orc/writer_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1322,16 +1322,18 @@ writer::impl::encoded_footer_statistics writer::impl::finish_statistic_blobs(
auto const chunk_bytes = stripes_per_col * sizeof(statistics_chunk);
auto const merge_bytes = stripes_per_col * sizeof(statistics_merge_group);
for (size_t col = 0; col < num_columns; ++col) {
cudaMemcpyAsync(stat_chunks.data() + (num_stripes * col) + num_entries_seen,
per_chunk_stats.stripe_stat_chunks[i].data() + col * stripes_per_col,
chunk_bytes,
cudaMemcpyDefault,
stream);
cudaMemcpyAsync(stats_merge.device_ptr() + (num_stripes * col) + num_entries_seen,
per_chunk_stats.stripe_stat_merge[i].device_ptr() + col * stripes_per_col,
merge_bytes,
cudaMemcpyDefault,
stream);
CUDF_CUDA_TRY(
cudaMemcpyAsync(stat_chunks.data() + (num_stripes * col) + num_entries_seen,
per_chunk_stats.stripe_stat_chunks[i].data() + col * stripes_per_col,
chunk_bytes,
cudaMemcpyDefault,
stream.value()));
CUDF_CUDA_TRY(
cudaMemcpyAsync(stats_merge.device_ptr() + (num_stripes * col) + num_entries_seen,
per_chunk_stats.stripe_stat_merge[i].device_ptr() + col * stripes_per_col,
merge_bytes,
cudaMemcpyDefault,
stream.value()));
}
num_entries_seen += stripes_per_col;
}
Expand All @@ -1346,11 +1348,11 @@ writer::impl::encoded_footer_statistics writer::impl::finish_statistic_blobs(
}

auto d_file_stats_merge = stats_merge.device_ptr(num_stripe_blobs);
cudaMemcpyAsync(d_file_stats_merge,
file_stats_merge.data(),
num_file_blobs * sizeof(statistics_merge_group),
cudaMemcpyDefault,
stream);
CUDF_CUDA_TRY(cudaMemcpyAsync(d_file_stats_merge,
file_stats_merge.data(),
num_file_blobs * sizeof(statistics_merge_group),
cudaMemcpyDefault,
stream.value()));

auto file_stat_chunks = stat_chunks.data() + num_stripe_blobs;
detail::merge_group_statistics<detail::io_file_format::ORC>(
Expand Down
10 changes: 5 additions & 5 deletions cpp/src/io/parquet/reader_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,11 +150,11 @@ void reader::impl::decode_page_data(size_t skip_rows, size_t num_rows)

// the final offset for a list at level N is the size of it's child
int offset = child.type.id() == type_id::LIST ? child.size - 1 : child.size;
cudaMemcpyAsync(static_cast<int32_t*>(out_buf.data()) + (out_buf.size - 1),
&offset,
sizeof(offset),
cudaMemcpyDefault,
_stream.value());
CUDF_CUDA_TRY(cudaMemcpyAsync(static_cast<int32_t*>(out_buf.data()) + (out_buf.size - 1),
&offset,
sizeof(offset),
cudaMemcpyDefault,
_stream.value()));
out_buf.user_data |= PARQUET_COLUMN_BUFFER_FLAG_LIST_TERMINATED;
}
}
Expand Down
27 changes: 14 additions & 13 deletions cpp/src/io/parquet/reader_impl_preprocess.cu
Original file line number Diff line number Diff line change
Expand Up @@ -817,10 +817,11 @@ void print_cumulative_page_info(hostdevice_vector<gpu::PageInfo>& pages,

std::vector<int> schemas(pages.size());
std::vector<int> h_page_index(pages.size());
cudaMemcpy(h_page_index.data(), page_index.data(), sizeof(int) * pages.size(), cudaMemcpyDefault);
CUDF_CUDA_TRY(cudaMemcpy(
h_page_index.data(), page_index.data(), sizeof(int) * pages.size(), cudaMemcpyDefault));
std::vector<cumulative_row_info> h_cinfo(pages.size());
cudaMemcpy(
h_cinfo.data(), c_info.data(), sizeof(cumulative_row_info) * pages.size(), cudaMemcpyDefault);
CUDF_CUDA_TRY(cudaMemcpy(
h_cinfo.data(), c_info.data(), sizeof(cumulative_row_info) * pages.size(), cudaMemcpyDefault));
auto schema_iter = cudf::detail::make_counting_transform_iterator(
0, [&](size_type i) { return pages[h_page_index[i]].src_col_schema; });
thrust::copy(thrust::seq, schema_iter, schema_iter + pages.size(), schemas.begin());
Expand Down Expand Up @@ -1107,11 +1108,11 @@ std::vector<gpu::chunk_read_info> compute_splits(hostdevice_vector<gpu::PageInfo
return a.row_count < b.row_count;
});

std::vector<cumulative_row_info> h_c_info_sorted(c_info_sorted.size());
cudaMemcpy(h_c_info_sorted.data(),
c_info_sorted.data(),
sizeof(cumulative_row_info) * c_info_sorted.size(),
cudaMemcpyDefault);
// std::vector<cumulative_row_info> h_c_info_sorted(c_info_sorted.size());
// CUDF_CUDA_TRY(cudaMemcpy(h_c_info_sorted.data(),
// c_info_sorted.data(),
// sizeof(cumulative_row_info) * c_info_sorted.size(),
// cudaMemcpyDefault));
// print_cumulative_row_info(h_c_info_sorted, "raw");

// generate key offsets (offsets to the start of each partition of keys). worst case is 1 page per
Expand Down Expand Up @@ -1147,11 +1148,11 @@ std::vector<gpu::chunk_read_info> compute_splits(hostdevice_vector<gpu::PageInfo

// bring back to the cpu
std::vector<cumulative_row_info> h_aggregated_info(aggregated_info.size());
cudaMemcpyAsync(h_aggregated_info.data(),
aggregated_info.data(),
sizeof(cumulative_row_info) * c_info.size(),
cudaMemcpyDefault,
stream);
CUDF_CUDA_TRY(cudaMemcpyAsync(h_aggregated_info.data(),
aggregated_info.data(),
sizeof(cumulative_row_info) * c_info.size(),
cudaMemcpyDefault,
stream.value()));
stream.synchronize();

return find_splits(h_aggregated_info, num_rows, chunk_read_limit);
Expand Down
26 changes: 13 additions & 13 deletions cpp/src/io/text/multibyte_split.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -312,23 +312,23 @@ namespace detail {
void fork_stream(std::vector<rmm::cuda_stream_view> streams, rmm::cuda_stream_view stream)
{
cudaEvent_t event;
cudaEventCreate(&event);
cudaEventRecord(event, stream);
CUDF_CUDA_TRY(cudaEventCreate(&event));
CUDF_CUDA_TRY(cudaEventRecord(event, stream));
for (uint32_t i = 0; i < streams.size(); i++) {
cudaStreamWaitEvent(streams[i], event, 0);
CUDF_CUDA_TRY(cudaStreamWaitEvent(streams[i], event, 0));
}
cudaEventDestroy(event);
CUDF_CUDA_TRY(cudaEventDestroy(event));
}

void join_stream(std::vector<rmm::cuda_stream_view> streams, rmm::cuda_stream_view stream)
{
cudaEvent_t event;
cudaEventCreate(&event);
CUDF_CUDA_TRY(cudaEventCreate(&event));
for (uint32_t i = 0; i < streams.size(); i++) {
cudaEventRecord(event, streams[i]);
cudaStreamWaitEvent(stream, event, 0);
CUDF_CUDA_TRY(cudaEventRecord(event, streams[i]));
CUDF_CUDA_TRY(cudaStreamWaitEvent(stream, event, 0));
}
cudaEventDestroy(event);
CUDF_CUDA_TRY(cudaEventDestroy(event));
}

std::vector<rmm::cuda_stream_view> get_streams(int32_t count, rmm::cuda_stream_pool& stream_pool)
Expand Down Expand Up @@ -417,7 +417,7 @@ std::unique_ptr<cudf::column> multibyte_split(cudf::io::text::data_chunk_source
fork_stream(streams, stream);

cudaEvent_t last_launch_event;
cudaEventCreate(&last_launch_event);
CUDF_CUDA_TRY(cudaEventCreate(&last_launch_event));

auto& read_stream = streams[0];
auto& scan_stream = streams[1];
Expand Down Expand Up @@ -451,7 +451,7 @@ std::unique_ptr<cudf::column> multibyte_split(cudf::io::text::data_chunk_source
tile_multistates,
tile_offsets);

cudaStreamWaitEvent(scan_stream.value(), last_launch_event);
CUDF_CUDA_TRY(cudaStreamWaitEvent(scan_stream.value(), last_launch_event));

if (delimiter.size() == 1) {
// the single-byte case allows for a much more efficient kernel, so we special-case it
Expand Down Expand Up @@ -525,15 +525,15 @@ std::unique_ptr<cudf::column> multibyte_split(cudf::io::text::data_chunk_source
char_storage.advance_output(output_size, scan_stream);
}

cudaEventRecord(last_launch_event, scan_stream.value());
CUDF_CUDA_TRY(cudaEventRecord(last_launch_event, scan_stream.value()));

std::swap(read_stream, scan_stream);
base_tile_idx += tiles_in_launch;
chunk_offset += chunk->size();
chunk = std::move(next_chunk);
}

cudaEventDestroy(last_launch_event);
CUDF_CUDA_TRY(cudaEventDestroy(last_launch_event));

join_stream(streams, stream);

Expand Down
10 changes: 5 additions & 5 deletions cpp/src/io/utilities/file_io_utilities.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -202,11 +202,11 @@ std::future<size_t> cufile_input_impl::read_async(size_t offset,
rmm::cuda_stream_view stream)
{
int device;
cudaGetDevice(&device);
CUDF_CUDA_TRY(cudaGetDevice(&device));

auto read_slice = [device, gds_read = shim->read, file_handle = cf_file.handle()](
void* dst, size_t size, size_t offset) -> ssize_t {
cudaSetDevice(device);
CUDF_CUDA_TRY(cudaSetDevice(device));
auto read_size = gds_read(file_handle, dst, size, offset, 0);
CUDF_EXPECTS(read_size != -1, "cuFile error reading from a file");
return read_size;
Expand Down Expand Up @@ -234,11 +234,11 @@ cufile_output_impl::cufile_output_impl(std::string const& filepath)
std::future<void> cufile_output_impl::write_async(void const* data, size_t offset, size_t size)
{
int device;
cudaGetDevice(&device);
CUDF_CUDA_TRY(cudaGetDevice(&device));

auto write_slice = [device, gds_write = shim->write, file_handle = cf_file.handle()](
void const* src, size_t size, size_t offset) -> void {
cudaSetDevice(device);
CUDF_CUDA_TRY(cudaSetDevice(device));
auto write_size = gds_write(file_handle, src, size, offset, 0);
CUDF_EXPECTS(write_size != -1 and write_size == static_cast<decltype(write_size)>(size),
"cuFile error writing to a file");
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/lists/dremel.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -126,7 +126,8 @@ dremel_data get_dremel_data(column_view h_col,
std::unique_ptr<column> empty_list_offset_col;
if (has_empty_list_offsets) {
empty_list_offset_col = make_fixed_width_column(data_type(type_id::INT32), 1);
cudaMemsetAsync(empty_list_offset_col->mutable_view().head(), 0, sizeof(size_type), stream);
CUDF_CUDA_TRY(cudaMemsetAsync(
empty_list_offset_col->mutable_view().head(), 0, sizeof(size_type), stream.value()));
std::function<column_view(column_view const&)> normalize_col = [&](column_view const& col) {
auto children = [&]() -> std::vector<column_view> {
if (col.type().id() == type_id::LIST) {
Expand Down
4 changes: 2 additions & 2 deletions cpp/tests/io/comp/decomp_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@ struct DecompressTest : public cudf::test::BaseFixture {
inf_stat.host_to_device(stream);

static_cast<Decompressor*>(this)->dispatch(inf_in, inf_out, inf_stat);
cudaMemcpyAsync(
decompressed->data(), dst.data(), dst.size(), cudaMemcpyDefault, stream.value());
CUDF_CUDA_TRY(cudaMemcpyAsync(
decompressed->data(), dst.data(), dst.size(), cudaMemcpyDefault, stream.value()));
inf_stat.device_to_host(stream, true);
ASSERT_EQ(inf_stat[0].status, cudf::io::compression_status::SUCCESS);
}
Expand Down
22 changes: 11 additions & 11 deletions cpp/tests/io/fst/logical_stack_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -204,17 +204,17 @@ TEST_F(LogicalStackTest, GroundTruth)
hostdevice_vector<SymbolT> top_of_stack_gpu{string_size, stream_view};
cudf::device_span<SymbolOffsetT> d_stack_op_idx_span{d_stack_op_indexes};

cudaMemcpyAsync(d_stack_ops.data(),
stack_symbols.data(),
stack_symbols.size() * sizeof(SymbolT),
cudaMemcpyDefault,
stream.value());

cudaMemcpyAsync(d_stack_op_indexes.data(),
stack_op_indexes.data(),
stack_op_indexes.size() * sizeof(SymbolOffsetT),
cudaMemcpyDefault,
stream.value());
CUDF_CUDA_TRY(cudaMemcpyAsync(d_stack_ops.data(),
stack_symbols.data(),
stack_symbols.size() * sizeof(SymbolT),
cudaMemcpyDefault,
stream.value()));

CUDF_CUDA_TRY(cudaMemcpyAsync(d_stack_op_indexes.data(),
stack_op_indexes.data(),
stack_op_indexes.size() * sizeof(SymbolOffsetT),
cudaMemcpyDefault,
stream.value()));

// Run algorithm
fst::sparse_stack_op_to_top_of_stack<StackLevelT>(d_stack_ops.data(),
Expand Down
2 changes: 1 addition & 1 deletion cpp/tests/io/text/data_chunk_source_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ struct DataChunkSourceTest : public BaseFixture {
std::string chunk_to_host(const cudf::io::text::device_data_chunk& chunk)
{
std::string result(chunk.size(), '\0');
cudaMemcpy(result.data(), chunk.data(), chunk.size(), cudaMemcpyDefault);
CUDF_CUDA_TRY(cudaMemcpy(result.data(), chunk.data(), chunk.size(), cudaMemcpyDefault));
return result;
}

Expand Down
8 changes: 4 additions & 4 deletions cpp/tests/utilities/tdigest_utilities.cu
Original file line number Diff line number Diff line change
Expand Up @@ -113,10 +113,10 @@ std::unique_ptr<column> make_expected_tdigest_column(std::vector<expected_tdiges
std::vector<offset_type> h_offsets{0, tdigest.mean.size()};
auto offsets =
cudf::make_fixed_width_column(data_type{type_id::INT32}, 2, mask_state::UNALLOCATED);
cudaMemcpy(offsets->mutable_view().begin<offset_type>(),
h_offsets.data(),
sizeof(offset_type) * 2,
cudaMemcpyDefault);
CUDF_CUDA_TRY(cudaMemcpy(offsets->mutable_view().begin<offset_type>(),
h_offsets.data(),
sizeof(offset_type) * 2,
cudaMemcpyDefault));

auto list = cudf::make_lists_column(1, std::move(offsets), std::move(tdigests), 0, {});

Expand Down