diff --git a/cpp/benchmarks/synchronization/synchronization.cpp b/cpp/benchmarks/synchronization/synchronization.cpp index bd8a4d1de..a6a552a03 100644 --- a/cpp/benchmarks/synchronization/synchronization.cpp +++ b/cpp/benchmarks/synchronization/synchronization.cpp @@ -16,7 +16,7 @@ #include "synchronization.hpp" -#include +#include #include #include @@ -29,32 +29,33 @@ cuda_event_timer::cuda_event_timer(benchmark::State& state, // flush all of L2$ if (flush_l2_cache) { int current_device = 0; - CUDA_TRY(cudaGetDevice(¤t_device)); + CUSPATIAL_CUDA_TRY(cudaGetDevice(¤t_device)); int l2_cache_bytes = 0; - CUDA_TRY(cudaDeviceGetAttribute(&l2_cache_bytes, cudaDevAttrL2CacheSize, current_device)); + CUSPATIAL_CUDA_TRY( + cudaDeviceGetAttribute(&l2_cache_bytes, cudaDevAttrL2CacheSize, current_device)); if (l2_cache_bytes > 0) { const int memset_value = 0; rmm::device_buffer l2_cache_buffer(l2_cache_bytes, stream); - CUDA_TRY( + CUSPATIAL_CUDA_TRY( cudaMemsetAsync(l2_cache_buffer.data(), memset_value, l2_cache_bytes, stream.value())); } } - CUDA_TRY(cudaEventCreate(&start)); - CUDA_TRY(cudaEventCreate(&stop)); - CUDA_TRY(cudaEventRecord(start, stream.value())); + CUSPATIAL_CUDA_TRY(cudaEventCreate(&start)); + CUSPATIAL_CUDA_TRY(cudaEventCreate(&stop)); + CUSPATIAL_CUDA_TRY(cudaEventRecord(start, stream.value())); } cuda_event_timer::~cuda_event_timer() { - CUDA_TRY(cudaEventRecord(stop, stream.value())); - CUDA_TRY(cudaEventSynchronize(stop)); + CUSPATIAL_CUDA_TRY(cudaEventRecord(stop, stream.value())); + CUSPATIAL_CUDA_TRY(cudaEventSynchronize(stop)); float milliseconds = 0.0f; - CUDA_TRY(cudaEventElapsedTime(&milliseconds, start, stop)); + CUSPATIAL_CUDA_TRY(cudaEventElapsedTime(&milliseconds, start, stop)); p_state->SetIterationTime(milliseconds / (1000.0f)); - CUDA_TRY(cudaEventDestroy(start)); - CUDA_TRY(cudaEventDestroy(stop)); + CUSPATIAL_CUDA_TRY(cudaEventDestroy(start)); + CUSPATIAL_CUDA_TRY(cudaEventDestroy(stop)); } diff --git a/cpp/include/cuspatial/error.hpp b/cpp/include/cuspatial/error.hpp index 1747bb08f..75c3da3b0 100644 --- a/cpp/include/cuspatial/error.hpp +++ b/cpp/include/cuspatial/error.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +16,8 @@ #pragma once +#include +#include #include #include @@ -33,6 +35,12 @@ struct logic_error : public std::logic_error { logic_error(std::string const& message) : std::logic_error(message) {} }; +/** + * @brief Exception thrown when a CUDA error is encountered. + */ +struct cuda_error : public std::runtime_error { + cuda_error(std::string const& message) : std::runtime_error(message) {} +}; } // namespace cuspatial #define STRINGIFY_DETAIL(x) #x @@ -78,5 +86,51 @@ struct logic_error : public std::logic_error { namespace cuspatial { namespace detail { +inline void throw_cuda_error(cudaError_t error, const char* file, unsigned int line) +{ + throw cuspatial::cuda_error(std::string{ + "CUDA error encountered at: " + std::string{file} + ":" + std::to_string(line) + ": " + + std::to_string(error) + " " + cudaGetErrorName(error) + " " + cudaGetErrorString(error)}); +} + } // namespace detail } // namespace cuspatial + +/** + * @brief Error checking macro for CUDA runtime API functions. + * + * Invokes a CUDA runtime API function call, if the call does not return + * cudaSuccess, invokes cudaGetLastError() to clear the error and throws an + * exception detailing the CUDA error that occurred + */ +#define CUSPATIAL_CUDA_TRY(call) \ + do { \ + cudaError_t const status = (call); \ + if (cudaSuccess != status) { \ + cudaGetLastError(); \ + cuspatial::detail::throw_cuda_error(status, __FILE__, __LINE__); \ + } \ + } while (0); + +/** + * @brief Debug macro to check for CUDA errors + * + * In a non-release build, this macro will synchronize the specified stream + * before error checking. In both release and non-release builds, this macro + * checks for any pending CUDA errors from previous calls. If an error is + * reported, an exception is thrown detailing the CUDA error that occurred. + * + * The intent of this macro is to provide a mechanism for synchronous and + * deterministic execution for debugging asynchronous CUDA execution. It should + * be used after any asynchronous CUDA call, e.g., cudaMemcpyAsync, or an + * asynchronous kernel launch. + */ +#ifndef NDEBUG +#define CUSPATIAL_CHECK_CUDA(stream) \ + do { \ + CUSPATIAL_CUDA_TRY(cudaStreamSynchronize(stream)); \ + CUSPATIAL_CUDA_TRY(cudaPeekAtLastError()); \ + } while (0); +#else +#define CUSPATIAL_CHECK_CUDA(stream) CUSPATIAL_CUDA_TRY(cudaPeekAtLastError()); +#endif diff --git a/cpp/src/interpolate/cubic_spline.cu b/cpp/src/interpolate/cubic_spline.cu index c213496aa..80c59919a 100644 --- a/cpp/src/interpolate/cubic_spline.cu +++ b/cpp/src/interpolate/cubic_spline.cu @@ -407,7 +407,7 @@ std::unique_ptr cubicspline_coefficients(cudf::column_view const& t // pBuffer: get size of thisu by gtsv2_bufferSizeExt cusparseHandle_t handle; - CUDF_CUDA_TRY(cudaMalloc(&handle, sizeof(cusparseHandle_t))); + CUSPATIAL_CUDA_TRY(cudaMalloc(&handle, sizeof(cusparseHandle_t))); CUSPARSE_TRY(cusparseCreate(&handle)); size_t pBufferSize; diff --git a/cpp/src/join/quadtree_point_to_nearest_polyline.cu b/cpp/src/join/quadtree_point_to_nearest_polyline.cu index ba9924c40..5d60fc1e4 100644 --- a/cpp/src/join/quadtree_point_to_nearest_polyline.cu +++ b/cpp/src/join/quadtree_point_to_nearest_polyline.cu @@ -27,7 +27,6 @@ #include #include #include -#include #include #include @@ -248,7 +247,7 @@ struct compute_quadtree_point_to_nearest_polyline { rmm::device_uvector distances(point_x.size(), stream); // Fill distances with 0 - CUDF_CUDA_TRY( + CUSPATIAL_CUDA_TRY( cudaMemsetAsync(distances.data(), 0, distances.size() * sizeof(T), stream.value())); // Reduce the intermediate point/polyline indices to lists of point/polyline index pairs and diff --git a/cpp/src/spatial/hausdorff.cu b/cpp/src/spatial/hausdorff.cu index 6a696ab9b..df9dd6bca 100644 --- a/cpp/src/spatial/hausdorff.cu +++ b/cpp/src/spatial/hausdorff.cu @@ -178,7 +178,7 @@ struct hausdorff_functor { space_offsets.begin(), result_view.begin()); - CUDF_CUDA_TRY(cudaGetLastError()); + CUSPATIAL_CUDA_TRY(cudaGetLastError()); return result; } diff --git a/cpp/src/trajectory/trajectory_bounding_boxes.cu b/cpp/src/trajectory/trajectory_bounding_boxes.cu index 588d9d1fa..467fbf1bf 100644 --- a/cpp/src/trajectory/trajectory_bounding_boxes.cu +++ b/cpp/src/trajectory/trajectory_bounding_boxes.cu @@ -97,7 +97,7 @@ struct dispatch_element { }); // check for errors - CUDF_CHECK_CUDA(stream.value()); + CUSPATIAL_CHECK_CUDA(stream.value()); return std::make_unique(std::move(cols)); } diff --git a/cpp/src/trajectory/trajectory_distances_and_speeds.cu b/cpp/src/trajectory/trajectory_distances_and_speeds.cu index 206c05cb5..509b713bf 100644 --- a/cpp/src/trajectory/trajectory_distances_and_speeds.cu +++ b/cpp/src/trajectory/trajectory_distances_and_speeds.cu @@ -166,7 +166,7 @@ struct dispatch_timestamp { }); // check for errors - CUDF_CHECK_CUDA(stream.value()); + CUSPATIAL_CHECK_CUDA(stream.value()); return std::make_unique(std::move(cols)); }