diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index a4e73c86e0e..3c4372092e0 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -150,6 +150,7 @@ outputs: - test -f $PREFIX/include/cudf/detail/utilities/linked_column.hpp - test -f $PREFIX/include/cudf/detail/utilities/logger.hpp - test -f $PREFIX/include/cudf/detail/utilities/pinned_host_vector.hpp + - test -f $PREFIX/include/cudf/detail/utilities/stacktrace.hpp - test -f $PREFIX/include/cudf/detail/utilities/vector_factories.hpp - test -f $PREFIX/include/cudf/detail/utilities/visitor_overload.hpp - test -f $PREFIX/include/cudf/dictionary/detail/concatenate.hpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index c50ccbc4775..27985e9d3c2 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -62,11 +62,18 @@ option( stream to external libraries." OFF ) +# Option to add all symbols to the dynamic symbol table in the library file, allowing to retrieve +# human-readable stacktrace for debugging. +option( + CUDF_BUILD_STACKTRACE_DEBUG + "Replace the current optimization flags by the options '-rdynamic -Og -NDEBUG', useful for debugging with stacktrace retrieval" + OFF +) option(DISABLE_DEPRECATION_WARNINGS "Disable warnings generated from deprecated declarations." OFF) # Option to enable line info in CUDA device compilation to allow introspection when profiling / # memchecking option(CUDA_ENABLE_LINEINFO - "Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler" OFF + "Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler)" OFF ) option(CUDA_WARNINGS_AS_ERRORS "Enable -Werror=all-warnings for all CUDA compilation" ON) # cudart can be statically linked or dynamically linked. The python ecosystem wants dynamic linking @@ -94,13 +101,17 @@ message(VERBOSE "CUDF: Use a file cache for JIT compiled kernels: ${JITIFY_USE_C message(VERBOSE "CUDF: Build and statically link Arrow libraries: ${CUDF_USE_ARROW_STATIC}") message(VERBOSE "CUDF: Build and enable S3 filesystem support for Arrow: ${CUDF_ENABLE_ARROW_S3}") message(VERBOSE "CUDF: Build with per-thread default stream: ${CUDF_USE_PER_THREAD_DEFAULT_STREAM}") +message( + VERBOSE + "CUDF: Replace the current optimization flags by the options '-rdynamic -Og' (useful for debugging with stacktrace retrieval): ${CUDF_BUILD_STACKTRACE_DEBUG}" +) message( VERBOSE "CUDF: Disable warnings generated from deprecated declarations: ${DISABLE_DEPRECATION_WARNINGS}" ) message( VERBOSE - "CUDF: Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler: ${CUDA_ENABLE_LINEINFO}" + "CUDF: Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler): ${CUDA_ENABLE_LINEINFO}" ) message(VERBOSE "CUDF: Statically link the CUDA runtime: ${CUDA_STATIC_RUNTIME}") @@ -115,6 +126,10 @@ if(BUILD_TESTS AND NOT CUDF_BUILD_TESTUTIL) ) endif() +if(CUDF_BUILD_STACKTRACE_DEBUG AND NOT CMAKE_COMPILER_IS_GNUCXX) + message(FATAL_ERROR "CUDF_BUILD_STACKTRACE_DEBUG is only supported with GCC compiler") +endif() + set(CUDF_CXX_FLAGS "") set(CUDF_CUDA_FLAGS "") set(CUDF_CXX_DEFINITIONS "") @@ -608,6 +623,7 @@ add_library( src/utilities/default_stream.cpp src/utilities/linked_column.cpp src/utilities/logger.cpp + src/utilities/stacktrace.cpp src/utilities/traits.cpp src/utilities/type_checks.cpp src/utilities/type_dispatcher.cpp @@ -646,6 +662,31 @@ target_compile_options( "$<$:${CUDF_CUDA_FLAGS}>" ) +if(CUDF_BUILD_STACKTRACE_DEBUG) + # Remove any optimization level to avoid nvcc warning "incompatible redefinition for option + # 'optimize'". + string(REGEX REPLACE "(\-O[0123])" "" CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS}") + string(REGEX REPLACE "(\-O[0123])" "" CMAKE_CUDA_FLAGS_RELEASE "${CMAKE_CUDA_FLAGS_RELEASE}") + string(REGEX REPLACE "(\-O[0123])" "" CMAKE_CUDA_FLAGS_MINSIZEREL + "${CMAKE_CUDA_FLAGS_MINSIZEREL}" + ) + string(REGEX REPLACE "(\-O[0123])" "" CMAKE_CUDA_FLAGS_RELWITHDEBINFO + "${CMAKE_CUDA_FLAGS_RELWITHDEBINFO}" + ) + + add_library(cudf_backtrace INTERFACE) + target_compile_definitions(cudf_backtrace INTERFACE CUDF_BUILD_STACKTRACE_DEBUG) + target_compile_options( + cudf_backtrace INTERFACE "$<$:-Og>" + "$<$:-Xcompiler=-Og>" + ) + target_link_options( + cudf_backtrace INTERFACE "$<$:-rdynamic>" + "$<$:-Xlinker=-rdynamic>" + ) + target_link_libraries(cudf PRIVATE cudf_backtrace) +endif() + # Specify include paths for the current target and dependents target_include_directories( cudf @@ -829,7 +870,9 @@ if(CUDF_BUILD_STREAMS_TEST_UTIL) # depending via ctest and whether it has been updated to expose public stream APIs. foreach(_mode cudf testing) set(_tgt "cudf_identify_stream_usage_mode_${_mode}") - add_library(${_tgt} SHARED tests/utilities/identify_stream_usage.cpp) + add_library( + ${_tgt} SHARED src/utilities/stacktrace.cpp tests/utilities/identify_stream_usage.cpp + ) set_target_properties( ${_tgt} @@ -838,7 +881,14 @@ if(CUDF_BUILD_STREAMS_TEST_UTIL) CXX_STANDARD_REQUIRED ON POSITION_INDEPENDENT_CODE ON ) + target_compile_options( + ${_tgt} PRIVATE "$:${CUDF_CXX_FLAGS}>>" + ) + target_include_directories(${_tgt} PRIVATE "$") target_link_libraries(${_tgt} PUBLIC CUDA::cudart rmm::rmm) + if(CUDF_BUILD_STACKTRACE_DEBUG) + target_link_libraries(${_tgt} PRIVATE cudf_backtrace) + endif() add_library(cudf::${_tgt} ALIAS ${_tgt}) if("${_mode}" STREQUAL "testing") diff --git a/cpp/include/cudf/detail/utilities/stacktrace.hpp b/cpp/include/cudf/detail/utilities/stacktrace.hpp new file mode 100644 index 00000000000..c3ec9ce7a52 --- /dev/null +++ b/cpp/include/cudf/detail/utilities/stacktrace.hpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace cudf::detail { +/** + * @addtogroup utility_stacktrace + * @{ + * @file + */ + +/** + * @brief Specify whether the last stackframe is included in the stacktrace. + */ +enum class capture_last_stackframe : bool { YES, NO }; + +/** + * @brief Query the current stacktrace and return the whole stacktrace as one string. + * + * Depending on the value of the flag `capture_last_frame`, the caller that executes stacktrace + * retrieval can be included in the output result. + * + * @param capture_last_frame Flag to specify if the current stackframe will be included into + * the output + * @return A string storing the whole current stacktrace + */ +std::string get_stacktrace(capture_last_stackframe capture_last_frame); + +/** @} */ // end of group + +} // namespace cudf::detail diff --git a/cpp/include/cudf/utilities/error.hpp b/cpp/include/cudf/utilities/error.hpp index f70ef4e5f07..03db0bde14b 100644 --- a/cpp/include/cudf/utilities/error.hpp +++ b/cpp/include/cudf/utilities/error.hpp @@ -16,6 +16,8 @@ #pragma once +#include + #include #include #include @@ -29,13 +31,35 @@ namespace cudf { * @file */ +/** + * @brief The struct to store the current stacktrace upon its construction. + */ +struct stacktrace_recorder { + stacktrace_recorder() + // Exclude the current stackframe, as it is this constructor. + : _stacktrace{cudf::detail::get_stacktrace(cudf::detail::capture_last_stackframe::NO)} + { + } + + public: + /** + * @brief Get the stored stacktrace captured during object construction. + * + * @return The pointer to a null-terminated string storing the output stacktrace + */ + char const* stacktrace() const { return _stacktrace.c_str(); } + + protected: + std::string const _stacktrace; //!< The whole stacktrace stored as one string. +}; + /** * @brief Exception thrown when logical precondition is violated. * * This exception should not be thrown directly and is instead thrown by the * CUDF_EXPECTS macro. */ -struct logic_error : public std::logic_error { +struct logic_error : public std::logic_error, public stacktrace_recorder { /** * @brief Constructs a logic_error with the error message. * @@ -57,7 +81,7 @@ struct logic_error : public std::logic_error { * @brief Exception thrown when a CUDA error is encountered. * */ -struct cuda_error : public std::runtime_error { +struct cuda_error : public std::runtime_error, public stacktrace_recorder { /** * @brief Construct a new cuda error object with error message and code. * @@ -92,7 +116,7 @@ struct fatal_cuda_error : public cuda_error { * unsupported data_type. This exception should not be thrown directly and is * instead thrown by the CUDF_EXPECTS or CUDF_FAIL macros. */ -struct data_type_error : public std::invalid_argument { +struct data_type_error : public std::invalid_argument, public stacktrace_recorder { /** * @brief Constructs a data_type_error with the error message. * diff --git a/cpp/include/cudf_test/stream_checking_resource_adaptor.hpp b/cpp/include/cudf_test/stream_checking_resource_adaptor.hpp index 946635ab241..e18400422aa 100644 --- a/cpp/include/cudf_test/stream_checking_resource_adaptor.hpp +++ b/cpp/include/cudf_test/stream_checking_resource_adaptor.hpp @@ -17,8 +17,12 @@ #include +#include + #include +#include + /** * @brief Resource that verifies that the default stream is not used in any allocation. * @@ -162,6 +166,10 @@ class stream_checking_resource_adaptor final : public rmm::mr::device_memory_res : (cstream != cudf::test::get_default_stream().value()); if (invalid_stream) { + // Exclude the current function from stacktrace. + std::cout << cudf::detail::get_stacktrace(cudf::detail::capture_last_stackframe::NO) + << std::endl; + if (error_on_invalid_stream_) { throw std::runtime_error("Attempted to perform an operation on an unexpected stream!"); } else { diff --git a/cpp/src/utilities/stacktrace.cpp b/cpp/src/utilities/stacktrace.cpp new file mode 100644 index 00000000000..48cf632bbaa --- /dev/null +++ b/cpp/src/utilities/stacktrace.cpp @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#if defined(__GNUC__) && defined(CUDF_BUILD_STACKTRACE_DEBUG) +#include +#include + +#include +#include +#include +#endif // defined(__GNUC__) && defined(CUDF_BUILD_STACKTRACE_DEBUG) + +namespace cudf::detail { + +std::string get_stacktrace(capture_last_stackframe capture_last_frame) +{ +#if defined(__GNUC__) && defined(CUDF_BUILD_STACKTRACE_DEBUG) + constexpr int max_stack_depth = 64; + void* stack[max_stack_depth]; + + auto const depth = backtrace(stack, max_stack_depth); + auto const modules = backtrace_symbols(stack, depth); + + if (modules == nullptr) { return "No stacktrace could be captured!"; } + + std::stringstream ss; + + // Skip one more depth to avoid including the stackframe of this function. + auto const skip_depth = 1 + (capture_last_frame == capture_last_stackframe::YES ? 0 : 1); + for (auto i = skip_depth; i < depth; ++i) { + // Each modules[i] string contains a mangled name in the format like following: + // `module_name(function_name+0x012) [0x01234567890a]` + // We need to extract function name and function offset. + char* begin_func_name = std::strstr(modules[i], "("); + char* begin_func_offset = std::strstr(modules[i], "+"); + char* end_func_offset = std::strstr(modules[i], ")"); + + auto const frame_idx = i - skip_depth; + if (begin_func_name && begin_func_offset && end_func_offset && + begin_func_name < begin_func_offset) { + // Split `modules[i]` into separate null-terminated strings. + // After this, mangled function name will then be [begin_func_name, begin_func_offset), and + // function offset is in [begin_func_offset, end_func_offset). + *(begin_func_name++) = '\0'; + *(begin_func_offset++) = '\0'; + *end_func_offset = '\0'; + + // We need to demangle function name. + int status{0}; + char* func_name = abi::__cxa_demangle(begin_func_name, nullptr, nullptr, &status); + + ss << "#" << frame_idx << ": " << modules[i] << " : " + << (status == 0 /*demangle success*/ ? func_name : begin_func_name) << "+" + << begin_func_offset << "\n"; + free(func_name); + } else { + ss << "#" << frame_idx << ": " << modules[i] << "\n"; + } + } + + free(modules); + + return ss.str(); +#else +#ifdef CUDF_BUILD_STACKTRACE_DEBUG + return "Stacktrace is only supported when built with a GNU compiler."; +#else + return "libcudf was not built with stacktrace support."; +#endif // CUDF_BUILD_STACKTRACE_DEBUG +#endif // __GNUC__ +} + +} // namespace cudf::detail diff --git a/cpp/tests/utilities/identify_stream_usage.cpp b/cpp/tests/utilities/identify_stream_usage.cpp index 2ab7fa3844a..3dc95199caa 100644 --- a/cpp/tests/utilities/identify_stream_usage.cpp +++ b/cpp/tests/utilities/identify_stream_usage.cpp @@ -14,6 +14,8 @@ * limitations under the License. */ +#include + #include #include @@ -91,74 +93,10 @@ bool stream_is_invalid(cudaStream_t stream) void check_stream_and_error(cudaStream_t stream) { if (stream_is_invalid(stream)) { -#ifdef __GNUC__ - // If we're on the wrong stream, print the stack trace from the current frame. - // Adapted from from https://panthema.net/2008/0901-stacktrace-demangled/ - constexpr int kMaxStackDepth = 64; - void* stack[kMaxStackDepth]; - auto depth = backtrace(stack, kMaxStackDepth); - auto strings = backtrace_symbols(stack, depth); - - if (strings == nullptr) { - std::cout << "No stack trace could be found!" << std::endl; - } else { - // If we were able to extract a trace, parse it, demangle symbols, and - // print a readable output. - - // allocate string which will be filled with the demangled function name - size_t funcnamesize = 256; - char* funcname = (char*)malloc(funcnamesize); - - // Start at frame 1 to skip print_trace itself. - for (int i = 1; i < depth; ++i) { - char* begin_name = nullptr; - char* begin_offset = nullptr; - char* end_offset = nullptr; - - // find parentheses and +address offset surrounding the mangled name: - // ./module(function+0x15c) [0x8048a6d] - for (char* p = strings[i]; *p; ++p) { - if (*p == '(') { - begin_name = p; - } else if (*p == '+') { - begin_offset = p; - } else if (*p == ')' && begin_offset) { - end_offset = p; - break; - } - } + // Exclude the current function from stacktrace. + std::cout << cudf::detail::get_stacktrace(cudf::detail::capture_last_stackframe::NO) + << std::endl; - if (begin_name && begin_offset && end_offset && begin_name < begin_offset) { - *begin_name++ = '\0'; - *begin_offset++ = '\0'; - *end_offset = '\0'; - - // mangled name is now in [begin_name, begin_offset) and caller offset - // in [begin_offset, end_offset). now apply __cxa_demangle(): - - int status; - char* ret = abi::__cxa_demangle(begin_name, funcname, &funcnamesize, &status); - if (status == 0) { - funcname = - ret; // use possibly realloc()-ed string (__cxa_demangle may realloc funcname) - std::cout << "#" << i << " in " << strings[i] << " : " << funcname << "+" - << begin_offset << std::endl; - } else { - // demangling failed. Output function name as a C function with no arguments. - std::cout << "#" << i << " in " << strings[i] << " : " << begin_name << "()+" - << begin_offset << std::endl; - } - } else { - std::cout << "#" << i << " in " << strings[i] << std::endl; - } - } - - free(funcname); - } - free(strings); -#else - std::cout << "Backtraces are only when built with a GNU compiler." << std::endl; -#endif // __GNUC__ char const* env_stream_error_mode{std::getenv("GTEST_CUDF_STREAM_ERROR_MODE")}; if (env_stream_error_mode && !strcmp(env_stream_error_mode, "print")) { std::cout << "cudf_identify_stream_usage found unexpected stream!" << std::endl; diff --git a/java/src/main/java/ai/rapids/cudf/CudaException.java b/java/src/main/java/ai/rapids/cudf/CudaException.java index ff7ca308f3c..f73a6244cdb 100755 --- a/java/src/main/java/ai/rapids/cudf/CudaException.java +++ b/java/src/main/java/ai/rapids/cudf/CudaException.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,9 +19,9 @@ import java.util.Map; /** - * Exception from the cuda language/library. Be aware that because of how cuda does asynchronous + * Exception from the cuda language/library. Be aware that because of how cuda does asynchronous * processing exceptions from cuda can be thrown by method calls that did not cause the exception - * to take place. These will take place on the same thread that caused the error. + * to take place. These will take place on the same thread that caused the error. *

* Please See * the cuda docs @@ -32,16 +32,32 @@ */ public class CudaException extends RuntimeException { CudaException(String message, int errorCode) { + this(message, "No native stacktrace is available.", errorCode); + } + + CudaException(String message, String nativeStacktrace, int errorCode) { super(message); + this.nativeStacktrace = nativeStacktrace; cudaError = CudaError.parseErrorCode(errorCode); } - CudaException(String message, int errorCode, Throwable cause) { + CudaException(String message, String nativeStacktrace, int errorCode, Throwable cause) { super(message, cause); + this.nativeStacktrace = nativeStacktrace; cudaError = CudaError.parseErrorCode(errorCode); } - public final CudaError cudaError; + public String getNativeStacktrace() { + return nativeStacktrace; + } + + public CudaError getCudaError() { + return cudaError; + } + + private final String nativeStacktrace; + + private final CudaError cudaError; /** * The Java mirror of cudaError, which facilities the tracking of CUDA errors in JVM. diff --git a/java/src/main/java/ai/rapids/cudf/CudaFatalException.java b/java/src/main/java/ai/rapids/cudf/CudaFatalException.java index cf36726aa80..067e2c25207 100644 --- a/java/src/main/java/ai/rapids/cudf/CudaFatalException.java +++ b/java/src/main/java/ai/rapids/cudf/CudaFatalException.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,10 +22,14 @@ */ public class CudaFatalException extends CudaException { CudaFatalException(String message, int errorCode) { - super(message, errorCode); + this(message, "No native stacktrace is available.", errorCode); } - CudaFatalException(String message, int errorCode, Throwable cause) { - super(message, errorCode, cause); + CudaFatalException(String message, String nativeStacktrace, int errorCode) { + super(message, nativeStacktrace, errorCode); + } + + CudaFatalException(String message, String nativeStacktrace, int errorCode, Throwable cause) { + super(message, nativeStacktrace, errorCode, cause); } } diff --git a/java/src/main/java/ai/rapids/cudf/CudfException.java b/java/src/main/java/ai/rapids/cudf/CudfException.java index 2e9c332091a..4038c563754 100755 --- a/java/src/main/java/ai/rapids/cudf/CudfException.java +++ b/java/src/main/java/ai/rapids/cudf/CudfException.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,10 +20,22 @@ */ public class CudfException extends RuntimeException { CudfException(String message) { + this(message, "No native stacktrace is available."); + } + + CudfException(String message, String nativeStacktrace) { super(message); + this.nativeStacktrace = nativeStacktrace; } - CudfException(String message, Throwable cause) { + CudfException(String message, String nativeStacktrace, Throwable cause) { super(message, cause); + this.nativeStacktrace = nativeStacktrace; } + + public final String getNativeStacktrace() { + return nativeStacktrace; + } + + private final String nativeStacktrace; } diff --git a/java/src/main/native/include/jni_utils.hpp b/java/src/main/native/include/jni_utils.hpp index ee2325cc76f..07d4c1a9c34 100644 --- a/java/src/main/native/include/jni_utils.hpp +++ b/java/src/main/native/include/jni_utils.hpp @@ -788,6 +788,13 @@ inline void jni_cuda_check(JNIEnv *const env, cudaError_t cuda_status) { } // namespace jni } // namespace cudf +#define JNI_EXCEPTION_OCCURRED_CHECK(env, ret_val) \ + { \ + if (env->ExceptionOccurred()) { \ + return ret_val; \ + } \ + } + #define JNI_THROW_NEW(env, class_name, message, ret_val) \ { \ jclass ex_class = env->FindClass(class_name); \ @@ -799,33 +806,65 @@ inline void jni_cuda_check(JNIEnv *const env, cudaError_t cuda_status) { } // Throw a new exception only if one is not pending then always return with the specified value -#define JNI_CHECK_THROW_NEW(env, class_name, message, ret_val) \ +#define JNI_CHECK_THROW_CUDF_EXCEPTION(env, class_name, message, stacktrace, ret_val) \ { \ - if (env->ExceptionOccurred()) { \ + JNI_EXCEPTION_OCCURRED_CHECK(env, ret_val); \ + auto const ex_class = env->FindClass(class_name); \ + if (ex_class == nullptr) { \ return ret_val; \ } \ - JNI_THROW_NEW(env, class_name, message, ret_val) \ + auto const ctor_id = \ + env->GetMethodID(ex_class, "", "(Ljava/lang/String;Ljava/lang/String;)V"); \ + if (ctor_id == nullptr) { \ + return ret_val; \ + } \ + auto const empty_str = std::string{""}; \ + auto const jmessage = env->NewStringUTF(message == nullptr ? empty_str.c_str() : message); \ + if (jmessage == nullptr) { \ + return ret_val; \ + } \ + auto const jstacktrace = \ + env->NewStringUTF(stacktrace == nullptr ? empty_str.c_str() : stacktrace); \ + if (jstacktrace == nullptr) { \ + return ret_val; \ + } \ + auto const jobj = env->NewObject(ex_class, ctor_id, jmessage, jstacktrace); \ + if (jobj == nullptr) { \ + return ret_val; \ + } \ + env->Throw(reinterpret_cast(jobj)); \ + return ret_val; \ } // Throw a new exception only if one is not pending then always return with the specified value -#define JNI_CHECK_CUDA_ERROR(env, class_name, e, ret_val) \ +#define JNI_CHECK_THROW_CUDA_EXCEPTION(env, class_name, message, stacktrace, error_code, ret_val) \ { \ - if (env->ExceptionOccurred()) { \ + JNI_EXCEPTION_OCCURRED_CHECK(env, ret_val); \ + auto const ex_class = env->FindClass(class_name); \ + if (ex_class == nullptr) { \ return ret_val; \ } \ - std::string n_msg = e.what() == nullptr ? "" : e.what(); \ - jstring j_msg = env->NewStringUTF(n_msg.c_str()); \ - jint e_code = static_cast(e.error_code()); \ - jclass ex_class = env->FindClass(class_name); \ - if (ex_class != NULL) { \ - jmethodID ctor_id = env->GetMethodID(ex_class, "", "(Ljava/lang/String;I)V"); \ - if (ctor_id != NULL) { \ - jobject cuda_error = env->NewObject(ex_class, ctor_id, j_msg, e_code); \ - if (cuda_error != NULL) { \ - env->Throw((jthrowable)cuda_error); \ - } \ - } \ + auto const ctor_id = \ + env->GetMethodID(ex_class, "", "(Ljava/lang/String;Ljava/lang/String;I)V"); \ + if (ctor_id == nullptr) { \ + return ret_val; \ + } \ + auto const empty_str = std::string{""}; \ + auto const jmessage = env->NewStringUTF(message == nullptr ? empty_str.c_str() : message); \ + if (jmessage == nullptr) { \ + return ret_val; \ + } \ + auto const jstacktrace = \ + env->NewStringUTF(stacktrace == nullptr ? empty_str.c_str() : stacktrace); \ + if (jstacktrace == nullptr) { \ + return ret_val; \ + } \ + auto const jerror_code = static_cast(error_code); \ + auto const jobj = env->NewObject(ex_class, ctor_id, jmessage, jstacktrace, jerror_code); \ + if (jobj == nullptr) { \ + return ret_val; \ } \ + env->Throw(reinterpret_cast(jobj)); \ return ret_val; \ } @@ -843,41 +882,41 @@ inline void jni_cuda_check(JNIEnv *const env, cudaError_t cuda_status) { } \ } -#define JNI_EXCEPTION_OCCURRED_CHECK(env, ret_val) \ - { \ - if (env->ExceptionOccurred()) { \ - return ret_val; \ - } \ - } - #define CATCH_STD_CLASS(env, class_name, ret_val) \ catch (const rmm::out_of_memory &e) { \ - auto what = \ + JNI_EXCEPTION_OCCURRED_CHECK(env, ret_val); \ + auto const what = \ std::string("Could not allocate native memory: ") + (e.what() == nullptr ? "" : e.what()); \ - JNI_CHECK_THROW_NEW(env, cudf::jni::OOM_CLASS, what.c_str(), ret_val); \ + JNI_THROW_NEW(env, cudf::jni::OOM_CLASS, what.c_str(), ret_val); \ } \ catch (const cudf::fatal_cuda_error &e) { \ - JNI_CHECK_CUDA_ERROR(env, cudf::jni::CUDA_FATAL_ERROR_CLASS, e, ret_val); \ + JNI_CHECK_THROW_CUDA_EXCEPTION(env, cudf::jni::CUDA_FATAL_ERROR_CLASS, e.what(), \ + e.stacktrace(), e.error_code(), ret_val); \ } \ catch (const cudf::cuda_error &e) { \ - JNI_CHECK_CUDA_ERROR(env, cudf::jni::CUDA_ERROR_CLASS, e, ret_val); \ + JNI_CHECK_THROW_CUDA_EXCEPTION(env, cudf::jni::CUDA_ERROR_CLASS, e.what(), e.stacktrace(), \ + e.error_code(), ret_val); \ } \ catch (const cudf::data_type_error &e) { \ - JNI_CHECK_THROW_NEW(env, cudf::jni::CUDF_DTYPE_ERROR_CLASS, e.what(), ret_val); \ + JNI_CHECK_THROW_CUDF_EXCEPTION(env, cudf::jni::CUDF_DTYPE_ERROR_CLASS, e.what(), \ + e.stacktrace(), ret_val); \ } \ catch (const std::exception &e) { \ + char const *stacktrace = "No native stacktrace is available."; \ + if (auto const cudf_ex = dynamic_cast(&e); cudf_ex != nullptr) { \ + stacktrace = cudf_ex->stacktrace(); \ + } \ /* Double check whether the thrown exception is unrecoverable CUDA error or not. */ \ /* Like cudf::detail::throw_cuda_error, it is nearly certain that a fatal error */ \ /* occurred if the second call doesn't return with cudaSuccess. */ \ cudaGetLastError(); \ auto const last = cudaFree(0); \ if (cudaSuccess != last && last == cudaDeviceSynchronize()) { \ - auto msg = e.what() == nullptr ? std::string{""} : e.what(); \ - auto cuda_error = cudf::fatal_cuda_error{msg, last}; \ - JNI_CHECK_CUDA_ERROR(env, cudf::jni::CUDA_FATAL_ERROR_CLASS, cuda_error, ret_val); \ + /* Throw CudaFatalException since the thrown exception is unrecoverable CUDA error */ \ + JNI_CHECK_THROW_CUDA_EXCEPTION(env, cudf::jni::CUDA_FATAL_ERROR_CLASS, e.what(), stacktrace, \ + last, ret_val); \ } \ - /* If jni_exception caught then a Java exception is pending and this will not overwrite it. */ \ - JNI_CHECK_THROW_NEW(env, class_name, e.what(), ret_val); \ + JNI_CHECK_THROW_CUDF_EXCEPTION(env, class_name, e.what(), stacktrace, ret_val); \ } #define CATCH_STD(env, ret_val) CATCH_STD_CLASS(env, cudf::jni::CUDF_ERROR_CLASS, ret_val) diff --git a/java/src/test/java/ai/rapids/cudf/CudaFatalTest.java b/java/src/test/java/ai/rapids/cudf/CudaFatalTest.java index ef55ff84b68..70d0925f5b8 100644 --- a/java/src/test/java/ai/rapids/cudf/CudaFatalTest.java +++ b/java/src/test/java/ai/rapids/cudf/CudaFatalTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -37,7 +37,7 @@ public void testCudaFatalException() { assertThrows(CudaFatalException.class, () -> { try (ColumnVector cv2 = cv.asLongs()) { } catch (CudaFatalException ex) { - assertEquals(CudaException.CudaError.cudaErrorIllegalAddress, ex.cudaError); + assertEquals(CudaException.CudaError.cudaErrorIllegalAddress, ex.getCudaError()); throw ex; } }); @@ -47,7 +47,7 @@ public void testCudaFatalException() { assertThrows(CudaFatalException.class, () -> { try (ColumnVector cv = ColumnVector.fromBoxedInts(1, 2, 3, 4, 5)) { } catch (CudaFatalException ex) { - assertEquals(CudaException.CudaError.cudaErrorIllegalAddress, ex.cudaError); + assertEquals(CudaException.CudaError.cudaErrorIllegalAddress, ex.getCudaError()); throw ex; } }); diff --git a/java/src/test/java/ai/rapids/cudf/CudaTest.java b/java/src/test/java/ai/rapids/cudf/CudaTest.java index c20f2435258..e29bf1a672e 100644 --- a/java/src/test/java/ai/rapids/cudf/CudaTest.java +++ b/java/src/test/java/ai/rapids/cudf/CudaTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -40,7 +40,7 @@ public void testCudaException() { Cuda.memset(Long.MAX_VALUE, (byte) 0, 1024); } catch (CudaFatalException ignored) { } catch (CudaException ex) { - assertEquals(CudaException.CudaError.cudaErrorInvalidValue, ex.cudaError); + assertEquals(CudaException.CudaError.cudaErrorInvalidValue, ex.getCudaError()); throw ex; } }