From c8b9e6cc08c2759c183eb0d04115b0fc66b53978 Mon Sep 17 00:00:00 2001
From: sperlingxx <lovedreamf@gmail.com>
Date: Mon, 11 Apr 2022 17:29:08 +0800
Subject: [PATCH 01/11] enrich cuDF cuda_error

Signed-off-by: sperlingxx <lovedreamf@gmail.com>
---
 cpp/include/cudf/utilities/error.hpp   | 44 +++++++++++++++++++-------
 cpp/include/cudf_test/cudf_gtest.hpp   | 10 ++++--
 cpp/tests/error/error_handling_test.cu | 17 +++++-----
 3 files changed, 50 insertions(+), 21 deletions(-)

diff --git a/cpp/include/cudf/utilities/error.hpp b/cpp/include/cudf/utilities/error.hpp
index 8be1a7e3a32..d95079c5610 100644
--- a/cpp/include/cudf/utilities/error.hpp
+++ b/cpp/include/cudf/utilities/error.hpp
@@ -46,7 +46,25 @@ struct logic_error : public std::logic_error {
  * @brief Exception thrown when a CUDA error is encountered.
  */
 struct cuda_error : public std::runtime_error {
-  cuda_error(std::string const& message) : std::runtime_error(message) {}
+  cuda_error(std::string const& message, cudaError_t const& error)
+    : std::runtime_error(message), _cudaError(error)
+  {
+  }
+  cudaError_t error_code() { return _cudaError; }
+
+ protected:
+  cudaError_t _cudaError;
+};
+
+struct cudart_error : public cuda_error {
+  cudart_error(std::string const& message, cudaError_t const& error) : cuda_error(message, error) {}
+};
+
+struct sticky_cuda_error : public cuda_error {
+  sticky_cuda_error(std::string const& message, cudaError_t const& error)
+    : cuda_error(message, error)
+  {
+  }
 };
 /** @} */
 
@@ -101,9 +119,16 @@ namespace detail {
 
 inline void throw_cuda_error(cudaError_t error, const char* file, unsigned int line)
 {
-  throw cudf::cuda_error(std::string{"CUDA error encountered at: " + std::string{file} + ":" +
-                                     std::to_string(line) + ": " + std::to_string(error) + " " +
-                                     cudaGetErrorName(error) + " " + cudaGetErrorString(error)});
+  cudaGetLastError();
+  auto const last = cudaGetLastError();
+  auto const msg  = std::string{"CUDA error encountered at: " + std::string{file} + ":" +
+                               std::to_string(line) + ": " + std::to_string(error) + " " +
+                               cudaGetErrorName(error) + " " + cudaGetErrorString(error)};
+  if (error == last && last == cudaDeviceSynchronize()) {
+    throw sticky_cuda_error{"Sticky " + msg, error};
+  } else {
+    throw cudart_error{msg, error};
+  }
 }
 }  // namespace detail
 }  // namespace cudf
@@ -115,13 +140,10 @@ inline void throw_cuda_error(cudaError_t error, const char* file, unsigned int l
  * cudaSuccess, invokes cudaGetLastError() to clear the error and throws an
  * exception detailing the CUDA error that occurred
  */
-#define CUDF_CUDA_TRY(call)                                       \
-  do {                                                            \
-    cudaError_t const status = (call);                            \
-    if (cudaSuccess != status) {                                  \
-      cudaGetLastError();                                         \
-      cudf::detail::throw_cuda_error(status, __FILE__, __LINE__); \
-    }                                                             \
+#define CUDF_CUDA_TRY(call)                                                                    \
+  do {                                                                                         \
+    cudaError_t const status = (call);                                                         \
+    if (cudaSuccess != status) { cudf::detail::throw_cuda_error(status, __FILE__, __LINE__); } \
   } while (0);
 
 /**
diff --git a/cpp/include/cudf_test/cudf_gtest.hpp b/cpp/include/cudf_test/cudf_gtest.hpp
index d078bf90a8a..1c6dc7209bf 100644
--- a/cpp/include/cudf_test/cudf_gtest.hpp
+++ b/cpp/include/cudf_test/cudf_gtest.hpp
@@ -117,8 +117,14 @@ struct TypeList<Types<TYPES...>> {
 #define CUDF_EXPECT_THROW_MESSAGE(x, msg) \
   EXPECT_THROW_MESSAGE(x, cudf::logic_error, "cuDF failure at:", msg)
 
-#define CUDA_EXPECT_THROW_MESSAGE(x, msg) \
-  EXPECT_THROW_MESSAGE(x, cudf::cuda_error, "CUDA error encountered at:", msg)
+#define CUDART_EXPECT_THROW_MESSAGE(x, msg) \
+  EXPECT_THROW_MESSAGE(x, cudf::cudart_error, "CUDA error encountered at:", msg)
+
+#define STICKY_CUDA_EXPECT_THROW_MESSAGE(x, msg) \
+  EXPECT_THROW_MESSAGE(x, cudf::sticky_cuda_error, "Sticky CUDA error encountered at:", msg)
+
+#define STICKY_CUDA_EXPECT_THROW_MESSAGE_1(x, msg) \
+  EXPECT_THROW_MESSAGE(x, cudf::cuda_error, "Sticky CUDA error encountered at:", msg)
 
 /**
  * @brief test macro to be expected as no exception.
diff --git a/cpp/tests/error/error_handling_test.cu b/cpp/tests/error/error_handling_test.cu
index 4327a8b694b..315c5234a40 100644
--- a/cpp/tests/error/error_handling_test.cu
+++ b/cpp/tests/error/error_handling_test.cu
@@ -36,15 +36,16 @@ TEST(ExpectsTest, TryCatch)
 
 TEST(CudaTryTest, Error)
 {
-  CUDA_EXPECT_THROW_MESSAGE(CUDF_CUDA_TRY(cudaErrorLaunchFailure),
-                            "cudaErrorLaunchFailure unspecified launch failure");
+  CUDART_EXPECT_THROW_MESSAGE(CUDF_CUDA_TRY(cudaErrorLaunchFailure),
+                              "cudaErrorLaunchFailure unspecified launch failure");
 }
+
 TEST(CudaTryTest, Success) { EXPECT_NO_THROW(CUDF_CUDA_TRY(cudaSuccess)); }
 
 TEST(CudaTryTest, TryCatch)
 {
-  CUDA_EXPECT_THROW_MESSAGE(CUDF_CUDA_TRY(cudaErrorMemoryAllocation),
-                            "cudaErrorMemoryAllocation out of memory");
+  CUDART_EXPECT_THROW_MESSAGE(CUDF_CUDA_TRY(cudaErrorMemoryAllocation),
+                              "cudaErrorMemoryAllocation out of memory");
 }
 
 TEST(StreamCheck, success) { EXPECT_NO_THROW(CUDF_CHECK_CUDA(0)); }
@@ -67,7 +68,7 @@ TEST(StreamCheck, FailedKernel)
 #ifdef NDEBUG
   stream.synchronize();
 #endif
-  EXPECT_THROW(CUDF_CHECK_CUDA(stream.value()), cudf::cuda_error);
+  EXPECT_THROW(CUDF_CHECK_CUDA(stream.value()), cudf::cudart_error);
 }
 
 TEST(StreamCheck, CatchFailedKernel)
@@ -78,9 +79,9 @@ TEST(StreamCheck, CatchFailedKernel)
 #ifndef NDEBUG
   stream.synchronize();
 #endif
-  CUDA_EXPECT_THROW_MESSAGE(CUDF_CHECK_CUDA(stream.value()),
-                            "cudaErrorInvalidConfiguration "
-                            "invalid configuration argument");
+  CUDART_EXPECT_THROW_MESSAGE(CUDF_CHECK_CUDA(stream.value()),
+                              "cudaErrorInvalidConfiguration "
+                              "invalid configuration argument");
 }
 
 #ifndef NDEBUG

From df095e314ce586578eb21d28508501839bf99458 Mon Sep 17 00:00:00 2001
From: sperlingxx <lovedreamf@gmail.com>
Date: Mon, 11 Apr 2022 18:04:08 +0800
Subject: [PATCH 02/11] update year range

---
 cpp/include/cudf_test/cudf_gtest.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/include/cudf_test/cudf_gtest.hpp b/cpp/include/cudf_test/cudf_gtest.hpp
index 1c6dc7209bf..a3326a5f8f9 100644
--- a/cpp/include/cudf_test/cudf_gtest.hpp
+++ b/cpp/include/cudf_test/cudf_gtest.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.

From 9c61429d773487118c5cf3925b67936e584623b6 Mon Sep 17 00:00:00 2001
From: sperlingxx <lovedreamf@gmail.com>
Date: Tue, 12 Apr 2022 14:32:06 +0800
Subject: [PATCH 03/11] update

---
 cpp/include/cudf/utilities/error.hpp | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/cpp/include/cudf/utilities/error.hpp b/cpp/include/cudf/utilities/error.hpp
index d95079c5610..505fe15702b 100644
--- a/cpp/include/cudf/utilities/error.hpp
+++ b/cpp/include/cudf/utilities/error.hpp
@@ -57,14 +57,11 @@ struct cuda_error : public std::runtime_error {
 };
 
 struct cudart_error : public cuda_error {
-  cudart_error(std::string const& message, cudaError_t const& error) : cuda_error(message, error) {}
+  using cuda_error::cuda_error;
 };
 
-struct sticky_cuda_error : public cuda_error {
-  sticky_cuda_error(std::string const& message, cudaError_t const& error)
-    : cuda_error(message, error)
-  {
-  }
+struct fatal_cuda_error : public cuda_error {
+  using cuda_error::cuda_error;
 };
 /** @} */
 
@@ -119,13 +116,17 @@ namespace detail {
 
 inline void throw_cuda_error(cudaError_t error, const char* file, unsigned int line)
 {
+  // Calls cudaGetLastError twice. It is nearly certain that a fatal error occurred if the second
+  // call doesn't return with cudaSuccess.
   cudaGetLastError();
   auto const last = cudaGetLastError();
   auto const msg  = std::string{"CUDA error encountered at: " + std::string{file} + ":" +
                                std::to_string(line) + ": " + std::to_string(error) + " " +
                                cudaGetErrorName(error) + " " + cudaGetErrorString(error)};
+  // Calls cudaDeviceSynchronize to make sure that there is no other asynchronize error occurs
+  // between two calls.
   if (error == last && last == cudaDeviceSynchronize()) {
-    throw sticky_cuda_error{"Sticky " + msg, error};
+    throw fatal_cuda_error{"Sticky " + msg, error};
   } else {
     throw cudart_error{msg, error};
   }

From a4837fb7ad4b6112f3d290b1c5602007091be727 Mon Sep 17 00:00:00 2001
From: sperlingxx <lovedreamf@gmail.com>
Date: Tue, 12 Apr 2022 17:52:22 +0800
Subject: [PATCH 04/11] update

---
 cpp/include/cudf/utilities/error.hpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/cpp/include/cudf/utilities/error.hpp b/cpp/include/cudf/utilities/error.hpp
index 505fe15702b..64b40aac425 100644
--- a/cpp/include/cudf/utilities/error.hpp
+++ b/cpp/include/cudf/utilities/error.hpp
@@ -50,6 +50,8 @@ struct cuda_error : public std::runtime_error {
     : std::runtime_error(message), _cudaError(error)
   {
   }
+
+ public:
   cudaError_t error_code() { return _cudaError; }
 
  protected:

From 51bf915c11d31cb05fed33f910e5a04102f6ce56 Mon Sep 17 00:00:00 2001
From: sperlingxx <lovedreamf@gmail.com>
Date: Wed, 13 Apr 2022 16:53:36 +0800
Subject: [PATCH 05/11] add

---
 cpp/include/cudf/utilities/error.hpp   |  6 +++---
 cpp/include/cudf_test/cudf_gtest.hpp   | 11 ++++-------
 cpp/tests/error/error_handling_test.cu | 16 ++++++++--------
 3 files changed, 15 insertions(+), 18 deletions(-)

diff --git a/cpp/include/cudf/utilities/error.hpp b/cpp/include/cudf/utilities/error.hpp
index 64b40aac425..8249f20f66b 100644
--- a/cpp/include/cudf/utilities/error.hpp
+++ b/cpp/include/cudf/utilities/error.hpp
@@ -52,7 +52,7 @@ struct cuda_error : public std::runtime_error {
   }
 
  public:
-  cudaError_t error_code() { return _cudaError; }
+  cudaError_t error_code() const { return _cudaError; }
 
  protected:
   cudaError_t _cudaError;
@@ -128,9 +128,9 @@ inline void throw_cuda_error(cudaError_t error, const char* file, unsigned int l
   // Calls cudaDeviceSynchronize to make sure that there is no other asynchronize error occurs
   // between two calls.
   if (error == last && last == cudaDeviceSynchronize()) {
-    throw fatal_cuda_error{"Sticky " + msg, error};
+    throw fatal_cuda_error{"Fatal " + msg, error};
   } else {
-    throw cudart_error{msg, error};
+    throw cuda_error{msg, error};
   }
 }
 }  // namespace detail
diff --git a/cpp/include/cudf_test/cudf_gtest.hpp b/cpp/include/cudf_test/cudf_gtest.hpp
index a3326a5f8f9..7bd704a288d 100644
--- a/cpp/include/cudf_test/cudf_gtest.hpp
+++ b/cpp/include/cudf_test/cudf_gtest.hpp
@@ -117,14 +117,11 @@ struct TypeList<Types<TYPES...>> {
 #define CUDF_EXPECT_THROW_MESSAGE(x, msg) \
   EXPECT_THROW_MESSAGE(x, cudf::logic_error, "cuDF failure at:", msg)
 
-#define CUDART_EXPECT_THROW_MESSAGE(x, msg) \
-  EXPECT_THROW_MESSAGE(x, cudf::cudart_error, "CUDA error encountered at:", msg)
+#define CUDA_EXPECT_THROW_MESSAGE(x, msg) \
+  EXPECT_THROW_MESSAGE(x, cudf::cuda_error, "CUDA error encountered at:", msg)
 
-#define STICKY_CUDA_EXPECT_THROW_MESSAGE(x, msg) \
-  EXPECT_THROW_MESSAGE(x, cudf::sticky_cuda_error, "Sticky CUDA error encountered at:", msg)
-
-#define STICKY_CUDA_EXPECT_THROW_MESSAGE_1(x, msg) \
-  EXPECT_THROW_MESSAGE(x, cudf::cuda_error, "Sticky CUDA error encountered at:", msg)
+#define FATAL_CUDA_EXPECT_THROW_MESSAGE(x, msg) \
+  EXPECT_THROW_MESSAGE(x, cudf::fatal_cuda_error, "Fatal CUDA error encountered at:", msg)
 
 /**
  * @brief test macro to be expected as no exception.
diff --git a/cpp/tests/error/error_handling_test.cu b/cpp/tests/error/error_handling_test.cu
index 315c5234a40..bde8ccc6de7 100644
--- a/cpp/tests/error/error_handling_test.cu
+++ b/cpp/tests/error/error_handling_test.cu
@@ -36,16 +36,16 @@ TEST(ExpectsTest, TryCatch)
 
 TEST(CudaTryTest, Error)
 {
-  CUDART_EXPECT_THROW_MESSAGE(CUDF_CUDA_TRY(cudaErrorLaunchFailure),
-                              "cudaErrorLaunchFailure unspecified launch failure");
+  CUDA_EXPECT_THROW_MESSAGE(CUDF_CUDA_TRY(cudaErrorLaunchFailure),
+                            "cudaErrorLaunchFailure unspecified launch failure");
 }
 
 TEST(CudaTryTest, Success) { EXPECT_NO_THROW(CUDF_CUDA_TRY(cudaSuccess)); }
 
 TEST(CudaTryTest, TryCatch)
 {
-  CUDART_EXPECT_THROW_MESSAGE(CUDF_CUDA_TRY(cudaErrorMemoryAllocation),
-                              "cudaErrorMemoryAllocation out of memory");
+  CUDA_EXPECT_THROW_MESSAGE(CUDF_CUDA_TRY(cudaErrorMemoryAllocation),
+                            "cudaErrorMemoryAllocation out of memory");
 }
 
 TEST(StreamCheck, success) { EXPECT_NO_THROW(CUDF_CHECK_CUDA(0)); }
@@ -68,7 +68,7 @@ TEST(StreamCheck, FailedKernel)
 #ifdef NDEBUG
   stream.synchronize();
 #endif
-  EXPECT_THROW(CUDF_CHECK_CUDA(stream.value()), cudf::cudart_error);
+  EXPECT_THROW(CUDF_CHECK_CUDA(stream.value()), cudf::cuda_error);
 }
 
 TEST(StreamCheck, CatchFailedKernel)
@@ -79,9 +79,9 @@ TEST(StreamCheck, CatchFailedKernel)
 #ifndef NDEBUG
   stream.synchronize();
 #endif
-  CUDART_EXPECT_THROW_MESSAGE(CUDF_CHECK_CUDA(stream.value()),
-                              "cudaErrorInvalidConfiguration "
-                              "invalid configuration argument");
+  CUDA_EXPECT_THROW_MESSAGE(CUDF_CHECK_CUDA(stream.value()),
+                            "cudaErrorInvalidConfiguration "
+                            "invalid configuration argument");
 }
 
 #ifndef NDEBUG

From 413923997e6d4a7d1c2709ca17ec3db0d3014617 Mon Sep 17 00:00:00 2001
From: sperlingxx <lovedreamf@gmail.com>
Date: Wed, 13 Apr 2022 17:12:36 +0800
Subject: [PATCH 06/11] with JNI

---
 cpp/include/cudf/utilities/error.hpp          |   4 -
 .../java/ai/rapids/cudf/CudaException.java    | 161 +++++++++++++++++-
 java/src/main/native/include/jni_utils.hpp    |  73 ++++----
 java/src/main/native/src/CudaJni.cpp          |   4 +-
 java/src/main/native/src/RmmJni.cpp           |   6 +-
 .../test/java/ai/rapids/cudf/CudaTest.java    |  17 +-
 6 files changed, 209 insertions(+), 56 deletions(-)

diff --git a/cpp/include/cudf/utilities/error.hpp b/cpp/include/cudf/utilities/error.hpp
index 8249f20f66b..07874ae6ece 100644
--- a/cpp/include/cudf/utilities/error.hpp
+++ b/cpp/include/cudf/utilities/error.hpp
@@ -58,10 +58,6 @@ struct cuda_error : public std::runtime_error {
   cudaError_t _cudaError;
 };
 
-struct cudart_error : public cuda_error {
-  using cuda_error::cuda_error;
-};
-
 struct fatal_cuda_error : public cuda_error {
   using cuda_error::cuda_error;
 };
diff --git a/java/src/main/java/ai/rapids/cudf/CudaException.java b/java/src/main/java/ai/rapids/cudf/CudaException.java
index 2d862b47ef8..2cad2c09979 100755
--- a/java/src/main/java/ai/rapids/cudf/CudaException.java
+++ b/java/src/main/java/ai/rapids/cudf/CudaException.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -30,9 +30,168 @@
 public class CudaException extends RuntimeException {
   CudaException(String message) {
     super(message);
+    isFatal = message.startsWith("Fatal");
+    cudaError = extractCudaError(message);
   }
 
   CudaException(String message, Throwable cause) {
     super(message, cause);
+    isFatal = message.startsWith("Fatal");
+    cudaError = extractCudaError(message);
+  }
+
+  /**
+   * Returns whether this CudaError is fatal or not.
+   *
+   * Fatal errors leave the process in an inconsistent state and any further CUDA work will return
+   * the same error. To continue using CUDA, the process must be terminated and relaunched.
+   */
+  public boolean isFatal() {
+    return isFatal;
+  }
+
+  public final CudaError cudaError;
+
+  private final boolean isFatal;
+
+  private static CudaError extractCudaError(String msg) {
+    int startIdx = msg.indexOf('[');
+    int endIdx = msg.indexOf(']');
+    return CudaError.valueOf(msg.substring(startIdx + 1, endIdx));
+  }
+
+  /**
+   * The Java mirror of cudaError, which facilities the tracking of CUDA errors in JVM.
+   */
+  public enum CudaError {
+    cudaErrorInvalidValue(1),
+    cudaErrorMemoryAllocation(2),
+    cudaErrorInitializationError(3),
+    cudaErrorCudartUnloading(4),
+    cudaErrorProfilerDisabled(5),
+    cudaErrorProfilerNotInitialized(6),
+    cudaErrorProfilerAlreadyStarted(7),
+    cudaErrorProfilerAlreadyStopped(8),
+    cudaErrorInvalidConfiguration(9),
+    cudaErrorInvalidPitchValue(12),
+    cudaErrorInvalidSymbol(13),
+    cudaErrorInvalidHostPointer(16),
+    cudaErrorInvalidDevicePointer(17),
+    cudaErrorInvalidTexture(18),
+    cudaErrorInvalidTextureBinding(19),
+    cudaErrorInvalidChannelDescriptor(20),
+    cudaErrorInvalidMemcpyDirection(21),
+    cudaErrorAddressOfConstant(22),
+    cudaErrorTextureFetchFailed(23),
+    cudaErrorTextureNotBound(24),
+    cudaErrorSynchronizationError(25),
+    cudaErrorInvalidFilterSetting(26),
+    cudaErrorInvalidNormSetting(27),
+    cudaErrorMixedDeviceExecution(28),
+    cudaErrorNotYetImplemented(31),
+    cudaErrorMemoryValueTooLarge(32),
+    cudaErrorStubLibrary(34),
+    cudaErrorInsufficientDriver(35),
+    cudaErrorCallRequiresNewerDriver(36),
+    cudaErrorInvalidSurface(37),
+    cudaErrorDuplicateVariableName(43),
+    cudaErrorDuplicateTextureName(44),
+    cudaErrorDuplicateSurfaceName(45),
+    cudaErrorDevicesUnavailable(46),
+    cudaErrorIncompatibleDriverContext(49),
+    cudaErrorMissingConfiguration(52),
+    cudaErrorPriorLaunchFailure(53),
+    cudaErrorLaunchMaxDepthExceeded(65),
+    cudaErrorLaunchFileScopedTex(66),
+    cudaErrorLaunchFileScopedSurf(67),
+    cudaErrorSyncDepthExceeded(68),
+    cudaErrorLaunchPendingCountExceeded(69),
+    cudaErrorInvalidDeviceFunction(98),
+    cudaErrorNoDevice(100),
+    cudaErrorInvalidDevice(101),
+    cudaErrorDeviceNotLicensed(102),
+    cudaErrorSoftwareValidityNotEstablished(103),
+    cudaErrorStartupFailure(127),
+    cudaErrorInvalidKernelImage(200),
+    cudaErrorDeviceUninitialized(201),
+    cudaErrorMapBufferObjectFailed(205),
+    cudaErrorUnmapBufferObjectFailed(206),
+    cudaErrorArrayIsMapped(207),
+    cudaErrorAlreadyMapped(208),
+    cudaErrorNoKernelImageForDevice(209),
+    cudaErrorAlreadyAcquired(210),
+    cudaErrorNotMapped(211),
+    cudaErrorNotMappedAsArray(212),
+    cudaErrorNotMappedAsPointer(213),
+    cudaErrorECCUncorrectable(214),
+    cudaErrorUnsupportedLimit(215),
+    cudaErrorDeviceAlreadyInUse(216),
+    cudaErrorPeerAccessUnsupported(217),
+    cudaErrorInvalidPtx(218),
+    cudaErrorInvalidGraphicsContext(219),
+    cudaErrorNvlinkUncorrectable(220),
+    cudaErrorJitCompilerNotFound(221),
+    cudaErrorUnsupportedPtxVersion(222),
+    cudaErrorJitCompilationDisabled(223),
+    cudaErrorUnsupportedExecAffinity(224),
+    cudaErrorInvalidSource(300),
+    cudaErrorFileNotFound(301),
+    cudaErrorSharedObjectSymbolNotFound(302),
+    cudaErrorSharedObjectInitFailed(303),
+    cudaErrorOperatingSystem(304),
+    cudaErrorInvalidResourceHandle(400),
+    cudaErrorIllegalState(401),
+    cudaErrorSymbolNotFound(500),
+    cudaErrorNotReady(600),
+    cudaErrorIllegalAddress(700),
+    cudaErrorLaunchOutOfResources(701),
+    cudaErrorLaunchTimeout(702),
+    cudaErrorLaunchIncompatibleTexturing(703),
+    cudaErrorPeerAccessAlreadyEnabled(704),
+    cudaErrorPeerAccessNotEnabled(705),
+    cudaErrorSetOnActiveProcess(708),
+    cudaErrorContextIsDestroyed(709),
+    cudaErrorAssert(710),
+    cudaErrorTooManyPeers(711),
+    cudaErrorHostMemoryAlreadyRegistered(712),
+    cudaErrorHostMemoryNotRegistered(713),
+    cudaErrorHardwareStackError(714),
+    cudaErrorIllegalInstruction(715),
+    cudaErrorMisalignedAddress(716),
+    cudaErrorInvalidAddressSpace(717),
+    cudaErrorInvalidPc(718),
+    cudaErrorLaunchFailure(719),
+    cudaErrorCooperativeLaunchTooLarge(720),
+    cudaErrorNotPermitted(800),
+    cudaErrorNotSupported(801),
+    cudaErrorSystemNotReady(802),
+    cudaErrorSystemDriverMismatch(803),
+    cudaErrorCompatNotSupportedOnDevice(804),
+    cudaErrorMpsConnectionFailed(805),
+    cudaErrorMpsRpcFailure(806),
+    cudaErrorMpsServerNotReady(807),
+    cudaErrorMpsMaxClientsReached(808),
+    cudaErrorMpsMaxConnectionsReached(809),
+    cudaErrorStreamCaptureUnsupported(900),
+    cudaErrorStreamCaptureInvalidated(901),
+    cudaErrorStreamCaptureMerge(902),
+    cudaErrorStreamCaptureUnmatched(903),
+    cudaErrorStreamCaptureUnjoined(904),
+    cudaErrorStreamCaptureIsolation(905),
+    cudaErrorStreamCaptureImplicit(906),
+    cudaErrorCapturedEvent(907),
+    cudaErrorStreamCaptureWrongThread(908),
+    cudaErrorTimeout(909),
+    cudaErrorGraphExecUpdateFailure(910),
+    cudaErrorExternalDevice(911),
+    cudaErrorUnknown(999),
+    cudaErrorApiFailureBase(10000);
+
+    final int code;
+
+    CudaError(int errorCode) {
+      this.code = errorCode;
+    }
+
   }
 }
diff --git a/java/src/main/native/include/jni_utils.hpp b/java/src/main/native/include/jni_utils.hpp
index a45716a89b3..ebaa1e90b5e 100644
--- a/java/src/main/native/include/jni_utils.hpp
+++ b/java/src/main/native/include/jni_utils.hpp
@@ -733,41 +733,6 @@ class native_jstringArray {
   }
 };
 
-/**
- * @brief create a cuda exception from a given cudaError_t
- */
-inline jthrowable cuda_exception(JNIEnv *const env, cudaError_t status, jthrowable cause = NULL) {
-  jclass ex_class = env->FindClass(cudf::jni::CUDA_ERROR_CLASS);
-  if (ex_class == NULL) {
-    return NULL;
-  }
-  jmethodID ctor_id =
-      env->GetMethodID(ex_class, "<init>", "(Ljava/lang/String;Ljava/lang/Throwable;)V");
-  if (ctor_id == NULL) {
-    return NULL;
-  }
-
-  jstring msg = env->NewStringUTF(cudaGetErrorString(status));
-  if (msg == NULL) {
-    return NULL;
-  }
-
-  jobject ret = env->NewObject(ex_class, ctor_id, msg, cause);
-  return (jthrowable)ret;
-}
-
-inline void jni_cuda_check(JNIEnv *const env, cudaError_t cuda_status) {
-  if (cudaSuccess != cuda_status) {
-    // Clear the last error so it does not propagate.
-    cudaGetLastError();
-    jthrowable jt = cuda_exception(env, cuda_status);
-    if (jt != NULL) {
-      env->Throw(jt);
-      throw jni_exception("CUDA ERROR");
-    }
-  }
-}
-
 } // namespace jni
 } // namespace cudf
 
@@ -790,19 +755,35 @@ inline void jni_cuda_check(JNIEnv *const env, cudaError_t cuda_status) {
     JNI_THROW_NEW(env, class_name, message, ret_val)                                               \
   }
 
+// Throw a new exception only if one is not pending then always return with the specified value
+#define JNI_CHECK_THROW_NEW_CUDA_ERROR(env, e, fatal, ret_val)                                     \
+  do {                                                                                             \
+    if (env->ExceptionOccurred()) {                                                                \
+      return ret_val;                                                                              \
+    }                                                                                              \
+    jclass ex_class = env->FindClass(cudf::jni::CUDA_ERROR_CLASS);                                 \
+    const char *e_name = cudaGetErrorName(e.error_code());                                         \
+    std::string what = std::string(fatal ? "Fatal CUDA ERROR [" : "CUDA ERROR [") + e_name +       \
+                       "]: " + (e.what() == nullptr ? "" : e.what());                              \
+    env->ThrowNew(ex_class, what.c_str());                                                         \
+    return ret_val;                                                                                \
+  } while (0)
+
 #define JNI_CUDA_TRY(env, ret_val, call)                                                           \
-  {                                                                                                \
+  do {                                                                                             \
     cudaError_t internal_cuda_status = (call);                                                     \
     if (cudaSuccess != internal_cuda_status) {                                                     \
-      /* Clear the last error so it does not propagate.*/                                          \
-      cudaGetLastError();                                                                          \
-      jthrowable jt = cudf::jni::cuda_exception(env, internal_cuda_status);                        \
-      if (jt != NULL) {                                                                            \
-        env->Throw(jt);                                                                            \
-      }                                                                                            \
+      cudf::detail::throw_cuda_error(internal_cuda_status, __FILE__, __LINE__);                    \
       return ret_val;                                                                              \
     }                                                                                              \
-  }
+  } while (0)
+
+#define JNI_CUDA_CHECK(env, cuda_status)                                                           \
+  do {                                                                                             \
+    if (cudaSuccess != cuda_status) {                                                              \
+      cudf::detail::throw_cuda_error(cuda_status, __FILE__, __LINE__);                             \
+    }                                                                                              \
+  } while (0)
 
 #define JNI_NULL_CHECK(env, obj, error_msg, ret_val)                                               \
   {                                                                                                \
@@ -831,6 +812,12 @@ inline void jni_cuda_check(JNIEnv *const env, cudaError_t cuda_status) {
         std::string("Could not allocate native memory: ") + (e.what() == nullptr ? "" : e.what()); \
     JNI_CHECK_THROW_NEW(env, cudf::jni::OOM_CLASS, what.c_str(), ret_val);                         \
   }                                                                                                \
+  catch (const cudf::cuda_error &e) {                                                              \
+    JNI_CHECK_THROW_NEW_CUDA_ERROR(env, e, false, ret_val);                                        \
+  }                                                                                                \
+  catch (const cudf::fatal_cuda_error &e) {                                                        \
+    JNI_CHECK_THROW_NEW_CUDA_ERROR(env, e, true, ret_val);                                         \
+  }                                                                                                \
   catch (const std::exception &e) {                                                                \
     /* If jni_exception caught then a Java exception is pending and this will not overwrite it. */ \
     JNI_CHECK_THROW_NEW(env, class_name, e.what(), ret_val);                                       \
diff --git a/java/src/main/native/src/CudaJni.cpp b/java/src/main/native/src/CudaJni.cpp
index 9862c3bface..caa48b00e77 100644
--- a/java/src/main/native/src/CudaJni.cpp
+++ b/java/src/main/native/src/CudaJni.cpp
@@ -44,7 +44,7 @@ void auto_set_device(JNIEnv *env) {
   if (Cudf_device != cudaInvalidDeviceId) {
     if (Thread_device != Cudf_device) {
       cudaError_t cuda_status = cudaSetDevice(Cudf_device);
-      jni_cuda_check(env, cuda_status);
+      JNI_CUDA_CHECK(env, cuda_status);
       Thread_device = Cudf_device;
     }
   }
@@ -53,7 +53,7 @@ void auto_set_device(JNIEnv *env) {
 /** Fills all the bytes in the buffer 'buf' with 'value'. */
 void device_memset_async(JNIEnv *env, rmm::device_buffer &buf, char value) {
   cudaError_t cuda_status = cudaMemsetAsync((void *)buf.data(), value, buf.size());
-  jni_cuda_check(env, cuda_status);
+  JNI_CUDA_CHECK(env, cuda_status);
 }
 
 } // namespace jni
diff --git a/java/src/main/native/src/RmmJni.cpp b/java/src/main/native/src/RmmJni.cpp
index ce3e6ffb285..41f1c23ac11 100644
--- a/java/src/main/native/src/RmmJni.cpp
+++ b/java/src/main/native/src/RmmJni.cpp
@@ -327,11 +327,9 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_initializeInternal(JNIEnv *env, j
                                                                   jstring jpath, jlong pool_size) {
   try {
     // make sure the CUDA device is setup in the context
-    cudaError_t cuda_status = cudaFree(0);
-    cudf::jni::jni_cuda_check(env, cuda_status);
+    JNI_CUDA_CHECK(env, cudaFree(0));
     int device_id;
-    cuda_status = cudaGetDevice(&device_id);
-    cudf::jni::jni_cuda_check(env, cuda_status);
+    JNI_CUDA_CHECK(env, cudaGetDevice(&device_id));
 
     bool use_pool_alloc = allocation_mode & 1;
     bool use_managed_mem = allocation_mode & 2;
diff --git a/java/src/test/java/ai/rapids/cudf/CudaTest.java b/java/src/test/java/ai/rapids/cudf/CudaTest.java
index 8905c2edd56..f5af2b02fc5 100644
--- a/java/src/test/java/ai/rapids/cudf/CudaTest.java
+++ b/java/src/test/java/ai/rapids/cudf/CudaTest.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -18,7 +18,7 @@
 
 import org.junit.jupiter.api.Test;
 
-import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.*;
 
 public class CudaTest {
 
@@ -32,4 +32,17 @@ public void testGetCudaRuntimeInfo() {
     assertEquals(Cuda.getNativeComputeMode(), Cuda.getComputeMode().nativeId);
   }
 
+  @Test
+  public void testCudaException() {
+    assertThrows(CudaException.class, () -> {
+          try {
+            Cuda.memset(Long.MAX_VALUE, (byte) 0, 1024);
+          } catch (CudaException ex) {
+            assertEquals(CudaException.CudaError.cudaErrorInvalidValue, ex.cudaError);
+            assertFalse(ex.isFatal());
+            throw ex;
+          }
+        }
+    );
+  }
 }

From 3209291813e6c0698a6cbf942c87defe17138462 Mon Sep 17 00:00:00 2001
From: sperlingxx <lovedreamf@gmail.com>
Date: Wed, 13 Apr 2022 17:21:18 +0800
Subject: [PATCH 07/11] fix

---
 java/src/main/native/src/CudaJni.cpp | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/java/src/main/native/src/CudaJni.cpp b/java/src/main/native/src/CudaJni.cpp
index caa48b00e77..3af58605864 100644
--- a/java/src/main/native/src/CudaJni.cpp
+++ b/java/src/main/native/src/CudaJni.cpp
@@ -41,19 +41,23 @@ void set_cudf_device(int device) {
  * is using the same device.
  */
 void auto_set_device(JNIEnv *env) {
-  if (Cudf_device != cudaInvalidDeviceId) {
-    if (Thread_device != Cudf_device) {
-      cudaError_t cuda_status = cudaSetDevice(Cudf_device);
-      JNI_CUDA_CHECK(env, cuda_status);
-      Thread_device = Cudf_device;
+  try {
+    if (Cudf_device != cudaInvalidDeviceId) {
+      if (Thread_device != Cudf_device) {
+        JNI_CUDA_CHECK(env, cudaSetDevice(Cudf_device));
+        Thread_device = Cudf_device;
+      }
     }
   }
+  CATCH_STD(env, );
 }
 
 /** Fills all the bytes in the buffer 'buf' with 'value'. */
 void device_memset_async(JNIEnv *env, rmm::device_buffer &buf, char value) {
-  cudaError_t cuda_status = cudaMemsetAsync((void *)buf.data(), value, buf.size());
-  JNI_CUDA_CHECK(env, cuda_status);
+  try {
+    JNI_CUDA_CHECK(env, cudaMemsetAsync((void *)buf.data(), value, buf.size()));
+  }
+  CATCH_STD(env, );
 }
 
 } // namespace jni

From 5a45016c1225e5e1a414043f9101caa1c8da5ab4 Mon Sep 17 00:00:00 2001
From: sperlingxx <lovedreamf@gmail.com>
Date: Wed, 13 Apr 2022 17:34:23 +0800
Subject: [PATCH 08/11] revert JNI

---
 .../java/ai/rapids/cudf/CudaException.java    | 161 +-----------------
 java/src/main/native/include/jni_utils.hpp    |  73 ++++----
 java/src/main/native/src/CudaJni.cpp          |  18 +-
 java/src/main/native/src/RmmJni.cpp           |   6 +-
 4 files changed, 55 insertions(+), 203 deletions(-)

diff --git a/java/src/main/java/ai/rapids/cudf/CudaException.java b/java/src/main/java/ai/rapids/cudf/CudaException.java
index 2cad2c09979..2d862b47ef8 100755
--- a/java/src/main/java/ai/rapids/cudf/CudaException.java
+++ b/java/src/main/java/ai/rapids/cudf/CudaException.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -30,168 +30,9 @@
 public class CudaException extends RuntimeException {
   CudaException(String message) {
     super(message);
-    isFatal = message.startsWith("Fatal");
-    cudaError = extractCudaError(message);
   }
 
   CudaException(String message, Throwable cause) {
     super(message, cause);
-    isFatal = message.startsWith("Fatal");
-    cudaError = extractCudaError(message);
-  }
-
-  /**
-   * Returns whether this CudaError is fatal or not.
-   *
-   * Fatal errors leave the process in an inconsistent state and any further CUDA work will return
-   * the same error. To continue using CUDA, the process must be terminated and relaunched.
-   */
-  public boolean isFatal() {
-    return isFatal;
-  }
-
-  public final CudaError cudaError;
-
-  private final boolean isFatal;
-
-  private static CudaError extractCudaError(String msg) {
-    int startIdx = msg.indexOf('[');
-    int endIdx = msg.indexOf(']');
-    return CudaError.valueOf(msg.substring(startIdx + 1, endIdx));
-  }
-
-  /**
-   * The Java mirror of cudaError, which facilities the tracking of CUDA errors in JVM.
-   */
-  public enum CudaError {
-    cudaErrorInvalidValue(1),
-    cudaErrorMemoryAllocation(2),
-    cudaErrorInitializationError(3),
-    cudaErrorCudartUnloading(4),
-    cudaErrorProfilerDisabled(5),
-    cudaErrorProfilerNotInitialized(6),
-    cudaErrorProfilerAlreadyStarted(7),
-    cudaErrorProfilerAlreadyStopped(8),
-    cudaErrorInvalidConfiguration(9),
-    cudaErrorInvalidPitchValue(12),
-    cudaErrorInvalidSymbol(13),
-    cudaErrorInvalidHostPointer(16),
-    cudaErrorInvalidDevicePointer(17),
-    cudaErrorInvalidTexture(18),
-    cudaErrorInvalidTextureBinding(19),
-    cudaErrorInvalidChannelDescriptor(20),
-    cudaErrorInvalidMemcpyDirection(21),
-    cudaErrorAddressOfConstant(22),
-    cudaErrorTextureFetchFailed(23),
-    cudaErrorTextureNotBound(24),
-    cudaErrorSynchronizationError(25),
-    cudaErrorInvalidFilterSetting(26),
-    cudaErrorInvalidNormSetting(27),
-    cudaErrorMixedDeviceExecution(28),
-    cudaErrorNotYetImplemented(31),
-    cudaErrorMemoryValueTooLarge(32),
-    cudaErrorStubLibrary(34),
-    cudaErrorInsufficientDriver(35),
-    cudaErrorCallRequiresNewerDriver(36),
-    cudaErrorInvalidSurface(37),
-    cudaErrorDuplicateVariableName(43),
-    cudaErrorDuplicateTextureName(44),
-    cudaErrorDuplicateSurfaceName(45),
-    cudaErrorDevicesUnavailable(46),
-    cudaErrorIncompatibleDriverContext(49),
-    cudaErrorMissingConfiguration(52),
-    cudaErrorPriorLaunchFailure(53),
-    cudaErrorLaunchMaxDepthExceeded(65),
-    cudaErrorLaunchFileScopedTex(66),
-    cudaErrorLaunchFileScopedSurf(67),
-    cudaErrorSyncDepthExceeded(68),
-    cudaErrorLaunchPendingCountExceeded(69),
-    cudaErrorInvalidDeviceFunction(98),
-    cudaErrorNoDevice(100),
-    cudaErrorInvalidDevice(101),
-    cudaErrorDeviceNotLicensed(102),
-    cudaErrorSoftwareValidityNotEstablished(103),
-    cudaErrorStartupFailure(127),
-    cudaErrorInvalidKernelImage(200),
-    cudaErrorDeviceUninitialized(201),
-    cudaErrorMapBufferObjectFailed(205),
-    cudaErrorUnmapBufferObjectFailed(206),
-    cudaErrorArrayIsMapped(207),
-    cudaErrorAlreadyMapped(208),
-    cudaErrorNoKernelImageForDevice(209),
-    cudaErrorAlreadyAcquired(210),
-    cudaErrorNotMapped(211),
-    cudaErrorNotMappedAsArray(212),
-    cudaErrorNotMappedAsPointer(213),
-    cudaErrorECCUncorrectable(214),
-    cudaErrorUnsupportedLimit(215),
-    cudaErrorDeviceAlreadyInUse(216),
-    cudaErrorPeerAccessUnsupported(217),
-    cudaErrorInvalidPtx(218),
-    cudaErrorInvalidGraphicsContext(219),
-    cudaErrorNvlinkUncorrectable(220),
-    cudaErrorJitCompilerNotFound(221),
-    cudaErrorUnsupportedPtxVersion(222),
-    cudaErrorJitCompilationDisabled(223),
-    cudaErrorUnsupportedExecAffinity(224),
-    cudaErrorInvalidSource(300),
-    cudaErrorFileNotFound(301),
-    cudaErrorSharedObjectSymbolNotFound(302),
-    cudaErrorSharedObjectInitFailed(303),
-    cudaErrorOperatingSystem(304),
-    cudaErrorInvalidResourceHandle(400),
-    cudaErrorIllegalState(401),
-    cudaErrorSymbolNotFound(500),
-    cudaErrorNotReady(600),
-    cudaErrorIllegalAddress(700),
-    cudaErrorLaunchOutOfResources(701),
-    cudaErrorLaunchTimeout(702),
-    cudaErrorLaunchIncompatibleTexturing(703),
-    cudaErrorPeerAccessAlreadyEnabled(704),
-    cudaErrorPeerAccessNotEnabled(705),
-    cudaErrorSetOnActiveProcess(708),
-    cudaErrorContextIsDestroyed(709),
-    cudaErrorAssert(710),
-    cudaErrorTooManyPeers(711),
-    cudaErrorHostMemoryAlreadyRegistered(712),
-    cudaErrorHostMemoryNotRegistered(713),
-    cudaErrorHardwareStackError(714),
-    cudaErrorIllegalInstruction(715),
-    cudaErrorMisalignedAddress(716),
-    cudaErrorInvalidAddressSpace(717),
-    cudaErrorInvalidPc(718),
-    cudaErrorLaunchFailure(719),
-    cudaErrorCooperativeLaunchTooLarge(720),
-    cudaErrorNotPermitted(800),
-    cudaErrorNotSupported(801),
-    cudaErrorSystemNotReady(802),
-    cudaErrorSystemDriverMismatch(803),
-    cudaErrorCompatNotSupportedOnDevice(804),
-    cudaErrorMpsConnectionFailed(805),
-    cudaErrorMpsRpcFailure(806),
-    cudaErrorMpsServerNotReady(807),
-    cudaErrorMpsMaxClientsReached(808),
-    cudaErrorMpsMaxConnectionsReached(809),
-    cudaErrorStreamCaptureUnsupported(900),
-    cudaErrorStreamCaptureInvalidated(901),
-    cudaErrorStreamCaptureMerge(902),
-    cudaErrorStreamCaptureUnmatched(903),
-    cudaErrorStreamCaptureUnjoined(904),
-    cudaErrorStreamCaptureIsolation(905),
-    cudaErrorStreamCaptureImplicit(906),
-    cudaErrorCapturedEvent(907),
-    cudaErrorStreamCaptureWrongThread(908),
-    cudaErrorTimeout(909),
-    cudaErrorGraphExecUpdateFailure(910),
-    cudaErrorExternalDevice(911),
-    cudaErrorUnknown(999),
-    cudaErrorApiFailureBase(10000);
-
-    final int code;
-
-    CudaError(int errorCode) {
-      this.code = errorCode;
-    }
-
   }
 }
diff --git a/java/src/main/native/include/jni_utils.hpp b/java/src/main/native/include/jni_utils.hpp
index ebaa1e90b5e..a45716a89b3 100644
--- a/java/src/main/native/include/jni_utils.hpp
+++ b/java/src/main/native/include/jni_utils.hpp
@@ -733,6 +733,41 @@ class native_jstringArray {
   }
 };
 
+/**
+ * @brief create a cuda exception from a given cudaError_t
+ */
+inline jthrowable cuda_exception(JNIEnv *const env, cudaError_t status, jthrowable cause = NULL) {
+  jclass ex_class = env->FindClass(cudf::jni::CUDA_ERROR_CLASS);
+  if (ex_class == NULL) {
+    return NULL;
+  }
+  jmethodID ctor_id =
+      env->GetMethodID(ex_class, "<init>", "(Ljava/lang/String;Ljava/lang/Throwable;)V");
+  if (ctor_id == NULL) {
+    return NULL;
+  }
+
+  jstring msg = env->NewStringUTF(cudaGetErrorString(status));
+  if (msg == NULL) {
+    return NULL;
+  }
+
+  jobject ret = env->NewObject(ex_class, ctor_id, msg, cause);
+  return (jthrowable)ret;
+}
+
+inline void jni_cuda_check(JNIEnv *const env, cudaError_t cuda_status) {
+  if (cudaSuccess != cuda_status) {
+    // Clear the last error so it does not propagate.
+    cudaGetLastError();
+    jthrowable jt = cuda_exception(env, cuda_status);
+    if (jt != NULL) {
+      env->Throw(jt);
+      throw jni_exception("CUDA ERROR");
+    }
+  }
+}
+
 } // namespace jni
 } // namespace cudf
 
@@ -755,35 +790,19 @@ class native_jstringArray {
     JNI_THROW_NEW(env, class_name, message, ret_val)                                               \
   }
 
-// Throw a new exception only if one is not pending then always return with the specified value
-#define JNI_CHECK_THROW_NEW_CUDA_ERROR(env, e, fatal, ret_val)                                     \
-  do {                                                                                             \
-    if (env->ExceptionOccurred()) {                                                                \
-      return ret_val;                                                                              \
-    }                                                                                              \
-    jclass ex_class = env->FindClass(cudf::jni::CUDA_ERROR_CLASS);                                 \
-    const char *e_name = cudaGetErrorName(e.error_code());                                         \
-    std::string what = std::string(fatal ? "Fatal CUDA ERROR [" : "CUDA ERROR [") + e_name +       \
-                       "]: " + (e.what() == nullptr ? "" : e.what());                              \
-    env->ThrowNew(ex_class, what.c_str());                                                         \
-    return ret_val;                                                                                \
-  } while (0)
-
 #define JNI_CUDA_TRY(env, ret_val, call)                                                           \
-  do {                                                                                             \
+  {                                                                                                \
     cudaError_t internal_cuda_status = (call);                                                     \
     if (cudaSuccess != internal_cuda_status) {                                                     \
-      cudf::detail::throw_cuda_error(internal_cuda_status, __FILE__, __LINE__);                    \
+      /* Clear the last error so it does not propagate.*/                                          \
+      cudaGetLastError();                                                                          \
+      jthrowable jt = cudf::jni::cuda_exception(env, internal_cuda_status);                        \
+      if (jt != NULL) {                                                                            \
+        env->Throw(jt);                                                                            \
+      }                                                                                            \
       return ret_val;                                                                              \
     }                                                                                              \
-  } while (0)
-
-#define JNI_CUDA_CHECK(env, cuda_status)                                                           \
-  do {                                                                                             \
-    if (cudaSuccess != cuda_status) {                                                              \
-      cudf::detail::throw_cuda_error(cuda_status, __FILE__, __LINE__);                             \
-    }                                                                                              \
-  } while (0)
+  }
 
 #define JNI_NULL_CHECK(env, obj, error_msg, ret_val)                                               \
   {                                                                                                \
@@ -812,12 +831,6 @@ class native_jstringArray {
         std::string("Could not allocate native memory: ") + (e.what() == nullptr ? "" : e.what()); \
     JNI_CHECK_THROW_NEW(env, cudf::jni::OOM_CLASS, what.c_str(), ret_val);                         \
   }                                                                                                \
-  catch (const cudf::cuda_error &e) {                                                              \
-    JNI_CHECK_THROW_NEW_CUDA_ERROR(env, e, false, ret_val);                                        \
-  }                                                                                                \
-  catch (const cudf::fatal_cuda_error &e) {                                                        \
-    JNI_CHECK_THROW_NEW_CUDA_ERROR(env, e, true, ret_val);                                         \
-  }                                                                                                \
   catch (const std::exception &e) {                                                                \
     /* If jni_exception caught then a Java exception is pending and this will not overwrite it. */ \
     JNI_CHECK_THROW_NEW(env, class_name, e.what(), ret_val);                                       \
diff --git a/java/src/main/native/src/CudaJni.cpp b/java/src/main/native/src/CudaJni.cpp
index 3af58605864..9862c3bface 100644
--- a/java/src/main/native/src/CudaJni.cpp
+++ b/java/src/main/native/src/CudaJni.cpp
@@ -41,23 +41,19 @@ void set_cudf_device(int device) {
  * is using the same device.
  */
 void auto_set_device(JNIEnv *env) {
-  try {
-    if (Cudf_device != cudaInvalidDeviceId) {
-      if (Thread_device != Cudf_device) {
-        JNI_CUDA_CHECK(env, cudaSetDevice(Cudf_device));
-        Thread_device = Cudf_device;
-      }
+  if (Cudf_device != cudaInvalidDeviceId) {
+    if (Thread_device != Cudf_device) {
+      cudaError_t cuda_status = cudaSetDevice(Cudf_device);
+      jni_cuda_check(env, cuda_status);
+      Thread_device = Cudf_device;
     }
   }
-  CATCH_STD(env, );
 }
 
 /** Fills all the bytes in the buffer 'buf' with 'value'. */
 void device_memset_async(JNIEnv *env, rmm::device_buffer &buf, char value) {
-  try {
-    JNI_CUDA_CHECK(env, cudaMemsetAsync((void *)buf.data(), value, buf.size()));
-  }
-  CATCH_STD(env, );
+  cudaError_t cuda_status = cudaMemsetAsync((void *)buf.data(), value, buf.size());
+  jni_cuda_check(env, cuda_status);
 }
 
 } // namespace jni
diff --git a/java/src/main/native/src/RmmJni.cpp b/java/src/main/native/src/RmmJni.cpp
index 41f1c23ac11..ce3e6ffb285 100644
--- a/java/src/main/native/src/RmmJni.cpp
+++ b/java/src/main/native/src/RmmJni.cpp
@@ -327,9 +327,11 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_initializeInternal(JNIEnv *env, j
                                                                   jstring jpath, jlong pool_size) {
   try {
     // make sure the CUDA device is setup in the context
-    JNI_CUDA_CHECK(env, cudaFree(0));
+    cudaError_t cuda_status = cudaFree(0);
+    cudf::jni::jni_cuda_check(env, cuda_status);
     int device_id;
-    JNI_CUDA_CHECK(env, cudaGetDevice(&device_id));
+    cuda_status = cudaGetDevice(&device_id);
+    cudf::jni::jni_cuda_check(env, cuda_status);
 
     bool use_pool_alloc = allocation_mode & 1;
     bool use_managed_mem = allocation_mode & 2;

From 56d00d71ab8fa6e166a34da3026e6043c68c4c15 Mon Sep 17 00:00:00 2001
From: sperlingxx <lovedreamf@gmail.com>
Date: Wed, 13 Apr 2022 17:38:05 +0800
Subject: [PATCH 09/11] revert JNI

---
 java/src/test/java/ai/rapids/cudf/CudaTest.java | 17 ++---------------
 1 file changed, 2 insertions(+), 15 deletions(-)

diff --git a/java/src/test/java/ai/rapids/cudf/CudaTest.java b/java/src/test/java/ai/rapids/cudf/CudaTest.java
index f5af2b02fc5..8905c2edd56 100644
--- a/java/src/test/java/ai/rapids/cudf/CudaTest.java
+++ b/java/src/test/java/ai/rapids/cudf/CudaTest.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -18,7 +18,7 @@
 
 import org.junit.jupiter.api.Test;
 
-import static org.junit.jupiter.api.Assertions.*;
+import static org.junit.jupiter.api.Assertions.assertEquals;
 
 public class CudaTest {
 
@@ -32,17 +32,4 @@ public void testGetCudaRuntimeInfo() {
     assertEquals(Cuda.getNativeComputeMode(), Cuda.getComputeMode().nativeId);
   }
 
-  @Test
-  public void testCudaException() {
-    assertThrows(CudaException.class, () -> {
-          try {
-            Cuda.memset(Long.MAX_VALUE, (byte) 0, 1024);
-          } catch (CudaException ex) {
-            assertEquals(CudaException.CudaError.cudaErrorInvalidValue, ex.cudaError);
-            assertFalse(ex.isFatal());
-            throw ex;
-          }
-        }
-    );
-  }
 }

From 2e5844cdf634f6d9f742f7669c33bba6f2a885c7 Mon Sep 17 00:00:00 2001
From: Alfred Xu <lovedreamf@gmail.com>
Date: Thu, 14 Apr 2022 09:55:06 +0800
Subject: [PATCH 10/11] Update cpp/include/cudf/utilities/error.hpp

Co-authored-by: Jake Hemstad <jhemstad@nvidia.com>
---
 cpp/include/cudf/utilities/error.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/include/cudf/utilities/error.hpp b/cpp/include/cudf/utilities/error.hpp
index 07874ae6ece..9cff693e375 100644
--- a/cpp/include/cudf/utilities/error.hpp
+++ b/cpp/include/cudf/utilities/error.hpp
@@ -121,7 +121,7 @@ inline void throw_cuda_error(cudaError_t error, const char* file, unsigned int l
   auto const msg  = std::string{"CUDA error encountered at: " + std::string{file} + ":" +
                                std::to_string(line) + ": " + std::to_string(error) + " " +
                                cudaGetErrorName(error) + " " + cudaGetErrorString(error)};
-  // Calls cudaDeviceSynchronize to make sure that there is no other asynchronize error occurs
+  // Call cudaDeviceSynchronize to ensure `last` did not result from an asynchronous error. 
   // between two calls.
   if (error == last && last == cudaDeviceSynchronize()) {
     throw fatal_cuda_error{"Fatal " + msg, error};

From 53156c986eb84140571c9e9b5a8331545d865a0d Mon Sep 17 00:00:00 2001
From: sperlingxx <lovedreamf@gmail.com>
Date: Thu, 14 Apr 2022 10:01:17 +0800
Subject: [PATCH 11/11] fix

---
 cpp/include/cudf/utilities/error.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/include/cudf/utilities/error.hpp b/cpp/include/cudf/utilities/error.hpp
index 9cff693e375..8f6190bbaf7 100644
--- a/cpp/include/cudf/utilities/error.hpp
+++ b/cpp/include/cudf/utilities/error.hpp
@@ -121,7 +121,7 @@ inline void throw_cuda_error(cudaError_t error, const char* file, unsigned int l
   auto const msg  = std::string{"CUDA error encountered at: " + std::string{file} + ":" +
                                std::to_string(line) + ": " + std::to_string(error) + " " +
                                cudaGetErrorName(error) + " " + cudaGetErrorString(error)};
-  // Call cudaDeviceSynchronize to ensure `last` did not result from an asynchronous error. 
+  // Call cudaDeviceSynchronize to ensure `last` did not result from an asynchronous error.
   // between two calls.
   if (error == last && last == cudaDeviceSynchronize()) {
     throw fatal_cuda_error{"Fatal " + msg, error};