From 5dd70174ac97a795675403a92c69862d1b6e0038 Mon Sep 17 00:00:00 2001
From: Matt Joux <mjoux@nvidia.com>
Date: Sat, 10 Jun 2023 05:39:17 +0200
Subject: [PATCH] [IMP] move core CUDA RT macros to cuda_rt_essentials.hpp
 (#1584)

The reasoning behind this PR is as follows:
for now, anyone wanting to use `RAFT_CUDA_TRY_NO_THROW` still needs to include `cudart_utils.hpp` which can be costly (compilation) due to the include of `memory_pool.hpp`.
By moving the macros to the essentials, we should not break anything for anyone, but allow anyone to improve compilation times by including the essentials only. At the same time, it should add minimal overhead to the compilation time of the essentials file since the pre-processor is (usually) fast compared to the rest of the compilation pipeline.

Authors:
  - Matt Joux (https://github.com/MatthiasKohl)

Approvers:
  - Allard Hendriksen (https://github.com/ahendriksen)
  - Corey J. Nolet (https://github.com/cjnolet)

URL: https://github.com/rapidsai/raft/pull/1584
---
 cpp/include/raft/util/cuda_rt_essentials.hpp | 37 ++++++++++++++++++++
 cpp/include/raft/util/cudart_utils.hpp       | 35 ------------------
 2 files changed, 37 insertions(+), 35 deletions(-)
diff --git a/cpp/include/raft/util/cuda_rt_essentials.hpp b/cpp/include/raft/util/cuda_rt_essentials.hpp
index e5f3af4e61..77612f97bc 100644
--- a/cpp/include/raft/util/cuda_rt_essentials.hpp
+++ b/cpp/include/raft/util/cuda_rt_essentials.hpp
@@ -23,6 +23,8 @@
 #include <cuda_runtime.h>
 #include <raft/core/error.hpp>
 
+#include <cstdio>
+
 namespace raft {
 
 /**
@@ -58,3 +60,38 @@ struct cuda_error : public raft::exception {
       throw raft::cuda_error(msg);                 \
     }                                              \
   } while (0)
+
+/**
+ * @brief Debug macro to check for CUDA errors
+ *
+ * In a non-release build, this macro will synchronize the specified stream
+ * before error checking. In both release and non-release builds, this macro
+ * checks for any pending CUDA errors from previous calls. If an error is
+ * reported, an exception is thrown detailing the CUDA error that occurred.
+ *
+ * The intent of this macro is to provide a mechanism for synchronous and
+ * deterministic execution for debugging asynchronous CUDA execution. It should
+ * be used after any asynchronous CUDA call, e.g., cudaMemcpyAsync, or an
+ * asynchronous kernel launch.
+ */
+#ifndef NDEBUG
+#define RAFT_CHECK_CUDA(stream) RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
+#else
+#define RAFT_CHECK_CUDA(stream) RAFT_CUDA_TRY(cudaPeekAtLastError());
+#endif
+
+// /**
+//  * @brief check for cuda runtime API errors but log error instead of raising
+//  *        exception.
+//  */
+#define RAFT_CUDA_TRY_NO_THROW(call)                               \
+  do {                                                             \
+    cudaError_t const status = call;                               \
+    if (cudaSuccess != status) {                                   \
+      printf("CUDA call='%s' at file=%s line=%d failed with %s\n", \
+             #call,                                                \
+             __FILE__,                                             \
+             __LINE__,                                             \
+             cudaGetErrorString(status));                          \
+    }                                                              \
+  } while (0)
diff --git a/cpp/include/raft/util/cudart_utils.hpp b/cpp/include/raft/util/cudart_utils.hpp
index f3b083ac4a..743ffd743c 100644
--- a/cpp/include/raft/util/cudart_utils.hpp
+++ b/cpp/include/raft/util/cudart_utils.hpp
@@ -34,41 +34,6 @@
 #include <mutex>
 #include <string>
 
-/**
- * @brief Debug macro to check for CUDA errors
- *
- * In a non-release build, this macro will synchronize the specified stream
- * before error checking. In both release and non-release builds, this macro
- * checks for any pending CUDA errors from previous calls. If an error is
- * reported, an exception is thrown detailing the CUDA error that occurred.
- *
- * The intent of this macro is to provide a mechanism for synchronous and
- * deterministic execution for debugging asynchronous CUDA execution. It should
- * be used after any asynchronous CUDA call, e.g., cudaMemcpyAsync, or an
- * asynchronous kernel launch.
- */
-#ifndef NDEBUG
-#define RAFT_CHECK_CUDA(stream) RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
-#else
-#define RAFT_CHECK_CUDA(stream) RAFT_CUDA_TRY(cudaPeekAtLastError());
-#endif
-
-// /**
-//  * @brief check for cuda runtime API errors but log error instead of raising
-//  *        exception.
-//  */
-#define RAFT_CUDA_TRY_NO_THROW(call)                               \
-  do {                                                             \
-    cudaError_t const status = call;                               \
-    if (cudaSuccess != status) {                                   \
-      printf("CUDA call='%s' at file=%s line=%d failed with %s\n", \
-             #call,                                                \
-             __FILE__,                                             \
-             __LINE__,                                             \
-             cudaGetErrorString(status));                          \
-    }                                                              \
-  } while (0)
-
 namespace raft {
 
 /** Helper method to get to know warp size in device code */