From a92846874481c947f345b2073b3dc7a58927788d Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Wed, 3 Jun 2020 11:06:20 -0400 Subject: [PATCH 01/28] copy error.hpp from cuDF, add license statement, and initial update --- cpp/include/raft/error.hpp | 124 +++++++++++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) create mode 100644 cpp/include/raft/error.hpp diff --git a/cpp/include/raft/error.hpp b/cpp/include/raft/error.hpp new file mode 100644 index 0000000000..a75d98cf4d --- /dev/null +++ b/cpp/include/raft/error.hpp @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + + +namespace raft { + +/** + * @brief Exception thrown when logical precondition is violated. + * + * This exception should not be thrown directly and is instead thrown by the + * RAFT_EXPECTS macro. + * + */ +struct logic_error : public std::logic_error { + logic_error(char const* const message) : std::logic_error(message) {} + + logic_error(std::string const& message) : std::logic_error(message) {} +}; + +/** + * @brief Exception thrown when a CUDA error is encountered. + */ +struct cuda_error : public std::runtime_error { + cuda_error(std::string const& message) : std::runtime_error(message) {} +}; + +} // namespace raft + +#define STRINGIFY_DETAIL(x) #x +#define RAFT_STRINGIFY(x) STRINGIFY_DETAIL(x) + +/** + * @brief Macro for checking (pre-)conditions that throws an exception when + * a condition is violated. + * + * @param[in] cond Expression that evaluates to true or false + * @param[in] reason String literal description of the reason that cond is + * expected to be true + * @throw raft::logic_error if the condition evaluates to false. + **/ +#define RAFT_EXPECTS(cond, reason) \ + (!!(cond)) ? static_cast(0) \ + : throw raft::logic_error("RAFT failure at: " __FILE__ \ + ":" RAFT_STRINGIFY(__LINE__) ": " reason) + +/** + * @brief Indicates that an erroneous code path has been taken. + * + * In host code, throws a `raft::logic_error`. + * + * @param[in] reason String literal description of the reason + **/ +#define RAFT_FAIL(reason) \ + throw raft::logic_error("RAFT failure at: " __FILE__ ":" RAFT_STRINGIFY(__LINE__) ": " reason) + +namespace raft { +namespace detail { + +inline void throw_cuda_error(cudaError_t error, const char* file, unsigned int line) +{ + throw raft::cuda_error(std::string{"CUDA error encountered at: " + std::string{file} + ":" + + std::to_string(line) + ": " + std::to_string(error) + " " + + cudaGetErrorName(error) + " " + cudaGetErrorString(error)}); +} + +} // namespace detail +} // namespace raft + +/** + * @brief Error checking macro for CUDA runtime API functions. + * + * Invokes a CUDA runtime API function call, if the call does not return + * cudaSuccess, invokes cudaGetLastError() to clear the error and throws an + * exception detailing the CUDA error that occurred + * + **/ +#define CUDA_TRY(call) \ + do { \ + cudaError_t const status = (call); \ + if (cudaSuccess != status) { \ + cudaGetLastError(); \ + raft::detail::throw_cuda_error(status, __FILE__, __LINE__); \ + } \ + } while (0); + +/** + * @brief Debug macro to check for CUDA errors + * + * In a non-release build, this macro will synchronize the specified stream + * before error checking. In both release and non-release builds, this macro + * checks for any pending CUDA errors from previous calls. If an error is + * reported, an exception is thrown detailing the CUDA error that occurred. + * + * The intent of this macro is to provide a mechanism for synchronous and + * deterministic execution for debugging asynchronous CUDA execution. It should + * be used after any asynchronous CUDA call, e.g., cudaMemcpyAsync, or an + * asynchronous kernel launch. + * + **/ +#ifndef NDEBUG +#define CHECK_CUDA(stream) CUDA_TRY(cudaStreamSynchronize(stream)); +#else +#define CHECK_CUDA(stream) CUDA_TRY(cudaPeekAtLastError()); +#endif From 328462f58eb02098e4803ebf3225a4a5fa45aec4 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Wed, 3 Jun 2020 11:25:46 -0400 Subject: [PATCH 02/28] add CUML_EXPECTS, CUML_FAIL, CUGRAPH_EXPECTS, and CUGRAPH_FAIL --- cpp/include/raft/error.hpp | 48 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/cpp/include/raft/error.hpp b/cpp/include/raft/error.hpp index a75d98cf4d..1bd74ec3c2 100644 --- a/cpp/include/raft/error.hpp +++ b/cpp/include/raft/error.hpp @@ -73,6 +73,54 @@ struct cuda_error : public std::runtime_error { #define RAFT_FAIL(reason) \ throw raft::logic_error("RAFT failure at: " __FILE__ ":" RAFT_STRINGIFY(__LINE__) ": " reason) +/** + * @brief Macro for checking (pre-)conditions that throws an exception when + * a condition is violated. + * + * @param[in] cond Expression that evaluates to true or false + * @param[in] reason String literal description of the reason that cond is + * expected to be true + * @throw raft::logic_error if the condition evaluates to false. + **/ +#define CUML_EXPECTS(cond, reason) \ + (!!(cond)) ? static_cast(0) \ + : throw raft::logic_error("cuML failure at: " __FILE__ \ + ":" RAFT_STRINGIFY(__LINE__) ": " reason) + +/** + * @brief Indicates that an erroneous code path has been taken. + * + * In host code, throws a `raft::logic_error`. + * + * @param[in] reason String literal description of the reason + **/ +#define CUML_FAIL(reason) \ + throw raft::logic_error("cuML failure at: " __FILE__ ":" RAFT_STRINGIFY(__LINE__) ": " reason) + +/** + * @brief Macro for checking (pre-)conditions that throws an exception when + * a condition is violated. + * + * @param[in] cond Expression that evaluates to true or false + * @param[in] reason String literal description of the reason that cond is + * expected to be true + * @throw raft::logic_error if the condition evaluates to false. + **/ +#define CUGRAPH_EXPECTS(cond, reason) \ + (!!(cond)) ? static_cast(0) \ + : throw raft::logic_error("cuGRAPH failure at: " __FILE__ \ + ":" RAFT_STRINGIFY(__LINE__) ": " reason) + +/** + * @brief Indicates that an erroneous code path has been taken. + * + * In host code, throws a `raft::logic_error`. + * + * @param[in] reason String literal description of the reason + **/ +#define CUGRAPH_FAIL(reason) \ + throw raft::logic_error("cuGRAPH failure at: " __FILE__ ":" RAFT_STRINGIFY(__LINE__) ": " reason) + namespace raft { namespace detail { From 187e12ab3a6b52f5106128afc52715eb3a8affe5 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Wed, 3 Jun 2020 11:33:19 -0400 Subject: [PATCH 03/28] add NCCL_TRY --- cpp/include/raft/error.hpp | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/cpp/include/raft/error.hpp b/cpp/include/raft/error.hpp index 1bd74ec3c2..4821a9761d 100644 --- a/cpp/include/raft/error.hpp +++ b/cpp/include/raft/error.hpp @@ -18,6 +18,8 @@ #include #include +#include + #include #include @@ -33,7 +35,6 @@ namespace raft { */ struct logic_error : public std::logic_error { logic_error(char const* const message) : std::logic_error(message) {} - logic_error(std::string const& message) : std::logic_error(message) {} }; @@ -41,9 +42,18 @@ struct logic_error : public std::logic_error { * @brief Exception thrown when a CUDA error is encountered. */ struct cuda_error : public std::runtime_error { + cuda_error(char const* const message) : std::runtime_error(message) {} cuda_error(std::string const& message) : std::runtime_error(message) {} }; +/** + * @brief Exception thrown when a NCCL error is encountered. + */ +struct nccl_error : public std::runtime_error { + nccl_error(char const* const message) : std::runtime_error(message) {} + nccl_error(std::string const& message) : std::runtime_error(message) {} +}; + } // namespace raft #define STRINGIFY_DETAIL(x) #x @@ -131,6 +141,13 @@ inline void throw_cuda_error(cudaError_t error, const char* file, unsigned int l cudaGetErrorName(error) + " " + cudaGetErrorString(error)}); } +inline void throw_nccl_error(ncclResult_t error, const char* file, unsigned int line) { + throw cugraph::nccl_error( + std::string{"NCCL error encountered at: " + std::string{file} + ":" + + std::to_string(line) + ": " + std::to_string(error) + " " + + ncclGetErrorString(error)}); +} + } // namespace detail } // namespace raft @@ -170,3 +187,17 @@ inline void throw_cuda_error(cudaError_t error, const char* file, unsigned int l #else #define CHECK_CUDA(stream) CUDA_TRY(cudaPeekAtLastError()); #endif + +/** + * @brief Error checking macro for NCCL runtime API functions. + * + * Invokes a NCCL runtime API function call, if the call does not return ncclSuccess, throws an + * exception detailing the NCCL error that occurred + */ +#define NCCL_TRY(call) \ + do { \ + ncclResult_t const status = (call); \ + if (ncclSuccess != status) { \ + cugraph::detail::throw_nccl_error(status, __FILE__, __LINE__);\ + } \ + } while (0); From 4ce8f372b406d0ba23a9dcb153d06a4fae6d4594 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Wed, 3 Jun 2020 12:12:13 -0400 Subject: [PATCH 04/28] fix compile/clang-tidy errors --- cpp/include/raft/error.hpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/cpp/include/raft/error.hpp b/cpp/include/raft/error.hpp index 4821a9761d..9d32943fb5 100644 --- a/cpp/include/raft/error.hpp +++ b/cpp/include/raft/error.hpp @@ -34,24 +34,24 @@ namespace raft { * */ struct logic_error : public std::logic_error { - logic_error(char const* const message) : std::logic_error(message) {} - logic_error(std::string const& message) : std::logic_error(message) {} + explicit logic_error(char const* const message) : std::logic_error(message) {} + explicit logic_error(std::string const& message) : std::logic_error(message) {} }; /** * @brief Exception thrown when a CUDA error is encountered. */ struct cuda_error : public std::runtime_error { - cuda_error(char const* const message) : std::runtime_error(message) {} - cuda_error(std::string const& message) : std::runtime_error(message) {} + explicit cuda_error(char const* const message) : std::runtime_error(message) {} + explicit cuda_error(std::string const& message) : std::runtime_error(message) {} }; /** * @brief Exception thrown when a NCCL error is encountered. */ struct nccl_error : public std::runtime_error { - nccl_error(char const* const message) : std::runtime_error(message) {} - nccl_error(std::string const& message) : std::runtime_error(message) {} + explicit nccl_error(char const* const message) : std::runtime_error(message) {} + explicit nccl_error(std::string const& message) : std::runtime_error(message) {} }; } // namespace raft @@ -142,7 +142,7 @@ inline void throw_cuda_error(cudaError_t error, const char* file, unsigned int l } inline void throw_nccl_error(ncclResult_t error, const char* file, unsigned int line) { - throw cugraph::nccl_error( + throw raft::nccl_error( std::string{"NCCL error encountered at: " + std::string{file} + ":" + std::to_string(line) + ": " + std::to_string(error) + " " + ncclGetErrorString(error)}); @@ -198,6 +198,6 @@ inline void throw_nccl_error(ncclResult_t error, const char* file, unsigned int do { \ ncclResult_t const status = (call); \ if (ncclSuccess != status) { \ - cugraph::detail::throw_nccl_error(status, __FILE__, __LINE__);\ + raft::detail::throw_nccl_error(status, __FILE__, __LINE__);\ } \ } while (0); From 086abd3f80d9dd8e2453b83591a056c0fc3e26fd Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Wed, 3 Jun 2020 12:31:38 -0400 Subject: [PATCH 05/28] fix an error in a comment --- cpp/include/raft/error.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/raft/error.hpp b/cpp/include/raft/error.hpp index 9d32943fb5..5c083b9f6e 100644 --- a/cpp/include/raft/error.hpp +++ b/cpp/include/raft/error.hpp @@ -30,7 +30,7 @@ namespace raft { * @brief Exception thrown when logical precondition is violated. * * This exception should not be thrown directly and is instead thrown by the - * RAFT_EXPECTS macro. + * RAFT_EXPECTS, RAFT_FAIL, CUML_EXPECTS, CUML_FAIL, CUGRAPH_EXPECTS, CUGRAPH_FAIL macros. * */ struct logic_error : public std::logic_error { From 4f7225773633e83f690b0ba1def7e324e4e54a92 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Wed, 3 Jun 2020 13:07:28 -0400 Subject: [PATCH 06/28] add CUSPARSE_TRY --- cpp/include/raft/error.hpp | 82 +++++++++++++++++++++++++++++++------- 1 file changed, 68 insertions(+), 14 deletions(-) diff --git a/cpp/include/raft/error.hpp b/cpp/include/raft/error.hpp index 5c083b9f6e..fd602d57fd 100644 --- a/cpp/include/raft/error.hpp +++ b/cpp/include/raft/error.hpp @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -46,6 +47,14 @@ struct cuda_error : public std::runtime_error { explicit cuda_error(std::string const& message) : std::runtime_error(message) {} }; +/** + * @brief Exception thrown when a cuSparse error is encountered. + */ +struct cusparse_error : public std::runtime_error { + explicit cusparse_error(char const* const message) : std::runtime_error(message) {} + explicit cusparse_error(std::string const& message) : std::runtime_error(message) {} +}; + /** * @brief Exception thrown when a NCCL error is encountered. */ @@ -67,7 +76,7 @@ struct nccl_error : public std::runtime_error { * @param[in] reason String literal description of the reason that cond is * expected to be true * @throw raft::logic_error if the condition evaluates to false. - **/ + */ #define RAFT_EXPECTS(cond, reason) \ (!!(cond)) ? static_cast(0) \ : throw raft::logic_error("RAFT failure at: " __FILE__ \ @@ -79,7 +88,7 @@ struct nccl_error : public std::runtime_error { * In host code, throws a `raft::logic_error`. * * @param[in] reason String literal description of the reason - **/ + */ #define RAFT_FAIL(reason) \ throw raft::logic_error("RAFT failure at: " __FILE__ ":" RAFT_STRINGIFY(__LINE__) ": " reason) @@ -91,7 +100,7 @@ struct nccl_error : public std::runtime_error { * @param[in] reason String literal description of the reason that cond is * expected to be true * @throw raft::logic_error if the condition evaluates to false. - **/ + */ #define CUML_EXPECTS(cond, reason) \ (!!(cond)) ? static_cast(0) \ : throw raft::logic_error("cuML failure at: " __FILE__ \ @@ -103,7 +112,7 @@ struct nccl_error : public std::runtime_error { * In host code, throws a `raft::logic_error`. * * @param[in] reason String literal description of the reason - **/ + */ #define CUML_FAIL(reason) \ throw raft::logic_error("cuML failure at: " __FILE__ ":" RAFT_STRINGIFY(__LINE__) ": " reason) @@ -115,7 +124,7 @@ struct nccl_error : public std::runtime_error { * @param[in] reason String literal description of the reason that cond is * expected to be true * @throw raft::logic_error if the condition evaluates to false. - **/ + */ #define CUGRAPH_EXPECTS(cond, reason) \ (!!(cond)) ? static_cast(0) \ : throw raft::logic_error("cuGRAPH failure at: " __FILE__ \ @@ -127,7 +136,7 @@ struct nccl_error : public std::runtime_error { * In host code, throws a `raft::logic_error`. * * @param[in] reason String literal description of the reason - **/ + */ #define CUGRAPH_FAIL(reason) \ throw raft::logic_error("cuGRAPH failure at: " __FILE__ ":" RAFT_STRINGIFY(__LINE__) ": " reason) @@ -148,6 +157,37 @@ inline void throw_nccl_error(ncclResult_t error, const char* file, unsigned int ncclGetErrorString(error)}); } +// FIXME: unnecessary once CUDA 10.1+ becomes the minimum supported version +#define _CUSPARSE_ERR_TO_STR(err) \ + case err: \ + return #err; +inline const char* cusparse_error_to_string(cusparseStatus_t err) { +#if defined(CUDART_VERSION) && CUDART_VERSION >= 10100 + return cusparseGetErrorString(status); +#else // CUDART_VERSION + switch (err) { + _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_SUCCESS); + _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_NOT_INITIALIZED); + _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_ALLOC_FAILED); + _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_INVALID_VALUE); + _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_ARCH_MISMATCH); + _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_EXECUTION_FAILED); + _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_INTERNAL_ERROR); + _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED); + default: + return "CUSPARSE_STATUS_UNKNOWN"; + }; +#endif // CUDART_VERSION +} +#undef _CUSPARSE_ERR_TO_STR + +inline void throw_cusparse_error(cusparseStatus_t error, const char* file, unsigned int line) { + throw raft::cusparse_error( + std::string{"cuSparse error encountered at: " + std::string{file} + ":" + + std::to_string(line) + ": " + std::to_string(error) + " " + + cusparse_error_to_string(error)}); +} + } // namespace detail } // namespace raft @@ -158,7 +198,7 @@ inline void throw_nccl_error(ncclResult_t error, const char* file, unsigned int * cudaSuccess, invokes cudaGetLastError() to clear the error and throws an * exception detailing the CUDA error that occurred * - **/ + */ #define CUDA_TRY(call) \ do { \ cudaError_t const status = (call); \ @@ -181,23 +221,37 @@ inline void throw_nccl_error(ncclResult_t error, const char* file, unsigned int * be used after any asynchronous CUDA call, e.g., cudaMemcpyAsync, or an * asynchronous kernel launch. * - **/ + */ #ifndef NDEBUG #define CHECK_CUDA(stream) CUDA_TRY(cudaStreamSynchronize(stream)); #else #define CHECK_CUDA(stream) CUDA_TRY(cudaPeekAtLastError()); #endif +/** + * @brief Error checking macro for cuSparse runtime API functions. + * + * Invokes a cuSparse runtime API function call, if the call does not return + * CUSPARSE_STATUS_SUCCESS, throws an exception detailing the cuSparse error that occurred + */ +#define CUSPARSE_TRY(call) \ + do { \ + cusparseStatus_t const status = (call); \ + if (CUSPARSE_STATUS_SUCCESS != status) { \ + raft::detail::throw_cusparse_error(status, __FILE__, __LINE__); \ + } \ + } while (0); + /** * @brief Error checking macro for NCCL runtime API functions. * * Invokes a NCCL runtime API function call, if the call does not return ncclSuccess, throws an * exception detailing the NCCL error that occurred */ -#define NCCL_TRY(call) \ - do { \ - ncclResult_t const status = (call); \ - if (ncclSuccess != status) { \ - raft::detail::throw_nccl_error(status, __FILE__, __LINE__);\ - } \ +#define NCCL_TRY(call) \ + do { \ + ncclResult_t const status = (call); \ + if (ncclSuccess != status) { \ + raft::detail::throw_nccl_error(status, __FILE__, __LINE__); \ + } \ } while (0); From a428c6ec9b690943628b03f051110e702ca32a96 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Wed, 3 Jun 2020 13:34:23 -0400 Subject: [PATCH 07/28] add CURAND_TRY --- cpp/include/raft/error.hpp | 54 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/cpp/include/raft/error.hpp b/cpp/include/raft/error.hpp index fd602d57fd..7861157507 100644 --- a/cpp/include/raft/error.hpp +++ b/cpp/include/raft/error.hpp @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -47,6 +48,14 @@ struct cuda_error : public std::runtime_error { explicit cuda_error(std::string const& message) : std::runtime_error(message) {} }; +/** + * @brief Exception thrown when a cuRAND error is encountered. + */ +struct curand_error : public std::runtime_error { + explicit curand_error(char const* const message) : std::runtime_error(message) {} + explicit curand_error(std::string const& message) : std::runtime_error(message) {} +}; + /** * @brief Exception thrown when a cuSparse error is encountered. */ @@ -157,6 +166,37 @@ inline void throw_nccl_error(ncclResult_t error, const char* file, unsigned int ncclGetErrorString(error)}); } +#define _CURAND_ERR_TO_STR(err) \ + case err: \ + return #err; +inline const char* curand_error_to_string(curandStatus_t err) { + switch(err) { + _CURAND_ERR_TO_STR(CURAND_STATUS_SUCCESS); + _CURAND_ERR_TO_STR(CURAND_STATUS_VERSION_MISMATCH); + _CURAND_ERR_TO_STR(CURAND_STATUS_NOT_INITIALIZED); + _CURAND_ERR_TO_STR(CURAND_STATUS_ALLOCATION_FAILED); + _CURAND_ERR_TO_STR(CURAND_STATUS_TYPE_ERROR); + _CURAND_ERR_TO_STR(CURAND_STATUS_OUT_OF_RANGE); + _CURAND_ERR_TO_STR(CURAND_STATUS_LENGTH_NOT_MULTIPLE); + _CURAND_ERR_TO_STR(CURAND_STATUS_DOUBLE_PRECISION_REQUIRED); + _CURAND_ERR_TO_STR(CURAND_STATUS_LAUNCH_FAILURE); + _CURAND_ERR_TO_STR(CURAND_STATUS_PREEXISTING_FAILURE); + _CURAND_ERR_TO_STR(CURAND_STATUS_INITIALIZATION_FAILED); + _CURAND_ERR_TO_STR(CURAND_STATUS_ARCH_MISMATCH); + _CURAND_ERR_TO_STR(CURAND_STATUS_INTERNAL_ERROR); + default: + return "CURAND_STATUS_UNKNOWN"; + }; +} +#undef _CURAND_ERR_TO_STR + +inline void throw_curand_error(curandStatus_t error, const char* file, unsigned int line) { + throw raft::curand_error( + std::string{"cuRAND error encountered at: " + std::string{file} + ":" + + std::to_string(line) + ": " + std::to_string(error) + " " + + curand_error_to_string(error)}); +} + // FIXME: unnecessary once CUDA 10.1+ becomes the minimum supported version #define _CUSPARSE_ERR_TO_STR(err) \ case err: \ @@ -228,6 +268,20 @@ inline void throw_cusparse_error(cusparseStatus_t error, const char* file, unsig #define CHECK_CUDA(stream) CUDA_TRY(cudaPeekAtLastError()); #endif +/** + * @brief Error checking macro for cuRAND runtime API functions. + * + * Invokes a cuRAND runtime API function call, if the call does not return + * CURAND_STATUS_SUCCESS, throws an exception detailing the cuRAND error that occurred + */ +#define CURAND_TRY(call) \ + do { \ + curandStatus_t const status = (call); \ + if (CURAND_STATUS_SUCCESS != status) { \ + raft::detail::throw_curand_error(status, __FILE__, __LINE__); \ + } \ + } while (0); + /** * @brief Error checking macro for cuSparse runtime API functions. * From b9cee2b75617868cd34761add08b1feeec83ed4a Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Wed, 3 Jun 2020 13:36:21 -0400 Subject: [PATCH 08/28] address clang-tidy warnings --- cpp/include/raft/error.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/include/raft/error.hpp b/cpp/include/raft/error.hpp index 7861157507..9430b6efc8 100644 --- a/cpp/include/raft/error.hpp +++ b/cpp/include/raft/error.hpp @@ -169,7 +169,7 @@ inline void throw_nccl_error(ncclResult_t error, const char* file, unsigned int #define _CURAND_ERR_TO_STR(err) \ case err: \ return #err; -inline const char* curand_error_to_string(curandStatus_t err) { +inline auto curand_error_to_string(curandStatus_t err) -> const char* { switch(err) { _CURAND_ERR_TO_STR(CURAND_STATUS_SUCCESS); _CURAND_ERR_TO_STR(CURAND_STATUS_VERSION_MISMATCH); @@ -201,7 +201,7 @@ inline void throw_curand_error(curandStatus_t error, const char* file, unsigned #define _CUSPARSE_ERR_TO_STR(err) \ case err: \ return #err; -inline const char* cusparse_error_to_string(cusparseStatus_t err) { +inline auto cusparse_error_to_string(cusparseStatus_t err) -> const char* { #if defined(CUDART_VERSION) && CUDART_VERSION >= 10100 return cusparseGetErrorString(status); #else // CUDART_VERSION From b373267e304576583a0fcc6fe90cfe4304b55afa Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Wed, 3 Jun 2020 13:38:16 -0400 Subject: [PATCH 09/28] update change log --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9490099450..190fdc4e9a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # RAFT 0.15.0 (Date TBD) ## New Features +- PR #15: add exception based error handling macros ## Improvements From e471f1d8f72375f36629010cebbc0fda821a7f6d Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Wed, 3 Jun 2020 13:48:13 -0400 Subject: [PATCH 10/28] clang-format fixes --- cpp/include/raft/error.hpp | 96 ++++++++++++++++++++++---------------- 1 file changed, 57 insertions(+), 39 deletions(-) diff --git a/cpp/include/raft/error.hpp b/cpp/include/raft/error.hpp index 9430b6efc8..6952e31f0a 100644 --- a/cpp/include/raft/error.hpp +++ b/cpp/include/raft/error.hpp @@ -25,7 +25,6 @@ #include #include - namespace raft { /** @@ -37,39 +36,48 @@ namespace raft { */ struct logic_error : public std::logic_error { explicit logic_error(char const* const message) : std::logic_error(message) {} - explicit logic_error(std::string const& message) : std::logic_error(message) {} + explicit logic_error(std::string const& message) + : std::logic_error(message) {} }; /** * @brief Exception thrown when a CUDA error is encountered. */ struct cuda_error : public std::runtime_error { - explicit cuda_error(char const* const message) : std::runtime_error(message) {} - explicit cuda_error(std::string const& message) : std::runtime_error(message) {} + explicit cuda_error(char const* const message) + : std::runtime_error(message) {} + explicit cuda_error(std::string const& message) + : std::runtime_error(message) {} }; /** * @brief Exception thrown when a cuRAND error is encountered. */ struct curand_error : public std::runtime_error { - explicit curand_error(char const* const message) : std::runtime_error(message) {} - explicit curand_error(std::string const& message) : std::runtime_error(message) {} + explicit curand_error(char const* const message) + : std::runtime_error(message) {} + explicit curand_error(std::string const& message) + : std::runtime_error(message) {} }; /** * @brief Exception thrown when a cuSparse error is encountered. */ struct cusparse_error : public std::runtime_error { - explicit cusparse_error(char const* const message) : std::runtime_error(message) {} - explicit cusparse_error(std::string const& message) : std::runtime_error(message) {} + explicit cusparse_error(char const* const message) + : std::runtime_error(message) {} + explicit cusparse_error(std::string const& message) + : std::runtime_error(message) {} }; /** * @brief Exception thrown when a NCCL error is encountered. */ struct nccl_error : public std::runtime_error { - explicit nccl_error(char const* const message) : std::runtime_error(message) {} - explicit nccl_error(std::string const& message) : std::runtime_error(message) {} + explicit nccl_error(char const* const message) + : std::runtime_error(message) {} + explicit nccl_error(std::string const& message) + : std::runtime_error(message) {} }; } // namespace raft @@ -86,10 +94,11 @@ struct nccl_error : public std::runtime_error { * expected to be true * @throw raft::logic_error if the condition evaluates to false. */ -#define RAFT_EXPECTS(cond, reason) \ - (!!(cond)) ? static_cast(0) \ - : throw raft::logic_error("RAFT failure at: " __FILE__ \ - ":" RAFT_STRINGIFY(__LINE__) ": " reason) +#define RAFT_EXPECTS(cond, reason) \ + (!!(cond)) \ + ? static_cast(0) \ + : throw raft::logic_error("RAFT failure at: " __FILE__ \ + ":" RAFT_STRINGIFY(__LINE__) ": " reason) /** * @brief Indicates that an erroneous code path has been taken. @@ -98,8 +107,9 @@ struct nccl_error : public std::runtime_error { * * @param[in] reason String literal description of the reason */ -#define RAFT_FAIL(reason) \ - throw raft::logic_error("RAFT failure at: " __FILE__ ":" RAFT_STRINGIFY(__LINE__) ": " reason) +#define RAFT_FAIL(reason) \ + throw raft::logic_error("RAFT failure at: " __FILE__ \ + ":" RAFT_STRINGIFY(__LINE__) ": " reason) /** * @brief Macro for checking (pre-)conditions that throws an exception when @@ -110,10 +120,11 @@ struct nccl_error : public std::runtime_error { * expected to be true * @throw raft::logic_error if the condition evaluates to false. */ -#define CUML_EXPECTS(cond, reason) \ - (!!(cond)) ? static_cast(0) \ - : throw raft::logic_error("cuML failure at: " __FILE__ \ - ":" RAFT_STRINGIFY(__LINE__) ": " reason) +#define CUML_EXPECTS(cond, reason) \ + (!!(cond)) \ + ? static_cast(0) \ + : throw raft::logic_error("cuML failure at: " __FILE__ \ + ":" RAFT_STRINGIFY(__LINE__) ": " reason) /** * @brief Indicates that an erroneous code path has been taken. @@ -122,8 +133,9 @@ struct nccl_error : public std::runtime_error { * * @param[in] reason String literal description of the reason */ -#define CUML_FAIL(reason) \ - throw raft::logic_error("cuML failure at: " __FILE__ ":" RAFT_STRINGIFY(__LINE__) ": " reason) +#define CUML_FAIL(reason) \ + throw raft::logic_error("cuML failure at: " __FILE__ \ + ":" RAFT_STRINGIFY(__LINE__) ": " reason) /** * @brief Macro for checking (pre-)conditions that throws an exception when @@ -134,10 +146,11 @@ struct nccl_error : public std::runtime_error { * expected to be true * @throw raft::logic_error if the condition evaluates to false. */ -#define CUGRAPH_EXPECTS(cond, reason) \ - (!!(cond)) ? static_cast(0) \ - : throw raft::logic_error("cuGRAPH failure at: " __FILE__ \ - ":" RAFT_STRINGIFY(__LINE__) ": " reason) +#define CUGRAPH_EXPECTS(cond, reason) \ + (!!(cond)) \ + ? static_cast(0) \ + : throw raft::logic_error("cuGRAPH failure at: " __FILE__ \ + ":" RAFT_STRINGIFY(__LINE__) ": " reason) /** * @brief Indicates that an erroneous code path has been taken. @@ -146,20 +159,23 @@ struct nccl_error : public std::runtime_error { * * @param[in] reason String literal description of the reason */ -#define CUGRAPH_FAIL(reason) \ - throw raft::logic_error("cuGRAPH failure at: " __FILE__ ":" RAFT_STRINGIFY(__LINE__) ": " reason) +#define CUGRAPH_FAIL(reason) \ + throw raft::logic_error("cuGRAPH failure at: " __FILE__ \ + ":" RAFT_STRINGIFY(__LINE__) ": " reason) namespace raft { namespace detail { -inline void throw_cuda_error(cudaError_t error, const char* file, unsigned int line) -{ - throw raft::cuda_error(std::string{"CUDA error encountered at: " + std::string{file} + ":" + - std::to_string(line) + ": " + std::to_string(error) + " " + - cudaGetErrorName(error) + " " + cudaGetErrorString(error)}); +inline void throw_cuda_error(cudaError_t error, const char* file, + unsigned int line) { + throw raft::cuda_error( + std::string{"CUDA error encountered at: " + std::string{file} + ":" + + std::to_string(line) + ": " + std::to_string(error) + " " + + cudaGetErrorName(error) + " " + cudaGetErrorString(error)}); } -inline void throw_nccl_error(ncclResult_t error, const char* file, unsigned int line) { +inline void throw_nccl_error(ncclResult_t error, const char* file, + unsigned int line) { throw raft::nccl_error( std::string{"NCCL error encountered at: " + std::string{file} + ":" + std::to_string(line) + ": " + std::to_string(error) + " " + @@ -167,10 +183,10 @@ inline void throw_nccl_error(ncclResult_t error, const char* file, unsigned int } #define _CURAND_ERR_TO_STR(err) \ - case err: \ + case err: \ return #err; inline auto curand_error_to_string(curandStatus_t err) -> const char* { - switch(err) { + switch (err) { _CURAND_ERR_TO_STR(CURAND_STATUS_SUCCESS); _CURAND_ERR_TO_STR(CURAND_STATUS_VERSION_MISMATCH); _CURAND_ERR_TO_STR(CURAND_STATUS_NOT_INITIALIZED); @@ -190,7 +206,8 @@ inline auto curand_error_to_string(curandStatus_t err) -> const char* { } #undef _CURAND_ERR_TO_STR -inline void throw_curand_error(curandStatus_t error, const char* file, unsigned int line) { +inline void throw_curand_error(curandStatus_t error, const char* file, + unsigned int line) { throw raft::curand_error( std::string{"cuRAND error encountered at: " + std::string{file} + ":" + std::to_string(line) + ": " + std::to_string(error) + " " + @@ -204,7 +221,7 @@ inline void throw_curand_error(curandStatus_t error, const char* file, unsigned inline auto cusparse_error_to_string(cusparseStatus_t err) -> const char* { #if defined(CUDART_VERSION) && CUDART_VERSION >= 10100 return cusparseGetErrorString(status); -#else // CUDART_VERSION +#else // CUDART_VERSION switch (err) { _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_SUCCESS); _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_NOT_INITIALIZED); @@ -221,7 +238,8 @@ inline auto cusparse_error_to_string(cusparseStatus_t err) -> const char* { } #undef _CUSPARSE_ERR_TO_STR -inline void throw_cusparse_error(cusparseStatus_t error, const char* file, unsigned int line) { +inline void throw_cusparse_error(cusparseStatus_t error, const char* file, + unsigned int line) { throw raft::cusparse_error( std::string{"cuSparse error encountered at: " + std::string{file} + ":" + std::to_string(line) + ": " + std::to_string(error) + " " + From 035dc0046734db15d129cf6c7dabad8c0f9769b1 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Wed, 3 Jun 2020 14:02:14 -0400 Subject: [PATCH 11/28] another try to make clang-format happy --- cpp/include/raft/error.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/raft/error.hpp b/cpp/include/raft/error.hpp index 6952e31f0a..9424d28001 100644 --- a/cpp/include/raft/error.hpp +++ b/cpp/include/raft/error.hpp @@ -221,7 +221,7 @@ inline void throw_curand_error(curandStatus_t error, const char* file, inline auto cusparse_error_to_string(cusparseStatus_t err) -> const char* { #if defined(CUDART_VERSION) && CUDART_VERSION >= 10100 return cusparseGetErrorString(status); -#else // CUDART_VERSION +#else // CUDART_VERSION switch (err) { _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_SUCCESS); _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_NOT_INITIALIZED); From 2566b247d30ed829e2f1bb1b33d70edc23c9b08a Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Wed, 10 Jun 2020 00:13:29 -0400 Subject: [PATCH 12/28] move common error handling utilities from cuda_utils.h to error.hpp --- cpp/include/raft/cudart_utils.h | 79 ++------------------------------- cpp/include/raft/error.hpp | 72 ++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 75 deletions(-) diff --git a/cpp/include/raft/cudart_utils.h b/cpp/include/raft/cudart_utils.h index 47e76ab916..5ae4bcbac2 100644 --- a/cpp/include/raft/cudart_utils.h +++ b/cpp/include/raft/cudart_utils.h @@ -16,89 +16,18 @@ #pragma once +#include "raft/error.hpp" + #include + #include -#include #include -#include -#include -#include -#include -#include + ///@todo: enable once logging has been enabled in raft //#include "logger.hpp" namespace raft { -/** base exception class for the whole of raft */ -class exception : public std::exception { - public: - /** default ctor */ - explicit exception() noexcept : std::exception(), msg_() {} - - /** copy ctor */ - exception(const exception& src) noexcept - : std::exception(), msg_(src.what()) { - collect_call_stack(); - } - - /** ctor from an input message */ - explicit exception(const std::string _msg) noexcept - : std::exception(), msg_(std::move(_msg)) { - collect_call_stack(); - } - - /** get the message associated with this exception */ - const char* what() const noexcept override { return msg_.c_str(); } - - private: - /** message associated with this exception */ - std::string msg_; - - /** append call stack info to this exception's message for ease of debug */ - // Courtesy: https://www.gnu.org/software/libc/manual/html_node/Backtraces.html - void collect_call_stack() noexcept { -#ifdef __GNUC__ - constexpr int kMaxStackDepth = 64; - void* stack[kMaxStackDepth]; // NOLINT - auto depth = backtrace(stack, kMaxStackDepth); - std::ostringstream oss; - oss << std::endl << "Obtained " << depth << " stack frames" << std::endl; - char** strings = backtrace_symbols(stack, depth); - if (strings == nullptr) { - oss << "But no stack trace could be found!" << std::endl; - msg_ += oss.str(); - return; - } - ///@todo: support for demangling of C++ symbol names - for (int i = 0; i < depth; ++i) { - oss << "#" << i << " in " << strings[i] << std::endl; - } - free(strings); - msg_ += oss.str(); -#endif // __GNUC__ - } -}; - -/** macro to throw a runtime error */ -#define THROW(fmt, ...) \ - do { \ - std::string msg; \ - char errMsg[2048]; /* NOLINT */ \ - std::snprintf(errMsg, sizeof(errMsg), \ - "exception occured! file=%s line=%d: ", __FILE__, __LINE__); \ - msg += errMsg; \ - std::snprintf(errMsg, sizeof(errMsg), fmt, ##__VA_ARGS__); \ - msg += errMsg; \ - throw raft::exception(msg); \ - } while (0) - -/** macro to check for a conditional and assert on failure */ -#define ASSERT(check, fmt, ...) \ - do { \ - if (!(check)) THROW(fmt, ##__VA_ARGS__); \ - } while (0) - /** check for cuda runtime API errors and assert accordingly */ #define CUDA_CHECK(call) \ do { \ diff --git a/cpp/include/raft/error.hpp b/cpp/include/raft/error.hpp index 9424d28001..42fdbc9897 100644 --- a/cpp/include/raft/error.hpp +++ b/cpp/include/raft/error.hpp @@ -22,11 +22,83 @@ #include #include +#include +#include +#include #include #include namespace raft { +/** base exception class for the whole of raft */ +class exception : public std::exception { + public: + /** default ctor */ + explicit exception() noexcept : std::exception(), msg_() {} + + /** copy ctor */ + exception(const exception& src) noexcept + : std::exception(), msg_(src.what()) { + collect_call_stack(); + } + + /** ctor from an input message */ + explicit exception(const std::string _msg) noexcept + : std::exception(), msg_(std::move(_msg)) { + collect_call_stack(); + } + + /** get the message associated with this exception */ + const char* what() const noexcept override { return msg_.c_str(); } + + private: + /** message associated with this exception */ + std::string msg_; + + /** append call stack info to this exception's message for ease of debug */ + // Courtesy: https://www.gnu.org/software/libc/manual/html_node/Backtraces.html + void collect_call_stack() noexcept { +#ifdef __GNUC__ + constexpr int kMaxStackDepth = 64; + void* stack[kMaxStackDepth]; // NOLINT + auto depth = backtrace(stack, kMaxStackDepth); + std::ostringstream oss; + oss << std::endl << "Obtained " << depth << " stack frames" << std::endl; + char** strings = backtrace_symbols(stack, depth); + if (strings == nullptr) { + oss << "But no stack trace could be found!" << std::endl; + msg_ += oss.str(); + return; + } + ///@todo: support for demangling of C++ symbol names + for (int i = 0; i < depth; ++i) { + oss << "#" << i << " in " << strings[i] << std::endl; + } + free(strings); + msg_ += oss.str(); +#endif // __GNUC__ + } +}; + +/** macro to throw a runtime error */ +#define THROW(fmt, ...) \ + do { \ + std::string msg; \ + char errMsg[2048]; /* NOLINT */ \ + std::snprintf(errMsg, sizeof(errMsg), \ + "exception occured! file=%s line=%d: ", __FILE__, __LINE__); \ + msg += errMsg; \ + std::snprintf(errMsg, sizeof(errMsg), fmt, ##__VA_ARGS__); \ + msg += errMsg; \ + throw raft::exception(msg); \ + } while (0) + +/** macro to check for a conditional and assert on failure */ +#define ASSERT(check, fmt, ...) \ + do { \ + if (!(check)) THROW(fmt, ##__VA_ARGS__); \ + } while (0) + /** * @brief Exception thrown when logical precondition is violated. * From 36561255fbcc3270e511cddcb6bc35b60021165d Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Wed, 10 Jun 2020 16:45:35 -0400 Subject: [PATCH 13/28] update raft error classes to inherit raft::exception (instead of std::exception) --- cpp/include/raft/error.hpp | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/cpp/include/raft/error.hpp b/cpp/include/raft/error.hpp index 42fdbc9897..89c836f2c8 100644 --- a/cpp/include/raft/error.hpp +++ b/cpp/include/raft/error.hpp @@ -106,50 +106,50 @@ class exception : public std::exception { * RAFT_EXPECTS, RAFT_FAIL, CUML_EXPECTS, CUML_FAIL, CUGRAPH_EXPECTS, CUGRAPH_FAIL macros. * */ -struct logic_error : public std::logic_error { - explicit logic_error(char const* const message) : std::logic_error(message) {} +struct logic_error : public raft::exception { + explicit logic_error(char const* const message) : raft::exception(message) {} explicit logic_error(std::string const& message) - : std::logic_error(message) {} + : raft::exception(message) {} }; /** * @brief Exception thrown when a CUDA error is encountered. */ -struct cuda_error : public std::runtime_error { +struct cuda_error : public raft::exception { explicit cuda_error(char const* const message) - : std::runtime_error(message) {} + : raft::exception(message) {} explicit cuda_error(std::string const& message) - : std::runtime_error(message) {} + : raft::exception(message) {} }; /** * @brief Exception thrown when a cuRAND error is encountered. */ -struct curand_error : public std::runtime_error { +struct curand_error : public raft::exception { explicit curand_error(char const* const message) - : std::runtime_error(message) {} + : raft::exception(message) {} explicit curand_error(std::string const& message) - : std::runtime_error(message) {} + : raft::exception(message) {} }; /** * @brief Exception thrown when a cuSparse error is encountered. */ -struct cusparse_error : public std::runtime_error { +struct cusparse_error : public raft::exception { explicit cusparse_error(char const* const message) - : std::runtime_error(message) {} + : raft::exception(message) {} explicit cusparse_error(std::string const& message) - : std::runtime_error(message) {} + : raft::exception(message) {} }; /** * @brief Exception thrown when a NCCL error is encountered. */ -struct nccl_error : public std::runtime_error { +struct nccl_error : public raft::exception { explicit nccl_error(char const* const message) - : std::runtime_error(message) {} + : raft::exception(message) {} explicit nccl_error(std::string const& message) - : std::runtime_error(message) {} + : raft::exception(message) {} }; } // namespace raft From 0e62ceab09105258897ce2d5562fe2b7d992d9bb Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Wed, 10 Jun 2020 16:55:14 -0400 Subject: [PATCH 14/28] move macros out from the raft namespace --- cpp/include/raft/cudart_utils.h | 4 +-- cpp/include/raft/error.hpp | 38 ++++++++++----------- cpp/include/raft/linalg/cublas_wrappers.h | 6 ++-- cpp/include/raft/linalg/cusolver_wrappers.h | 6 ++-- cpp/include/raft/sparse/cusparse_wrappers.h | 6 ++-- 5 files changed, 30 insertions(+), 30 deletions(-) diff --git a/cpp/include/raft/cudart_utils.h b/cpp/include/raft/cudart_utils.h index 5ae4bcbac2..2eea710897 100644 --- a/cpp/include/raft/cudart_utils.h +++ b/cpp/include/raft/cudart_utils.h @@ -26,8 +26,6 @@ ///@todo: enable once logging has been enabled in raft //#include "logger.hpp" -namespace raft { - /** check for cuda runtime API errors and assert accordingly */ #define CUDA_CHECK(call) \ do { \ @@ -50,6 +48,8 @@ namespace raft { } \ } while (0) +namespace raft { + /** helper method to get max usable shared mem per block parameter */ inline int get_shared_memory_per_block() { int dev_id; diff --git a/cpp/include/raft/error.hpp b/cpp/include/raft/error.hpp index 89c836f2c8..2a10854918 100644 --- a/cpp/include/raft/error.hpp +++ b/cpp/include/raft/error.hpp @@ -80,25 +80,6 @@ class exception : public std::exception { } }; -/** macro to throw a runtime error */ -#define THROW(fmt, ...) \ - do { \ - std::string msg; \ - char errMsg[2048]; /* NOLINT */ \ - std::snprintf(errMsg, sizeof(errMsg), \ - "exception occured! file=%s line=%d: ", __FILE__, __LINE__); \ - msg += errMsg; \ - std::snprintf(errMsg, sizeof(errMsg), fmt, ##__VA_ARGS__); \ - msg += errMsg; \ - throw raft::exception(msg); \ - } while (0) - -/** macro to check for a conditional and assert on failure */ -#define ASSERT(check, fmt, ...) \ - do { \ - if (!(check)) THROW(fmt, ##__VA_ARGS__); \ - } while (0) - /** * @brief Exception thrown when logical precondition is violated. * @@ -154,6 +135,25 @@ struct nccl_error : public raft::exception { } // namespace raft +/** macro to throw a runtime error */ +#define THROW(fmt, ...) \ + do { \ + std::string msg; \ + char errMsg[2048]; /* NOLINT */ \ + std::snprintf(errMsg, sizeof(errMsg), \ + "exception occured! file=%s line=%d: ", __FILE__, __LINE__); \ + msg += errMsg; \ + std::snprintf(errMsg, sizeof(errMsg), fmt, ##__VA_ARGS__); \ + msg += errMsg; \ + throw raft::exception(msg); \ + } while (0) + +/** macro to check for a conditional and assert on failure */ +#define ASSERT(check, fmt, ...) \ + do { \ + if (!(check)) THROW(fmt, ##__VA_ARGS__); \ + } while (0) + #define STRINGIFY_DETAIL(x) #x #define RAFT_STRINGIFY(x) STRINGIFY_DETAIL(x) diff --git a/cpp/include/raft/linalg/cublas_wrappers.h b/cpp/include/raft/linalg/cublas_wrappers.h index cd8a508a84..170221a844 100644 --- a/cpp/include/raft/linalg/cublas_wrappers.h +++ b/cpp/include/raft/linalg/cublas_wrappers.h @@ -22,9 +22,6 @@ #include #include -namespace raft { -namespace linalg { - #define _CUBLAS_ERR_TO_STR(err) \ case err: \ return #err @@ -66,6 +63,9 @@ inline const char *cublas_error_to_string(cublasStatus_t err) { // } \ // } while (0) +namespace raft { +namespace linalg { + /** * @defgroup Axpy cublas ax+y operations * @{ diff --git a/cpp/include/raft/linalg/cusolver_wrappers.h b/cpp/include/raft/linalg/cusolver_wrappers.h index 92ba1a2194..e5705ada5d 100644 --- a/cpp/include/raft/linalg/cusolver_wrappers.h +++ b/cpp/include/raft/linalg/cusolver_wrappers.h @@ -22,9 +22,6 @@ //#include #include -namespace raft { -namespace linalg { - #define _CUSOLVER_ERR_TO_STR(err) \ case err: \ return #err; @@ -66,6 +63,9 @@ inline const char *cusolver_error_to_string(cusolverStatus_t err) { // } \ // } while (0) +namespace raft { +namespace linalg { + /** * @defgroup Getrf cusolver getrf operations * @{ diff --git a/cpp/include/raft/sparse/cusparse_wrappers.h b/cpp/include/raft/sparse/cusparse_wrappers.h index 1c63d2348b..ccb6622d5b 100644 --- a/cpp/include/raft/sparse/cusparse_wrappers.h +++ b/cpp/include/raft/sparse/cusparse_wrappers.h @@ -21,9 +21,6 @@ //#include #include -namespace raft { -namespace sparse { - #define _CUSPARSE_ERR_TO_STR(err) \ case err: \ return #err; @@ -67,6 +64,9 @@ inline const char* cusparse_error_to_string(cusparseStatus_t err) { // } \ // } while (0) +namespace raft { +namespace sparse { + /** * @defgroup gthr cusparse gather methods * @{ From 55922cb277d244a6bd8e14fd1391bb48591ff51f Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 11 Jun 2020 00:31:30 -0400 Subject: [PATCH 15/28] remove CUML(GRAPH)_EXPECTS(FAIL) --- cpp/include/raft/error.hpp | 54 +------------------------------------- 1 file changed, 1 insertion(+), 53 deletions(-) diff --git a/cpp/include/raft/error.hpp b/cpp/include/raft/error.hpp index 2a10854918..38705e17d8 100644 --- a/cpp/include/raft/error.hpp +++ b/cpp/include/raft/error.hpp @@ -84,7 +84,7 @@ class exception : public std::exception { * @brief Exception thrown when logical precondition is violated. * * This exception should not be thrown directly and is instead thrown by the - * RAFT_EXPECTS, RAFT_FAIL, CUML_EXPECTS, CUML_FAIL, CUGRAPH_EXPECTS, CUGRAPH_FAIL macros. + * RAFT_EXPECTS and RAFT_FAIL macros. * */ struct logic_error : public raft::exception { @@ -183,58 +183,6 @@ struct nccl_error : public raft::exception { throw raft::logic_error("RAFT failure at: " __FILE__ \ ":" RAFT_STRINGIFY(__LINE__) ": " reason) -/** - * @brief Macro for checking (pre-)conditions that throws an exception when - * a condition is violated. - * - * @param[in] cond Expression that evaluates to true or false - * @param[in] reason String literal description of the reason that cond is - * expected to be true - * @throw raft::logic_error if the condition evaluates to false. - */ -#define CUML_EXPECTS(cond, reason) \ - (!!(cond)) \ - ? static_cast(0) \ - : throw raft::logic_error("cuML failure at: " __FILE__ \ - ":" RAFT_STRINGIFY(__LINE__) ": " reason) - -/** - * @brief Indicates that an erroneous code path has been taken. - * - * In host code, throws a `raft::logic_error`. - * - * @param[in] reason String literal description of the reason - */ -#define CUML_FAIL(reason) \ - throw raft::logic_error("cuML failure at: " __FILE__ \ - ":" RAFT_STRINGIFY(__LINE__) ": " reason) - -/** - * @brief Macro for checking (pre-)conditions that throws an exception when - * a condition is violated. - * - * @param[in] cond Expression that evaluates to true or false - * @param[in] reason String literal description of the reason that cond is - * expected to be true - * @throw raft::logic_error if the condition evaluates to false. - */ -#define CUGRAPH_EXPECTS(cond, reason) \ - (!!(cond)) \ - ? static_cast(0) \ - : throw raft::logic_error("cuGRAPH failure at: " __FILE__ \ - ":" RAFT_STRINGIFY(__LINE__) ": " reason) - -/** - * @brief Indicates that an erroneous code path has been taken. - * - * In host code, throws a `raft::logic_error`. - * - * @param[in] reason String literal description of the reason - */ -#define CUGRAPH_FAIL(reason) \ - throw raft::logic_error("cuGRAPH failure at: " __FILE__ \ - ":" RAFT_STRINGIFY(__LINE__) ": " reason) - namespace raft { namespace detail { From acd5824e19765f1ff9d5933bd8d50b0e0aa99b60 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 11 Jun 2020 00:59:23 -0400 Subject: [PATCH 16/28] update RAFT_EXPECTS and RAFT_FAIL --- cpp/include/raft/error.hpp | 39 ++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/cpp/include/raft/error.hpp b/cpp/include/raft/error.hpp index 38705e17d8..6eeeacdb92 100644 --- a/cpp/include/raft/error.hpp +++ b/cpp/include/raft/error.hpp @@ -148,40 +148,43 @@ struct nccl_error : public raft::exception { throw raft::exception(msg); \ } while (0) +// FIXME: Need to be replaced with RAFT_EXPECTS /** macro to check for a conditional and assert on failure */ #define ASSERT(check, fmt, ...) \ do { \ if (!(check)) THROW(fmt, ##__VA_ARGS__); \ } while (0) -#define STRINGIFY_DETAIL(x) #x -#define RAFT_STRINGIFY(x) STRINGIFY_DETAIL(x) - /** - * @brief Macro for checking (pre-)conditions that throws an exception when - * a condition is violated. + * @brief Macro for checking (pre-)conditions that throws an exception when a condition is false * * @param[in] cond Expression that evaluates to true or false - * @param[in] reason String literal description of the reason that cond is - * expected to be true + * @param[in] fmt String literal description of the reason that cond is expected to be true with + * optinal format tagas * @throw raft::logic_error if the condition evaluates to false. */ -#define RAFT_EXPECTS(cond, reason) \ - (!!(cond)) \ - ? static_cast(0) \ - : throw raft::logic_error("RAFT failure at: " __FILE__ \ - ":" RAFT_STRINGIFY(__LINE__) ": " reason) +#define RAFT_EXPECTS(cond, fmt, ...) \ + do { \ + if (!cond) { \ + std::string msg{}; \ + char err_msg[2048]; /* NOLINT */ \ + std::snprintf(err_msg, sizeof(err_msg), \ + "RAFT failure at file=%s line=%d: ", __FILE__, __LINE__); \ + msg += err_msg; \ + std::snprintf(err_msg, sizeof(err_msg), fmt, ##__VA_ARGS__); \ + msg += err_msg; \ + throw raft::logic_error(msg); \ + } \ + } while (0) /** * @brief Indicates that an erroneous code path has been taken. * - * In host code, throws a `raft::logic_error`. - * - * @param[in] reason String literal description of the reason + * @param[in] fmt String literal description of the reason that this code path is erroneous with + * optinal format tagas + * @throw always throws raft::logic_error */ -#define RAFT_FAIL(reason) \ - throw raft::logic_error("RAFT failure at: " __FILE__ \ - ":" RAFT_STRINGIFY(__LINE__) ": " reason) +#define RAFT_FAIL(fmt, ...) RAFT_EXPECTS(false, fmt, ##__VA_ARGS__) namespace raft { namespace detail { From 4a48b57294947a1134ab1c3f2c11f04394003f6d Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 11 Jun 2020 01:17:24 -0400 Subject: [PATCH 17/28] compile error fix (namespace) --- cpp/include/raft/linalg/cublas_wrappers.h | 14 ++++++++++++-- cpp/include/raft/linalg/cusolver_wrappers.h | 14 ++++++++++++-- cpp/include/raft/sparse/cusparse_wrappers.h | 14 ++++++++++++-- 3 files changed, 36 insertions(+), 6 deletions(-) diff --git a/cpp/include/raft/linalg/cublas_wrappers.h b/cpp/include/raft/linalg/cublas_wrappers.h index 170221a844..84b3add031 100644 --- a/cpp/include/raft/linalg/cublas_wrappers.h +++ b/cpp/include/raft/linalg/cublas_wrappers.h @@ -25,6 +25,11 @@ #define _CUBLAS_ERR_TO_STR(err) \ case err: \ return #err + +namespace raft { +namespace linalg { +namespace detail { + inline const char *cublas_error_to_string(cublasStatus_t err) { switch (err) { _CUBLAS_ERR_TO_STR(CUBLAS_STATUS_SUCCESS); @@ -41,6 +46,11 @@ inline const char *cublas_error_to_string(cublasStatus_t err) { return "CUBLAS_STATUS_UNKNOWN"; }; } + +}; // namespace detail +}; // namespace linalg +}; // namespace raft + #undef _CUBLAS_ERR_TO_STR /** check for cublas runtime API errors and assert accordingly */ @@ -49,7 +59,7 @@ inline const char *cublas_error_to_string(cublasStatus_t err) { cublasStatus_t err = call; \ ASSERT(err == CUBLAS_STATUS_SUCCESS, \ "CUBLAS call='%s' got errorcode=%d err=%s", #call, err, \ - raft::linalg::cublas_error_to_string(err)); \ + raft::linalg::detail::cublas_error_to_string(err)); \ } while (0) ///@todo: enable this once we have logging enabled @@ -59,7 +69,7 @@ inline const char *cublas_error_to_string(cublasStatus_t err) { // cublasStatus_t err = call; \ // if (err != CUBLAS_STATUS_SUCCESS) { \ // CUML_LOG_ERROR("CUBLAS call='%s' got errorcode=%d err=%s", #call, err, \ -// raft::linalg::cublas_error_to_string(err)); \ +// raft::linalg::detail::cublas_error_to_string(err)); \ // } \ // } while (0) diff --git a/cpp/include/raft/linalg/cusolver_wrappers.h b/cpp/include/raft/linalg/cusolver_wrappers.h index e5705ada5d..d7df86ac21 100644 --- a/cpp/include/raft/linalg/cusolver_wrappers.h +++ b/cpp/include/raft/linalg/cusolver_wrappers.h @@ -25,6 +25,11 @@ #define _CUSOLVER_ERR_TO_STR(err) \ case err: \ return #err; + +namespace raft { +namespace linalg { +namespace detail { + inline const char *cusolver_error_to_string(cusolverStatus_t err) { switch (err) { _CUSOLVER_ERR_TO_STR(CUSOLVER_STATUS_SUCCESS); @@ -41,6 +46,11 @@ inline const char *cusolver_error_to_string(cusolverStatus_t err) { return "CUSOLVER_STATUS_UNKNOWN"; }; } + +}; // namespace detail +}; // namespace linalg +}; // namespace raft + #undef _CUSOLVER_ERR_TO_STR /** check for cusolver runtime API errors and assert accordingly */ @@ -49,7 +59,7 @@ inline const char *cusolver_error_to_string(cusolverStatus_t err) { cusolverStatus_t err = call; \ ASSERT(err == CUSOLVER_STATUS_SUCCESS, \ "CUSOLVER call='%s' got errorcode=%d err=%s", #call, err, \ - raft::linalg::cusolver_error_to_string(err)); \ + raft::linalg::detail::cusolver_error_to_string(err)); \ } while (0) ///@todo: enable this once logging is enabled @@ -59,7 +69,7 @@ inline const char *cusolver_error_to_string(cusolverStatus_t err) { // cusolverStatus_t err = call; \ // if (err != CUSOLVER_STATUS_SUCCESS) { \ // CUML_LOG_ERROR("CUSOLVER call='%s' got errorcode=%d err=%s", #call, err, \ -// raft::linalg::cusolver_error_to_string(err)); \ +// raft::linalg::detail::cusolver_error_to_string(err)); \ // } \ // } while (0) diff --git a/cpp/include/raft/sparse/cusparse_wrappers.h b/cpp/include/raft/sparse/cusparse_wrappers.h index ccb6622d5b..a4a8173b88 100644 --- a/cpp/include/raft/sparse/cusparse_wrappers.h +++ b/cpp/include/raft/sparse/cusparse_wrappers.h @@ -24,6 +24,11 @@ #define _CUSPARSE_ERR_TO_STR(err) \ case err: \ return #err; + +namespace raft { +namespace sparse { +namespace detail { + inline const char* cusparse_error_to_string(cusparseStatus_t err) { #if defined(CUDART_VERSION) && CUDART_VERSION >= 10100 return cusparseGetErrorString(status); @@ -42,6 +47,11 @@ inline const char* cusparse_error_to_string(cusparseStatus_t err) { }; #endif // CUDART_VERSION } + +}; // namespace detail +}; // namespace sparse +}; // namespace raft + #undef _CUSPARSE_ERR_TO_STR /** check for cusparse runtime API errors and assert accordingly */ @@ -50,7 +60,7 @@ inline const char* cusparse_error_to_string(cusparseStatus_t err) { cusparseStatus_t err = call; \ ASSERT(err == CUSPARSE_STATUS_SUCCESS, \ "CUSPARSE call='%s' got errorcode=%d err=%s", #call, err, \ - raft::sparse::cusparse_error_to_string(err)); \ + raft::sparse::detail::cusparse_error_to_string(err)); \ } while (0) ///@todo: enable this once logging is enabled @@ -60,7 +70,7 @@ inline const char* cusparse_error_to_string(cusparseStatus_t err) { // cusparseStatus_t err = call; \ // if (err != CUSPARSE_STATUS_SUCCESS) { \ // CUML_LOG_ERROR("CUSPARSE call='%s' got errorcode=%d err=%s", #call, err, \ -// raft::sparse::cusparse_error_to_string(err)); \ +// raft::sparse::detail::cusparse_error_to_string(err)); \ // } \ // } while (0) From 059f1ec28b6cd350009b899c9fde36f9562ee7a0 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 11 Jun 2020 01:29:02 -0400 Subject: [PATCH 18/28] minor fixes to RAFT_EXPECTS(FAIL) --- cpp/include/raft/error.hpp | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/cpp/include/raft/error.hpp b/cpp/include/raft/error.hpp index 6eeeacdb92..644cc0f855 100644 --- a/cpp/include/raft/error.hpp +++ b/cpp/include/raft/error.hpp @@ -135,6 +135,7 @@ struct nccl_error : public raft::exception { } // namespace raft +// FIXME: Need to be replaced with RAFT_FAIL /** macro to throw a runtime error */ #define THROW(fmt, ...) \ do { \ @@ -155,6 +156,17 @@ struct nccl_error : public raft::exception { if (!(check)) THROW(fmt, ##__VA_ARGS__); \ } while (0) +#define SET_ERROR_MSG(msg, location_prefix, fmt, ...) \ + do { \ + char err_msg[2048]; /* NOLINT */ \ + std::snprintf(err_msg, sizeof(err_msg), "RAFT failure at %s", __FILE__); \ + msg += err_msg; \ + std::snprintf(err_msg, sizeof(err_msg), "file=%s line=%d: ", __FILE__, __LINE__); \ + msg += err_msg; \ + std::snprintf(err_msg, sizeof(err_msg), fmt, ##__VA_ARGS__); \ + msg += err_msg; \ + } while(0) + /** * @brief Macro for checking (pre-)conditions that throws an exception when a condition is false * @@ -163,18 +175,13 @@ struct nccl_error : public raft::exception { * optinal format tagas * @throw raft::logic_error if the condition evaluates to false. */ -#define RAFT_EXPECTS(cond, fmt, ...) \ - do { \ - if (!cond) { \ - std::string msg{}; \ - char err_msg[2048]; /* NOLINT */ \ - std::snprintf(err_msg, sizeof(err_msg), \ - "RAFT failure at file=%s line=%d: ", __FILE__, __LINE__); \ - msg += err_msg; \ - std::snprintf(err_msg, sizeof(err_msg), fmt, ##__VA_ARGS__); \ - msg += err_msg; \ - throw raft::logic_error(msg); \ - } \ +#define RAFT_EXPECTS(cond, fmt, ...) \ + do { \ + if (!cond) { \ + std::string msg{}; \ + SET_ERROR_MSG(msg, "RAFT failure at ", fmt, ##__VA_ARGS__); \ + throw raft::logic_error(msg); \ + } \ } while (0) /** @@ -184,7 +191,12 @@ struct nccl_error : public raft::exception { * optinal format tagas * @throw always throws raft::logic_error */ -#define RAFT_FAIL(fmt, ...) RAFT_EXPECTS(false, fmt, ##__VA_ARGS__) +#define RAFT_FAIL(fmt, ...) \ + do { \ + std::string msg{}; \ + SET_ERROR_MSG(msg, "RAFT failure at ", fmt, ##__VA_ARGS__); \ + throw raft::logic_error(msg); \ + } while (0) namespace raft { namespace detail { From 125911c4e68776197ef2ffae647666d538fde349 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 11 Jun 2020 17:00:01 -0400 Subject: [PATCH 19/28] move error check macros from error.hpp to relevant headers --- cpp/include/raft/comms/std_comms.hpp | 41 +++- cpp/include/raft/cudart_utils.h | 62 +++++- cpp/include/raft/error.hpp | 229 +------------------- cpp/include/raft/linalg/cublas_wrappers.h | 63 ++++-- cpp/include/raft/linalg/cusolver_wrappers.h | 61 ++++-- cpp/include/raft/sparse/cusparse_wrappers.h | 64 ++++-- 6 files changed, 236 insertions(+), 284 deletions(-) diff --git a/cpp/include/raft/comms/std_comms.hpp b/cpp/include/raft/comms/std_comms.hpp index 1ba7552f9c..4aa3f20772 100644 --- a/cpp/include/raft/comms/std_comms.hpp +++ b/cpp/include/raft/comms/std_comms.hpp @@ -44,13 +44,42 @@ #include #include +#include -#define NCCL_CHECK(call) \ - do { \ - ncclResult_t status = call; \ - ASSERT(ncclSuccess == status, "ERROR: NCCL call='%s'. Reason:%s\n", #call, \ - ncclGetErrorString(status)); \ - } while (0) +namespace raft { + +/** + * @brief Exception thrown when a NCCL error is encountered. + */ +struct nccl_error : public raft::exception { + explicit nccl_error(char const* const message) + : raft::exception(message) {} + explicit nccl_error(std::string const& message) + : raft::exception(message) {} +}; + +}; // namespace raft + +/** + * @brief Error checking macro for NCCL runtime API functions. + * + * Invokes a NCCL runtime API function call, if the call does not return ncclSuccess, throws an + * exception detailing the NCCL error that occurred + */ +#define NCCL_TRY(call) \ + do { \ + ncclResult_t const status = (call); \ + if (ncclSuccess != status) { \ + std::string msg{}; \ + SET_ERROR_MSG( \ + msg, "NCCL error encountered at: ", "call='%s', Reason=%d:%s", \ + #call, status, ncclGetErrorString(status)); \ + throw raft::nccl_error(msg); \ + } \ + } while (0); + +/** FIXME: temporary alias for cuML compatibility */ +#define NCCL_CHECK(call) NCCL_TRY(call) #define NCCL_CHECK_NO_THROW(call) \ do { \ diff --git a/cpp/include/raft/cudart_utils.h b/cpp/include/raft/cudart_utils.h index 2eea710897..93cf87b8cb 100644 --- a/cpp/include/raft/cudart_utils.h +++ b/cpp/include/raft/cudart_utils.h @@ -26,13 +26,63 @@ ///@todo: enable once logging has been enabled in raft //#include "logger.hpp" -/** check for cuda runtime API errors and assert accordingly */ -#define CUDA_CHECK(call) \ +namespace raft { + +/** + * @brief Exception thrown when a CUDA error is encountered. + */ +struct cuda_error : public raft::exception { + explicit cuda_error(char const* const message) + : raft::exception(message) {} + explicit cuda_error(std::string const& message) + : raft::exception(message) {} +}; + +} + +/** + * @brief Error checking macro for CUDA runtime API functions. + * + * Invokes a CUDA runtime API function call, if the call does not return + * cudaSuccess, invokes cudaGetLastError() to clear the error and throws an + * exception detailing the CUDA error that occurred + * + */ +#define CUDA_TRY(call) \ do { \ - cudaError_t status = call; \ - ASSERT(status == cudaSuccess, "FAIL: call='%s'. Reason:%s", #call, \ - cudaGetErrorString(status)); \ - } while (0) + cudaError_t const status = call; \ + if (status != cudaSuccess) { \ + cudaGetLastError(); \ + std::string msg{}; \ + SET_ERROR_MSG( \ + msg, "CUDA error encountered at: ", "call='%s', Reason=%s:%s", \ + #call, cudaGetErrorName(status), cudaGetErrorString(status)); \ + throw raft::cuda_error(msg); \ + } \ + } while(0) + +/** + * @brief Debug macro to check for CUDA errors + * + * In a non-release build, this macro will synchronize the specified stream + * before error checking. In both release and non-release builds, this macro + * checks for any pending CUDA errors from previous calls. If an error is + * reported, an exception is thrown detailing the CUDA error that occurred. + * + * The intent of this macro is to provide a mechanism for synchronous and + * deterministic execution for debugging asynchronous CUDA execution. It should + * be used after any asynchronous CUDA call, e.g., cudaMemcpyAsync, or an + * asynchronous kernel launch. + * + */ +#ifndef NDEBUG +#define CHECK_CUDA(stream) CUDA_TRY(cudaStreamSynchronize(stream)); +#else +#define CHECK_CUDA(stream) CUDA_TRY(cudaPeekAtLastError()); +#endif + +/** FIXME: temporary alias for cuML compatibility */ +#define CUDA_CHECK(call) CUDA_TRY(call) ///@todo: enable this only after we have added logging support in raft // /** diff --git a/cpp/include/raft/error.hpp b/cpp/include/raft/error.hpp index 644cc0f855..480805f35f 100644 --- a/cpp/include/raft/error.hpp +++ b/cpp/include/raft/error.hpp @@ -89,48 +89,7 @@ class exception : public std::exception { */ struct logic_error : public raft::exception { explicit logic_error(char const* const message) : raft::exception(message) {} - explicit logic_error(std::string const& message) - : raft::exception(message) {} -}; - -/** - * @brief Exception thrown when a CUDA error is encountered. - */ -struct cuda_error : public raft::exception { - explicit cuda_error(char const* const message) - : raft::exception(message) {} - explicit cuda_error(std::string const& message) - : raft::exception(message) {} -}; - -/** - * @brief Exception thrown when a cuRAND error is encountered. - */ -struct curand_error : public raft::exception { - explicit curand_error(char const* const message) - : raft::exception(message) {} - explicit curand_error(std::string const& message) - : raft::exception(message) {} -}; - -/** - * @brief Exception thrown when a cuSparse error is encountered. - */ -struct cusparse_error : public raft::exception { - explicit cusparse_error(char const* const message) - : raft::exception(message) {} - explicit cusparse_error(std::string const& message) - : raft::exception(message) {} -}; - -/** - * @brief Exception thrown when a NCCL error is encountered. - */ -struct nccl_error : public raft::exception { - explicit nccl_error(char const* const message) - : raft::exception(message) {} - explicit nccl_error(std::string const& message) - : raft::exception(message) {} + explicit logic_error(std::string const& message) : raft::exception(message) {} }; } // namespace raft @@ -156,16 +115,17 @@ struct nccl_error : public raft::exception { if (!(check)) THROW(fmt, ##__VA_ARGS__); \ } while (0) -#define SET_ERROR_MSG(msg, location_prefix, fmt, ...) \ - do { \ - char err_msg[2048]; /* NOLINT */ \ - std::snprintf(err_msg, sizeof(err_msg), "RAFT failure at %s", __FILE__); \ - msg += err_msg; \ - std::snprintf(err_msg, sizeof(err_msg), "file=%s line=%d: ", __FILE__, __LINE__); \ - msg += err_msg; \ - std::snprintf(err_msg, sizeof(err_msg), fmt, ##__VA_ARGS__); \ - msg += err_msg; \ - } while(0) +#define SET_ERROR_MSG(msg, location_prefix, fmt, ...) \ + do { \ + char err_msg[2048]; /* NOLINT */ \ + std::snprintf(err_msg, sizeof(err_msg), "RAFT failure at %s", __FILE__); \ + msg += err_msg; \ + std::snprintf(err_msg, sizeof(err_msg), "file=%s line=%d: ", __FILE__, \ + __LINE__); \ + msg += err_msg; \ + std::snprintf(err_msg, sizeof(err_msg), fmt, ##__VA_ARGS__); \ + msg += err_msg; \ + } while (0) /** * @brief Macro for checking (pre-)conditions that throws an exception when a condition is false @@ -197,168 +157,3 @@ struct nccl_error : public raft::exception { SET_ERROR_MSG(msg, "RAFT failure at ", fmt, ##__VA_ARGS__); \ throw raft::logic_error(msg); \ } while (0) - -namespace raft { -namespace detail { - -inline void throw_cuda_error(cudaError_t error, const char* file, - unsigned int line) { - throw raft::cuda_error( - std::string{"CUDA error encountered at: " + std::string{file} + ":" + - std::to_string(line) + ": " + std::to_string(error) + " " + - cudaGetErrorName(error) + " " + cudaGetErrorString(error)}); -} - -inline void throw_nccl_error(ncclResult_t error, const char* file, - unsigned int line) { - throw raft::nccl_error( - std::string{"NCCL error encountered at: " + std::string{file} + ":" + - std::to_string(line) + ": " + std::to_string(error) + " " + - ncclGetErrorString(error)}); -} - -#define _CURAND_ERR_TO_STR(err) \ - case err: \ - return #err; -inline auto curand_error_to_string(curandStatus_t err) -> const char* { - switch (err) { - _CURAND_ERR_TO_STR(CURAND_STATUS_SUCCESS); - _CURAND_ERR_TO_STR(CURAND_STATUS_VERSION_MISMATCH); - _CURAND_ERR_TO_STR(CURAND_STATUS_NOT_INITIALIZED); - _CURAND_ERR_TO_STR(CURAND_STATUS_ALLOCATION_FAILED); - _CURAND_ERR_TO_STR(CURAND_STATUS_TYPE_ERROR); - _CURAND_ERR_TO_STR(CURAND_STATUS_OUT_OF_RANGE); - _CURAND_ERR_TO_STR(CURAND_STATUS_LENGTH_NOT_MULTIPLE); - _CURAND_ERR_TO_STR(CURAND_STATUS_DOUBLE_PRECISION_REQUIRED); - _CURAND_ERR_TO_STR(CURAND_STATUS_LAUNCH_FAILURE); - _CURAND_ERR_TO_STR(CURAND_STATUS_PREEXISTING_FAILURE); - _CURAND_ERR_TO_STR(CURAND_STATUS_INITIALIZATION_FAILED); - _CURAND_ERR_TO_STR(CURAND_STATUS_ARCH_MISMATCH); - _CURAND_ERR_TO_STR(CURAND_STATUS_INTERNAL_ERROR); - default: - return "CURAND_STATUS_UNKNOWN"; - }; -} -#undef _CURAND_ERR_TO_STR - -inline void throw_curand_error(curandStatus_t error, const char* file, - unsigned int line) { - throw raft::curand_error( - std::string{"cuRAND error encountered at: " + std::string{file} + ":" + - std::to_string(line) + ": " + std::to_string(error) + " " + - curand_error_to_string(error)}); -} - -// FIXME: unnecessary once CUDA 10.1+ becomes the minimum supported version -#define _CUSPARSE_ERR_TO_STR(err) \ - case err: \ - return #err; -inline auto cusparse_error_to_string(cusparseStatus_t err) -> const char* { -#if defined(CUDART_VERSION) && CUDART_VERSION >= 10100 - return cusparseGetErrorString(status); -#else // CUDART_VERSION - switch (err) { - _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_SUCCESS); - _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_NOT_INITIALIZED); - _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_ALLOC_FAILED); - _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_INVALID_VALUE); - _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_ARCH_MISMATCH); - _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_EXECUTION_FAILED); - _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_INTERNAL_ERROR); - _CUSPARSE_ERR_TO_STR(CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED); - default: - return "CUSPARSE_STATUS_UNKNOWN"; - }; -#endif // CUDART_VERSION -} -#undef _CUSPARSE_ERR_TO_STR - -inline void throw_cusparse_error(cusparseStatus_t error, const char* file, - unsigned int line) { - throw raft::cusparse_error( - std::string{"cuSparse error encountered at: " + std::string{file} + ":" + - std::to_string(line) + ": " + std::to_string(error) + " " + - cusparse_error_to_string(error)}); -} - -} // namespace detail -} // namespace raft - -/** - * @brief Error checking macro for CUDA runtime API functions. - * - * Invokes a CUDA runtime API function call, if the call does not return - * cudaSuccess, invokes cudaGetLastError() to clear the error and throws an - * exception detailing the CUDA error that occurred - * - */ -#define CUDA_TRY(call) \ - do { \ - cudaError_t const status = (call); \ - if (cudaSuccess != status) { \ - cudaGetLastError(); \ - raft::detail::throw_cuda_error(status, __FILE__, __LINE__); \ - } \ - } while (0); - -/** - * @brief Debug macro to check for CUDA errors - * - * In a non-release build, this macro will synchronize the specified stream - * before error checking. In both release and non-release builds, this macro - * checks for any pending CUDA errors from previous calls. If an error is - * reported, an exception is thrown detailing the CUDA error that occurred. - * - * The intent of this macro is to provide a mechanism for synchronous and - * deterministic execution for debugging asynchronous CUDA execution. It should - * be used after any asynchronous CUDA call, e.g., cudaMemcpyAsync, or an - * asynchronous kernel launch. - * - */ -#ifndef NDEBUG -#define CHECK_CUDA(stream) CUDA_TRY(cudaStreamSynchronize(stream)); -#else -#define CHECK_CUDA(stream) CUDA_TRY(cudaPeekAtLastError()); -#endif - -/** - * @brief Error checking macro for cuRAND runtime API functions. - * - * Invokes a cuRAND runtime API function call, if the call does not return - * CURAND_STATUS_SUCCESS, throws an exception detailing the cuRAND error that occurred - */ -#define CURAND_TRY(call) \ - do { \ - curandStatus_t const status = (call); \ - if (CURAND_STATUS_SUCCESS != status) { \ - raft::detail::throw_curand_error(status, __FILE__, __LINE__); \ - } \ - } while (0); - -/** - * @brief Error checking macro for cuSparse runtime API functions. - * - * Invokes a cuSparse runtime API function call, if the call does not return - * CUSPARSE_STATUS_SUCCESS, throws an exception detailing the cuSparse error that occurred - */ -#define CUSPARSE_TRY(call) \ - do { \ - cusparseStatus_t const status = (call); \ - if (CUSPARSE_STATUS_SUCCESS != status) { \ - raft::detail::throw_cusparse_error(status, __FILE__, __LINE__); \ - } \ - } while (0); - -/** - * @brief Error checking macro for NCCL runtime API functions. - * - * Invokes a NCCL runtime API function call, if the call does not return ncclSuccess, throws an - * exception detailing the NCCL error that occurred - */ -#define NCCL_TRY(call) \ - do { \ - ncclResult_t const status = (call); \ - if (ncclSuccess != status) { \ - raft::detail::throw_nccl_error(status, __FILE__, __LINE__); \ - } \ - } while (0); diff --git a/cpp/include/raft/linalg/cublas_wrappers.h b/cpp/include/raft/linalg/cublas_wrappers.h index 84b3add031..c8d51dca57 100644 --- a/cpp/include/raft/linalg/cublas_wrappers.h +++ b/cpp/include/raft/linalg/cublas_wrappers.h @@ -16,10 +16,12 @@ #pragma once +#include + #include ///@todo: enable this once we have logger enabled //#include -#include + #include #define _CUBLAS_ERR_TO_STR(err) \ @@ -27,6 +29,17 @@ return #err namespace raft { + +/** + * @brief Exception thrown when a cuBLAS error is encountered. + */ +struct cublas_error : public raft::exception { + explicit cublas_error(char const* const message) + : raft::exception(message) {} + explicit cublas_error(std::string const& message) + : raft::exception(message) {} +}; + namespace linalg { namespace detail { @@ -53,25 +66,39 @@ inline const char *cublas_error_to_string(cublasStatus_t err) { #undef _CUBLAS_ERR_TO_STR -/** check for cublas runtime API errors and assert accordingly */ -#define CUBLAS_CHECK(call) \ - do { \ - cublasStatus_t err = call; \ - ASSERT(err == CUBLAS_STATUS_SUCCESS, \ - "CUBLAS call='%s' got errorcode=%d err=%s", #call, err, \ - raft::linalg::detail::cublas_error_to_string(err)); \ - } while (0) +/** + * @brief Error checking macro for cuBLAS runtime API functions. + * + * Invokes a cuBLAS runtime API function call, if the call does not return + * CUBLAS_STATUS_SUCCESS, throws an exception detailing the cuBLAS error that occurred + */ +#define CUBLAS_TRY(call) \ + do { \ + cublasStatus_t const status = (call); \ + if (CUBLAS_STATUS_SUCCESS != status) { \ + std::string msg{}; \ + SET_ERROR_MSG( \ + msg, "cuBLAS error encountered at: ", "call='%s', Reason=%d:%s", \ + #call, status, raft::linalg::detail::cublas_error_to_string(status)); \ + throw raft::cublas_error(msg); \ + } \ + } while(0) + +/** FIXME: temporary alias for cuML compatibility */ +#define CUBLAS_CHECK(call) CUBLAS_TRY(call) ///@todo: enable this once we have logging enabled -// /** check for cublas runtime API errors but do not assert */ -// #define CUBLAS_CHECK_NO_THROW(call) \ -// do { \ -// cublasStatus_t err = call; \ -// if (err != CUBLAS_STATUS_SUCCESS) { \ -// CUML_LOG_ERROR("CUBLAS call='%s' got errorcode=%d err=%s", #call, err, \ -// raft::linalg::detail::cublas_error_to_string(err)); \ -// } \ -// } while (0) +#if 0 +/** check for cublas runtime API errors but do not assert */ +define CUBLAS_CHECK_NO_THROW(call) \ + do { \ + cublasStatus_t err = call; \ + if (err != CUBLAS_STATUS_SUCCESS) { \ + CUML_LOG_ERROR("CUBLAS call='%s' got errorcode=%d err=%s", #call, err, \ + raft::linalg::detail::cublas_error_to_string(err)); \ + } \ + } while (0) +#endif namespace raft { namespace linalg { diff --git a/cpp/include/raft/linalg/cusolver_wrappers.h b/cpp/include/raft/linalg/cusolver_wrappers.h index d7df86ac21..da5455444b 100644 --- a/cpp/include/raft/linalg/cusolver_wrappers.h +++ b/cpp/include/raft/linalg/cusolver_wrappers.h @@ -27,6 +27,17 @@ return #err; namespace raft { + +/** + * @brief Exception thrown when a cuSOLVER error is encountered. + */ +struct cusolver_error : public raft::exception { + explicit cusolver_error(char const* const message) + : raft::exception(message) {} + explicit cusolver_error(std::string const& message) + : raft::exception(message) {} +}; + namespace linalg { namespace detail { @@ -53,25 +64,39 @@ inline const char *cusolver_error_to_string(cusolverStatus_t err) { #undef _CUSOLVER_ERR_TO_STR -/** check for cusolver runtime API errors and assert accordingly */ -#define CUSOLVER_CHECK(call) \ - do { \ - cusolverStatus_t err = call; \ - ASSERT(err == CUSOLVER_STATUS_SUCCESS, \ - "CUSOLVER call='%s' got errorcode=%d err=%s", #call, err, \ - raft::linalg::detail::cusolver_error_to_string(err)); \ +/** + * @brief Error checking macro for cuSOLVER runtime API functions. + * + * Invokes a cuSOLVER runtime API function call, if the call does not return + * CUSolver_STATUS_SUCCESS, throws an exception detailing the cuSOLVER error that occurred + */ +#define CUSOLVER_TRY(call) \ + do { \ + cusolverStatus_t const status = (call); \ + if (CUSOLVER_STATUS_SUCCESS != status) { \ + std::string msg{}; \ + SET_ERROR_MSG( \ + msg, "cuSOLVER error encountered at: ", "call='%s', Reason=%d:%s", \ + #call, status, raft::linalg::detail::cusolver_error_to_string(status)); \ + throw raft::cublas_error(msg); \ + } \ + } while(0) + +/** FIXME: temporary alias for cuML compatibility */ +#define CUSOLVER_CHECK(call) CUSOLVER_TRY(call) + +//@todo: enable this once logging is enabled +#if 0 +** check for cusolver runtime API errors but do not assert */ +define CUSOLVER_CHECK_NO_THROW(call) \ + do { \ + cusolverStatus_t err = call; \ + if (err != CUSOLVER_STATUS_SUCCESS) { \ + CUML_LOG_ERROR("CUSOLVER call='%s' got errorcode=%d err=%s", #call, err, \ + raft::linalg::detail::cusolver_error_to_string(err)); \ + } \ } while (0) - -///@todo: enable this once logging is enabled -// /** check for cusolver runtime API errors but do not assert */ -// #define CUSOLVER_CHECK_NO_THROW(call) \ -// do { \ -// cusolverStatus_t err = call; \ -// if (err != CUSOLVER_STATUS_SUCCESS) { \ -// CUML_LOG_ERROR("CUSOLVER call='%s' got errorcode=%d err=%s", #call, err, \ -// raft::linalg::detail::cusolver_error_to_string(err)); \ -// } \ -// } while (0) +#endif namespace raft { namespace linalg { diff --git a/cpp/include/raft/sparse/cusparse_wrappers.h b/cpp/include/raft/sparse/cusparse_wrappers.h index a4a8173b88..a337c8289b 100644 --- a/cpp/include/raft/sparse/cusparse_wrappers.h +++ b/cpp/include/raft/sparse/cusparse_wrappers.h @@ -16,16 +16,28 @@ #pragma once +#include + #include ///@todo: enable this once logging is enabled //#include -#include #define _CUSPARSE_ERR_TO_STR(err) \ case err: \ return #err; namespace raft { + +/** + * @brief Exception thrown when a cuSparse error is encountered. + */ +struct cusparse_error : public raft::exception { + explicit cusparse_error(char const* const message) + : raft::exception(message) {} + explicit cusparse_error(std::string const& message) + : raft::exception(message) {} +}; + namespace sparse { namespace detail { @@ -54,25 +66,39 @@ inline const char* cusparse_error_to_string(cusparseStatus_t err) { #undef _CUSPARSE_ERR_TO_STR -/** check for cusparse runtime API errors and assert accordingly */ -#define CUSPARSE_CHECK(call) \ - do { \ - cusparseStatus_t err = call; \ - ASSERT(err == CUSPARSE_STATUS_SUCCESS, \ - "CUSPARSE call='%s' got errorcode=%d err=%s", #call, err, \ - raft::sparse::detail::cusparse_error_to_string(err)); \ +/** + * @brief Error checking macro for cuSparse runtime API functions. + * + * Invokes a cuSparse runtime API function call, if the call does not return + * CUSPARSE_STATUS_SUCCESS, throws an exception detailing the cuSparse error that occurred + */ +#define CUSPARSE_TRY(call) \ + do { \ + cusparseStatus_t const status = (call); \ + if (CUSPARSE_STATUS_SUCCESS != status) { \ + std::string msg{}; \ + SET_ERROR_MSG( \ + msg, "cuSparse error encountered at: ", "call='%s', Reason=%d:%s", \ + #call, status, raft::sparse::detail::cusparse_error_to_string(status)); \ + throw raft::cusparse_error(msg); \ + } \ + } while(0) + +/** FIXME: temporary alias for cuML compatibility */ +#define CUSPARSE_CHECK(call) CUSPARSE_TRY(call) + +//@todo: enable this once logging is enabled +#if 0 +/** check for cusparse runtime API errors but do not assert */ +#define CUSPARSE_CHECK_NO_THROW(call) \ + do { \ + cusparseStatus_t err = call; \ + if (err != CUSPARSE_STATUS_SUCCESS) { \ + CUML_LOG_ERROR("CUSPARSE call='%s' got errorcode=%d err=%s", #call, err, \ + raft::sparse::detail::cusparse_error_to_string(err)); \ + } \ } while (0) - -///@todo: enable this once logging is enabled -// /** check for cusparse runtime API errors but do not assert */ -// #define CUSPARSE_CHECK_NO_THROW(call) \ -// do { \ -// cusparseStatus_t err = call; \ -// if (err != CUSPARSE_STATUS_SUCCESS) { \ -// CUML_LOG_ERROR("CUSPARSE call='%s' got errorcode=%d err=%s", #call, err, \ -// raft::sparse::detail::cusparse_error_to_string(err)); \ -// } \ -// } while (0) +#endif namespace raft { namespace sparse { From d3192f42de52b89aa7a119ad54d8c4f4fe28db4c Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 11 Jun 2020 17:07:06 -0400 Subject: [PATCH 20/28] clang-format --- cpp/include/raft/comms/std_comms.hpp | 26 ++++++++--------- cpp/include/raft/cudart_utils.h | 32 ++++++++++----------- cpp/include/raft/linalg/cublas_wrappers.h | 7 ++--- cpp/include/raft/linalg/cusolver_wrappers.h | 26 ++++++++--------- cpp/include/raft/sparse/cusparse_wrappers.h | 22 +++++++------- 5 files changed, 54 insertions(+), 59 deletions(-) diff --git a/cpp/include/raft/comms/std_comms.hpp b/cpp/include/raft/comms/std_comms.hpp index 4aa3f20772..89987a1db7 100644 --- a/cpp/include/raft/comms/std_comms.hpp +++ b/cpp/include/raft/comms/std_comms.hpp @@ -52,10 +52,8 @@ namespace raft { * @brief Exception thrown when a NCCL error is encountered. */ struct nccl_error : public raft::exception { - explicit nccl_error(char const* const message) - : raft::exception(message) {} - explicit nccl_error(std::string const& message) - : raft::exception(message) {} + explicit nccl_error(char const *const message) : raft::exception(message) {} + explicit nccl_error(std::string const &message) : raft::exception(message) {} }; }; // namespace raft @@ -66,16 +64,16 @@ struct nccl_error : public raft::exception { * Invokes a NCCL runtime API function call, if the call does not return ncclSuccess, throws an * exception detailing the NCCL error that occurred */ -#define NCCL_TRY(call) \ - do { \ - ncclResult_t const status = (call); \ - if (ncclSuccess != status) { \ - std::string msg{}; \ - SET_ERROR_MSG( \ - msg, "NCCL error encountered at: ", "call='%s', Reason=%d:%s", \ - #call, status, ncclGetErrorString(status)); \ - throw raft::nccl_error(msg); \ - } \ +#define NCCL_TRY(call) \ + do { \ + ncclResult_t const status = (call); \ + if (ncclSuccess != status) { \ + std::string msg{}; \ + SET_ERROR_MSG(msg, \ + "NCCL error encountered at: ", "call='%s', Reason=%d:%s", \ + #call, status, ncclGetErrorString(status)); \ + throw raft::nccl_error(msg); \ + } \ } while (0); /** FIXME: temporary alias for cuML compatibility */ diff --git a/cpp/include/raft/cudart_utils.h b/cpp/include/raft/cudart_utils.h index 93cf87b8cb..2ea7cb3612 100644 --- a/cpp/include/raft/cudart_utils.h +++ b/cpp/include/raft/cudart_utils.h @@ -32,13 +32,11 @@ namespace raft { * @brief Exception thrown when a CUDA error is encountered. */ struct cuda_error : public raft::exception { - explicit cuda_error(char const* const message) - : raft::exception(message) {} - explicit cuda_error(std::string const& message) - : raft::exception(message) {} + explicit cuda_error(char const* const message) : raft::exception(message) {} + explicit cuda_error(std::string const& message) : raft::exception(message) {} }; -} +} // namespace raft /** * @brief Error checking macro for CUDA runtime API functions. @@ -48,18 +46,18 @@ struct cuda_error : public raft::exception { * exception detailing the CUDA error that occurred * */ -#define CUDA_TRY(call) \ - do { \ - cudaError_t const status = call; \ - if (status != cudaSuccess) { \ - cudaGetLastError(); \ - std::string msg{}; \ - SET_ERROR_MSG( \ - msg, "CUDA error encountered at: ", "call='%s', Reason=%s:%s", \ - #call, cudaGetErrorName(status), cudaGetErrorString(status)); \ - throw raft::cuda_error(msg); \ - } \ - } while(0) +#define CUDA_TRY(call) \ + do { \ + cudaError_t const status = call; \ + if (status != cudaSuccess) { \ + cudaGetLastError(); \ + std::string msg{}; \ + SET_ERROR_MSG( \ + msg, "CUDA error encountered at: ", "call='%s', Reason=%s:%s", #call, \ + cudaGetErrorName(status), cudaGetErrorString(status)); \ + throw raft::cuda_error(msg); \ + } \ + } while (0) /** * @brief Debug macro to check for CUDA errors diff --git a/cpp/include/raft/linalg/cublas_wrappers.h b/cpp/include/raft/linalg/cublas_wrappers.h index c8d51dca57..83f600a49d 100644 --- a/cpp/include/raft/linalg/cublas_wrappers.h +++ b/cpp/include/raft/linalg/cublas_wrappers.h @@ -34,9 +34,8 @@ namespace raft { * @brief Exception thrown when a cuBLAS error is encountered. */ struct cublas_error : public raft::exception { - explicit cublas_error(char const* const message) - : raft::exception(message) {} - explicit cublas_error(std::string const& message) + explicit cublas_error(char const *const message) : raft::exception(message) {} + explicit cublas_error(std::string const &message) : raft::exception(message) {} }; @@ -82,7 +81,7 @@ inline const char *cublas_error_to_string(cublasStatus_t err) { #call, status, raft::linalg::detail::cublas_error_to_string(status)); \ throw raft::cublas_error(msg); \ } \ - } while(0) + } while (0) /** FIXME: temporary alias for cuML compatibility */ #define CUBLAS_CHECK(call) CUBLAS_TRY(call) diff --git a/cpp/include/raft/linalg/cusolver_wrappers.h b/cpp/include/raft/linalg/cusolver_wrappers.h index da5455444b..eed8f9efd2 100644 --- a/cpp/include/raft/linalg/cusolver_wrappers.h +++ b/cpp/include/raft/linalg/cusolver_wrappers.h @@ -32,9 +32,9 @@ namespace raft { * @brief Exception thrown when a cuSOLVER error is encountered. */ struct cusolver_error : public raft::exception { - explicit cusolver_error(char const* const message) + explicit cusolver_error(char const *const message) : raft::exception(message) {} - explicit cusolver_error(std::string const& message) + explicit cusolver_error(std::string const &message) : raft::exception(message) {} }; @@ -70,17 +70,17 @@ inline const char *cusolver_error_to_string(cusolverStatus_t err) { * Invokes a cuSOLVER runtime API function call, if the call does not return * CUSolver_STATUS_SUCCESS, throws an exception detailing the cuSOLVER error that occurred */ -#define CUSOLVER_TRY(call) \ - do { \ - cusolverStatus_t const status = (call); \ - if (CUSOLVER_STATUS_SUCCESS != status) { \ - std::string msg{}; \ - SET_ERROR_MSG( \ - msg, "cuSOLVER error encountered at: ", "call='%s', Reason=%d:%s", \ - #call, status, raft::linalg::detail::cusolver_error_to_string(status)); \ - throw raft::cublas_error(msg); \ - } \ - } while(0) +#define CUSOLVER_TRY(call) \ + do { \ + cusolverStatus_t const status = (call); \ + if (CUSOLVER_STATUS_SUCCESS != status) { \ + std::string msg{}; \ + SET_ERROR_MSG(msg, "cuSOLVER error encountered at: ", \ + "call='%s', Reason=%d:%s", #call, status, \ + raft::linalg::detail::cusolver_error_to_string(status)); \ + throw raft::cublas_error(msg); \ + } \ + } while (0) /** FIXME: temporary alias for cuML compatibility */ #define CUSOLVER_CHECK(call) CUSOLVER_TRY(call) diff --git a/cpp/include/raft/sparse/cusparse_wrappers.h b/cpp/include/raft/sparse/cusparse_wrappers.h index a337c8289b..3b174b4b13 100644 --- a/cpp/include/raft/sparse/cusparse_wrappers.h +++ b/cpp/include/raft/sparse/cusparse_wrappers.h @@ -72,17 +72,17 @@ inline const char* cusparse_error_to_string(cusparseStatus_t err) { * Invokes a cuSparse runtime API function call, if the call does not return * CUSPARSE_STATUS_SUCCESS, throws an exception detailing the cuSparse error that occurred */ -#define CUSPARSE_TRY(call) \ - do { \ - cusparseStatus_t const status = (call); \ - if (CUSPARSE_STATUS_SUCCESS != status) { \ - std::string msg{}; \ - SET_ERROR_MSG( \ - msg, "cuSparse error encountered at: ", "call='%s', Reason=%d:%s", \ - #call, status, raft::sparse::detail::cusparse_error_to_string(status)); \ - throw raft::cusparse_error(msg); \ - } \ - } while(0) +#define CUSPARSE_TRY(call) \ + do { \ + cusparseStatus_t const status = (call); \ + if (CUSPARSE_STATUS_SUCCESS != status) { \ + std::string msg{}; \ + SET_ERROR_MSG(msg, "cuSparse error encountered at: ", \ + "call='%s', Reason=%d:%s", #call, status, \ + raft::sparse::detail::cusparse_error_to_string(status)); \ + throw raft::cusparse_error(msg); \ + } \ + } while (0) /** FIXME: temporary alias for cuML compatibility */ #define CUSPARSE_CHECK(call) CUSPARSE_TRY(call) From ec0cf978c3d4bfbabfa20111d6ef41172b600c19 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 11 Jun 2020 17:37:09 -0400 Subject: [PATCH 21/28] cosmetic updates --- cpp/include/raft/cudart_utils.h | 2 +- cpp/include/raft/error.hpp | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cpp/include/raft/cudart_utils.h b/cpp/include/raft/cudart_utils.h index 2ea7cb3612..329e7e2354 100644 --- a/cpp/include/raft/cudart_utils.h +++ b/cpp/include/raft/cudart_utils.h @@ -16,7 +16,7 @@ #pragma once -#include "raft/error.hpp" +#include #include diff --git a/cpp/include/raft/error.hpp b/cpp/include/raft/error.hpp index 480805f35f..3801792fbc 100644 --- a/cpp/include/raft/error.hpp +++ b/cpp/include/raft/error.hpp @@ -37,19 +37,19 @@ class exception : public std::exception { explicit exception() noexcept : std::exception(), msg_() {} /** copy ctor */ - exception(const exception& src) noexcept + exception(exception const& src) noexcept : std::exception(), msg_(src.what()) { collect_call_stack(); } /** ctor from an input message */ - explicit exception(const std::string _msg) noexcept - : std::exception(), msg_(std::move(_msg)) { + explicit exception(std::string const msg) noexcept + : std::exception(), msg_(std::move(msg)) { collect_call_stack(); } /** get the message associated with this exception */ - const char* what() const noexcept override { return msg_.c_str(); } + char const* what() const noexcept override { return msg_.c_str(); } private: /** message associated with this exception */ From f8f8d32313eef5b4f1926ec3d67a39ff09012c83 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 11 Jun 2020 17:51:30 -0400 Subject: [PATCH 22/28] cosmetic updates --- cpp/include/raft/comms/std_comms.hpp | 8 ++++---- cpp/include/raft/cudart_utils.h | 6 +++--- cpp/include/raft/linalg/cublas_wrappers.h | 10 +++++----- cpp/include/raft/linalg/cusolver_wrappers.h | 10 +++++----- cpp/include/raft/sparse/cusparse_wrappers.h | 10 +++++----- 5 files changed, 22 insertions(+), 22 deletions(-) diff --git a/cpp/include/raft/comms/std_comms.hpp b/cpp/include/raft/comms/std_comms.hpp index 89987a1db7..7a9d834d02 100644 --- a/cpp/include/raft/comms/std_comms.hpp +++ b/cpp/include/raft/comms/std_comms.hpp @@ -52,11 +52,11 @@ namespace raft { * @brief Exception thrown when a NCCL error is encountered. */ struct nccl_error : public raft::exception { - explicit nccl_error(char const *const message) : raft::exception(message) {} - explicit nccl_error(std::string const &message) : raft::exception(message) {} + explicit nccl_error(char const* const message) : raft::exception(message) {} + explicit nccl_error(std::string const& message) : raft::exception(message) {} }; -}; // namespace raft +} // namespace raft /** * @brief Error checking macro for NCCL runtime API functions. @@ -82,7 +82,7 @@ struct nccl_error : public raft::exception { #define NCCL_CHECK_NO_THROW(call) \ do { \ ncclResult_t status = call; \ - if (status != ncclSuccess) { \ + if (ncclSuccess != status) { \ printf("NCCL call='%s' failed. Reason:%s\n", #call, \ ncclGetErrorString(status)); \ } \ diff --git a/cpp/include/raft/cudart_utils.h b/cpp/include/raft/cudart_utils.h index 329e7e2354..f9d99987f1 100644 --- a/cpp/include/raft/cudart_utils.h +++ b/cpp/include/raft/cudart_utils.h @@ -89,8 +89,8 @@ struct cuda_error : public raft::exception { // */ #define CUDA_CHECK_NO_THROW(call) \ do { \ - cudaError_t status = call; \ - if (status != cudaSuccess) { \ + cudaError_t const status = call; \ + if (cudaSuccess != status) { \ printf("CUDA call='%s' at file=%s line=%d failed with %s\n", #call, \ __FILE__, __LINE__, cudaGetErrorString(status)); \ } \ @@ -188,4 +188,4 @@ void print_device_vector(const char* variable_name, const T* devMem, } /** @} */ -}; // namespace raft +} // namespace raft diff --git a/cpp/include/raft/linalg/cublas_wrappers.h b/cpp/include/raft/linalg/cublas_wrappers.h index 83f600a49d..7e8a52196a 100644 --- a/cpp/include/raft/linalg/cublas_wrappers.h +++ b/cpp/include/raft/linalg/cublas_wrappers.h @@ -59,9 +59,9 @@ inline const char *cublas_error_to_string(cublasStatus_t err) { }; } -}; // namespace detail -}; // namespace linalg -}; // namespace raft +} // namespace detail +} // namespace linalg +} // namespace raft #undef _CUBLAS_ERR_TO_STR @@ -578,5 +578,5 @@ inline cublasStatus_t cublasdot(cublasHandle_t handle, int n, const double *x, } /** @} */ -}; // namespace linalg -}; // namespace raft +} // namespace linalg +} // namespace raft diff --git a/cpp/include/raft/linalg/cusolver_wrappers.h b/cpp/include/raft/linalg/cusolver_wrappers.h index eed8f9efd2..a65042a2fd 100644 --- a/cpp/include/raft/linalg/cusolver_wrappers.h +++ b/cpp/include/raft/linalg/cusolver_wrappers.h @@ -58,9 +58,9 @@ inline const char *cusolver_error_to_string(cusolverStatus_t err) { }; } -}; // namespace detail -}; // namespace linalg -}; // namespace raft +} // namespace detail +} // namespace linalg +} // namespace raft #undef _CUSOLVER_ERR_TO_STR @@ -718,5 +718,5 @@ inline cusolverStatus_t cusolverSpcsrqrsvBatched( // NOLINT } /** @} */ -}; // namespace linalg -}; // namespace raft +} // namespace linalg +} // namespace raft diff --git a/cpp/include/raft/sparse/cusparse_wrappers.h b/cpp/include/raft/sparse/cusparse_wrappers.h index 3b174b4b13..9de242ea10 100644 --- a/cpp/include/raft/sparse/cusparse_wrappers.h +++ b/cpp/include/raft/sparse/cusparse_wrappers.h @@ -60,9 +60,9 @@ inline const char* cusparse_error_to_string(cusparseStatus_t err) { #endif // CUDART_VERSION } -}; // namespace detail -}; // namespace sparse -}; // namespace raft +} // namespace detail +} // namespace sparse +} // namespace raft #undef _CUSPARSE_ERR_TO_STR @@ -198,5 +198,5 @@ inline cusparseStatus_t cusparsegemmi( } /** @} */ -}; // namespace sparse -}; // namespace raft +} // namespace sparse +} // namespace raft From c3f153da98867db21087cf4ac5346aa4684fb192 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 11 Jun 2020 17:58:51 -0400 Subject: [PATCH 23/28] stifle some warnings --- cpp/include/raft/comms/std_comms.hpp | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/cpp/include/raft/comms/std_comms.hpp b/cpp/include/raft/comms/std_comms.hpp index 7a9d834d02..30644d8889 100644 --- a/cpp/include/raft/comms/std_comms.hpp +++ b/cpp/include/raft/comms/std_comms.hpp @@ -92,8 +92,6 @@ namespace raft { namespace comms { static size_t get_datatype_size(const datatype_t datatype) { - size_t ret = -1; - switch (datatype) { case datatype_t::CHAR: return sizeof(char); @@ -112,7 +110,7 @@ static size_t get_datatype_size(const datatype_t datatype) { case datatype_t::FLOAT64: return sizeof(double); default: - throw "Unsupported"; + RAFT_FAIL("Unsupported datatype."); } } @@ -172,13 +170,13 @@ class std_comms : public comms_iface { const std::shared_ptr device_allocator, cudaStream_t stream) : nccl_comm_(nccl_comm), - ucp_worker_(ucp_worker), - ucp_eps_(eps), + stream_(stream), num_ranks_(num_ranks), rank_(rank), - device_allocator_(device_allocator), - stream_(stream), - next_request_id_(0) { + ucp_worker_(ucp_worker), + ucp_eps_(eps), + next_request_id_(0), + device_allocator_(device_allocator) { initialize(); }; @@ -192,10 +190,10 @@ class std_comms : public comms_iface { const std::shared_ptr device_allocator, cudaStream_t stream) : nccl_comm_(nccl_comm), + stream_(stream), num_ranks_(num_ranks), rank_(rank), - device_allocator_(device_allocator), - stream_(stream) { + device_allocator_(device_allocator) { initialize(); }; From 85c9b7d6c29db429bde187426fd09f934552c879 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 11 Jun 2020 18:26:59 -0400 Subject: [PATCH 24/28] clang-format error --- cpp/include/raft/comms/std_comms.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/include/raft/comms/std_comms.hpp b/cpp/include/raft/comms/std_comms.hpp index 30644d8889..7304f3bd4e 100644 --- a/cpp/include/raft/comms/std_comms.hpp +++ b/cpp/include/raft/comms/std_comms.hpp @@ -52,8 +52,8 @@ namespace raft { * @brief Exception thrown when a NCCL error is encountered. */ struct nccl_error : public raft::exception { - explicit nccl_error(char const* const message) : raft::exception(message) {} - explicit nccl_error(std::string const& message) : raft::exception(message) {} + explicit nccl_error(char const *const message) : raft::exception(message) {} + explicit nccl_error(std::string const &message) : raft::exception(message) {} }; } // namespace raft From 6d9e392bf65693bc5388ea75eea0fa0aea380bbe Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 16 Jun 2020 09:50:29 -0400 Subject: [PATCH 25/28] fix unused location_prefix in error handling macro --- cpp/include/raft/cudart_utils.h | 1 - cpp/include/raft/error.hpp | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/include/raft/cudart_utils.h b/cpp/include/raft/cudart_utils.h index f9d99987f1..2ca23ba539 100644 --- a/cpp/include/raft/cudart_utils.h +++ b/cpp/include/raft/cudart_utils.h @@ -71,7 +71,6 @@ struct cuda_error : public raft::exception { * deterministic execution for debugging asynchronous CUDA execution. It should * be used after any asynchronous CUDA call, e.g., cudaMemcpyAsync, or an * asynchronous kernel launch. - * */ #ifndef NDEBUG #define CHECK_CUDA(stream) CUDA_TRY(cudaStreamSynchronize(stream)); diff --git a/cpp/include/raft/error.hpp b/cpp/include/raft/error.hpp index 3801792fbc..bcca3f6f9c 100644 --- a/cpp/include/raft/error.hpp +++ b/cpp/include/raft/error.hpp @@ -118,7 +118,7 @@ struct logic_error : public raft::exception { #define SET_ERROR_MSG(msg, location_prefix, fmt, ...) \ do { \ char err_msg[2048]; /* NOLINT */ \ - std::snprintf(err_msg, sizeof(err_msg), "RAFT failure at %s", __FILE__); \ + std::snprintf(err_msg, sizeof(err_msg), location_prefix); \ msg += err_msg; \ std::snprintf(err_msg, sizeof(err_msg), "file=%s line=%d: ", __FILE__, \ __LINE__); \ From 4ebc0af11244db29c0fbcb11e93d5ec5fba18e58 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 16 Jun 2020 09:51:13 -0400 Subject: [PATCH 26/28] remove NCCL_CHECK (replaced with NCCL_TRY) --- cpp/include/raft/comms/std_comms.hpp | 31 +++++++++++++--------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/cpp/include/raft/comms/std_comms.hpp b/cpp/include/raft/comms/std_comms.hpp index 7304f3bd4e..ddc5d8192a 100644 --- a/cpp/include/raft/comms/std_comms.hpp +++ b/cpp/include/raft/comms/std_comms.hpp @@ -76,9 +76,6 @@ struct nccl_error : public raft::exception { } \ } while (0); -/** FIXME: temporary alias for cuML compatibility */ -#define NCCL_CHECK(call) NCCL_TRY(call) - #define NCCL_CHECK_NO_THROW(call) \ do { \ ncclResult_t status = call; \ @@ -349,29 +346,29 @@ class std_comms : public comms_iface { void allreduce(const void *sendbuff, void *recvbuff, size_t count, datatype_t datatype, op_t op, cudaStream_t stream) const { - NCCL_CHECK(ncclAllReduce(sendbuff, recvbuff, count, - get_nccl_datatype(datatype), get_nccl_op(op), - nccl_comm_, stream)); + NCCL_TRY(ncclAllReduce(sendbuff, recvbuff, count, + get_nccl_datatype(datatype), get_nccl_op(op), + nccl_comm_, stream)); } void bcast(void *buff, size_t count, datatype_t datatype, int root, cudaStream_t stream) const { - NCCL_CHECK(ncclBroadcast(buff, buff, count, get_nccl_datatype(datatype), - root, nccl_comm_, stream)); + NCCL_TRY(ncclBroadcast(buff, buff, count, get_nccl_datatype(datatype), + root, nccl_comm_, stream)); } void reduce(const void *sendbuff, void *recvbuff, size_t count, datatype_t datatype, op_t op, int root, cudaStream_t stream) const { - NCCL_CHECK(ncclReduce(sendbuff, recvbuff, count, - get_nccl_datatype(datatype), get_nccl_op(op), root, - nccl_comm_, stream)); + NCCL_TRY(ncclReduce(sendbuff, recvbuff, count, + get_nccl_datatype(datatype), get_nccl_op(op), root, + nccl_comm_, stream)); } void allgather(const void *sendbuff, void *recvbuff, size_t sendcount, datatype_t datatype, cudaStream_t stream) const { - NCCL_CHECK(ncclAllGather(sendbuff, recvbuff, sendcount, - get_nccl_datatype(datatype), nccl_comm_, stream)); + NCCL_TRY(ncclAllGather(sendbuff, recvbuff, sendcount, + get_nccl_datatype(datatype), nccl_comm_, stream)); } void allgatherv(const void *sendbuf, void *recvbuf, const size_t recvcounts[], @@ -381,7 +378,7 @@ class std_comms : public comms_iface { //Listing 1 on page 4. for (int root = 0; root < num_ranks_; ++root) { size_t dtype_size = get_datatype_size(datatype); - NCCL_CHECK(ncclBroadcast( + NCCL_TRY(ncclBroadcast( sendbuf, static_cast(recvbuf) + displs[root] * dtype_size, recvcounts[root], get_nccl_datatype(datatype), root, nccl_comm_, stream)); @@ -390,9 +387,9 @@ class std_comms : public comms_iface { void reducescatter(const void *sendbuff, void *recvbuff, size_t recvcount, datatype_t datatype, op_t op, cudaStream_t stream) const { - NCCL_CHECK(ncclReduceScatter(sendbuff, recvbuff, recvcount, - get_nccl_datatype(datatype), get_nccl_op(op), - nccl_comm_, stream)); + NCCL_TRY(ncclReduceScatter(sendbuff, recvbuff, recvcount, + get_nccl_datatype(datatype), get_nccl_op(op), + nccl_comm_, stream)); } status_t sync_stream(cudaStream_t stream) const { From 851b401383d195537061d35d7d25d03922b96d61 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 16 Jun 2020 09:53:40 -0400 Subject: [PATCH 27/28] clang-format --- cpp/include/raft/error.hpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/cpp/include/raft/error.hpp b/cpp/include/raft/error.hpp index bcca3f6f9c..0b001b01b2 100644 --- a/cpp/include/raft/error.hpp +++ b/cpp/include/raft/error.hpp @@ -115,16 +115,16 @@ struct logic_error : public raft::exception { if (!(check)) THROW(fmt, ##__VA_ARGS__); \ } while (0) -#define SET_ERROR_MSG(msg, location_prefix, fmt, ...) \ - do { \ - char err_msg[2048]; /* NOLINT */ \ - std::snprintf(err_msg, sizeof(err_msg), location_prefix); \ - msg += err_msg; \ - std::snprintf(err_msg, sizeof(err_msg), "file=%s line=%d: ", __FILE__, \ - __LINE__); \ - msg += err_msg; \ - std::snprintf(err_msg, sizeof(err_msg), fmt, ##__VA_ARGS__); \ - msg += err_msg; \ +#define SET_ERROR_MSG(msg, location_prefix, fmt, ...) \ + do { \ + char err_msg[2048]; /* NOLINT */ \ + std::snprintf(err_msg, sizeof(err_msg), location_prefix); \ + msg += err_msg; \ + std::snprintf(err_msg, sizeof(err_msg), "file=%s line=%d: ", __FILE__, \ + __LINE__); \ + msg += err_msg; \ + std::snprintf(err_msg, sizeof(err_msg), fmt, ##__VA_ARGS__); \ + msg += err_msg; \ } while (0) /** From 07a51a406263b85a248be73bebf30a92d4e727d5 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 16 Jun 2020 09:56:33 -0400 Subject: [PATCH 28/28] another clang format --- cpp/include/raft/comms/std_comms.hpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/cpp/include/raft/comms/std_comms.hpp b/cpp/include/raft/comms/std_comms.hpp index ddc5d8192a..3528c148df 100644 --- a/cpp/include/raft/comms/std_comms.hpp +++ b/cpp/include/raft/comms/std_comms.hpp @@ -353,16 +353,15 @@ class std_comms : public comms_iface { void bcast(void *buff, size_t count, datatype_t datatype, int root, cudaStream_t stream) const { - NCCL_TRY(ncclBroadcast(buff, buff, count, get_nccl_datatype(datatype), - root, nccl_comm_, stream)); + NCCL_TRY(ncclBroadcast(buff, buff, count, get_nccl_datatype(datatype), root, + nccl_comm_, stream)); } void reduce(const void *sendbuff, void *recvbuff, size_t count, datatype_t datatype, op_t op, int root, cudaStream_t stream) const { - NCCL_TRY(ncclReduce(sendbuff, recvbuff, count, - get_nccl_datatype(datatype), get_nccl_op(op), root, - nccl_comm_, stream)); + NCCL_TRY(ncclReduce(sendbuff, recvbuff, count, get_nccl_datatype(datatype), + get_nccl_op(op), root, nccl_comm_, stream)); } void allgather(const void *sendbuff, void *recvbuff, size_t sendcount,