Skip to content

Commit

Permalink
update to review (#103)
Browse files Browse the repository at this point in the history
* update to review

- fix a twice error code checking issue
- change macro name `K2_CUDA_CHECK_ERROR` to `K2_CUDA_PRINT_ERROR` as its effect
- add a optional arg `abort` to control abort if hit error
- trivals
fix

* just revert name: `K2_CUDA_CHECK_ERROR`
  • Loading branch information
megazone87 authored Aug 31, 2020
1 parent 630fa7e commit 4a301ac
Show file tree
Hide file tree
Showing 3 changed files with 95 additions and 72 deletions.
65 changes: 39 additions & 26 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,33 +43,46 @@ if(WIN32 AND BUILD_SHARED_LIBS)
set(BUILD_SHARED_LIBS OFF CACHE BOOL "" FORCE)
endif()

# the following settings are modified from cub/CMakeLists.txt
#[[ start settings for CUB ]]

set(CMAKE_CXX_STANDARD 11 CACHE STRING "The C++ version to be used.")
set(CMAKE_CXX_EXTENSIONS OFF)

message(STATUS "C++ Standard version: ${CMAKE_CXX_STANDARD}")

# Force CUDA C++ standard to be the same as the C++ standard used.
#
# Now, CMake is unaligned with reality on standard versions: https://gitlab.kitware.com/cmake/cmake/issues/18597
# which means that using standard CMake methods, it's impossible to actually sync the CXX and CUDA versions for pre-11
# versions of C++; CUDA accepts 98 but translates that to 03, while CXX doesn't accept 03 (and doesn't translate that to 03).
# In case this gives You, dear user, any trouble, please escalate the above CMake bug, so we can support reality properly.
if(DEFINED CMAKE_CUDA_STANDARD)
message(WARNING "You've set CMAKE_CUDA_STANDARD; please note that this variable is ignored, and CMAKE_CXX_STANDARD"
" is used as the C++ standard version for both C++ and CUDA.")
if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.18.0")
find_package(CUDAToolkit REQUIRED)
if(CUDAToolkit_FOUND)
message(STATUS "found CUDAToolkit " ${CUDAToolkit_LIBRARY_DIR})
message(STATUS "CUDAToolkit_INCLUDE_DIRS " ${CUDAToolkit_INCLUDE_DIRS})
message(STATUS "CUDAToolkit_LIBRARY_DIR " ${CUDAToolkit_LIBRARY_DIR})

enable_language(CUDA)

# With many architectures set here, the nvcc build time would be much longer.
# Thus, "61" is put here for speed and compatibility.
# @ToDo Need to cover more architectures, before release these code.
set(CMAKE_CUDA_ARCHITECTURES 61)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda")
endif()
else()
# the following settings are modified from cub/CMakeLists.txt
set(CMAKE_CXX_STANDARD 11 CACHE STRING "The C++ version to be used.")
set(CMAKE_CXX_EXTENSIONS OFF)

message(STATUS "C++ Standard version: ${CMAKE_CXX_STANDARD}")

# Force CUDA C++ standard to be the same as the C++ standard used.
#
# Now, CMake is unaligned with reality on standard versions: https://gitlab.kitware.com/cmake/cmake/issues/18597
# which means that using standard CMake methods, it's impossible to actually sync the CXX and CUDA versions for pre-11
# versions of C++; CUDA accepts 98 but translates that to 03, while CXX doesn't accept 03 (and doesn't translate that to 03).
# In case this gives You, dear user, any trouble, please escalate the above CMake bug, so we can support reality properly.
if(DEFINED CMAKE_CUDA_STANDARD)
message(WARNING "You've set CMAKE_CUDA_STANDARD; please note that this variable is ignored, and CMAKE_CXX_STANDARD"
" is used as the C++ standard version for both C++ and CUDA.")
endif()
unset(CMAKE_CUDA_STANDARD CACHE)
set(CMAKE_CUDA_STANDARD ${CMAKE_CXX_STANDARD})

set(K2_COMPUTE_ARCHS 30 32 35 50 52 53 60 61 62 70 72)
foreach(COMPUTE_ARCH IN LISTS K2_COMPUTE_ARCHS)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda -gencode arch=compute_${COMPUTE_ARCH},code=sm_${COMPUTE_ARCH}")
endforeach()
endif()
unset(CMAKE_CUDA_STANDARD CACHE)
set(CMAKE_CUDA_STANDARD ${CMAKE_CXX_STANDARD})

set(K2_COMPUTE_ARCHS 30 32 35 50 52 53 60 61 62 70 72)
foreach(COMPUTE_ARCH IN LISTS K2_COMPUTE_ARCHS)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda -gencode arch=compute_${COMPUTE_ARCH},code=sm_${COMPUTE_ARCH}")
endforeach()

#[[ end settings for CUB ]]

enable_testing()

Expand Down
84 changes: 50 additions & 34 deletions k2/csrc/cuda/debug.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
#include <device_launch_parameters.h>
#include <driver_types.h>

#include <stdio.h>
#include <assert.h>
#include <cstdio>
#include <cassert>

namespace k2 {

Expand Down Expand Up @@ -71,34 +71,38 @@ namespace k2 {
/**
* @fn
* __host__ __device__ __forceinline__ cudaError_t
* _K2CudaDebug(cudaError_t error,
* K2CudaDebug_(cudaError_t error,
* const char *filename,
* int line)
*
* @brief This is an error checking function, with context information.
* It's not designed to called by users, but inner macros.
* It's made used by other macros.
*
* @param[in] error an enum type indicating CUDA errors.
* @param[in] filename the source filename that the error comes from.
* @param[in] line the code line that the error happened.
* @return Pass the input CUDA error.
* @param[in] abort this bool control if the error results into `abort`
* @return the input CUDA error.
*
* @code{.cpp}
* _K2CudaDebug(cudaGetLastError(), __FILE__, __LINE__);
* K2CudaDebug_(cudaGetLastError(), __FILE__, __LINE__);
* @endcode
*/
__host__ __device__ __forceinline__ cudaError_t _K2CudaDebug(
__host__ __device__ __forceinline__ cudaError_t K2CudaDebug_(
cudaError_t error,
const char *filename,
int line) {
int line,
bool abort = true) {
if (cudaSuccess != error) {
#ifndef __CUDA_ARCH__
fprintf(stderr, "CUDA error ID=%d, NAME=%s, [%s, %d]: %s\n",
error, cudaGetErrorName(error),
filename, line,
cudaGetErrorString(error));
fflush(stderr);
if (abort) {
exit(error);
}
#elif __CUDA_ARCH__ >= 200
printf("CUDA error ID=%d, NAME=%s, "
"[block (%d,%d,%d) thread (%d,%d,%d), %s, %d]: %s\n",
Expand All @@ -107,6 +111,16 @@ __host__ __device__ __forceinline__ cudaError_t _K2CudaDebug(
threadIdx.x, threadIdx.y, threadIdx.z,
filename, line,
cudaGetErrorString(error));
if (abort) {
__threadfence(); // ensure memory write before trap
/**
* kill kernel (all threads) with error.
* It may cause context destructed.
* `assert(cudaSuccess != error)`
* is another candidate.
*/
asm("trap;");
}
#endif
}
return error;
Expand All @@ -118,19 +132,19 @@ __host__ __device__ __forceinline__ cudaError_t _K2CudaDebug(
* @brief Macro for checking cuda error.
*
* @details
* If error is not cudaSuccess, print the error message and exit with the
* error enum value. Otherwise, it does nothing.
* If error is not cudaSuccess, print the error message, and pass the
* optional `bAbort` as `abort` of `K2CudaDebug_`.
* Otherwise, it does nothing except return the error.
*
* @param[in] e an enum type indicating CUDA errors.
* @param[in] e one in the enum type that indicates the CUDA error.
* @return the CUDA error returned by `K2CudaDebug_`.
*
* @code{.cpp}
* K2_CUDA_CHECK_ERROR(cudaGetLastError());
* K2_CUDA_CHECK_ERROR(error = cudaGetLastError());
* @endcode
*/
#define K2_CUDA_CHECK_ERROR(e) \
if (::k2::_K2CudaDebug((cudaError_t)(e), __FILE__, __LINE__)) { \
exit(e); \
}
#define K2_CUDA_CHECK_ERROR(e, bAbort...) \
::k2::K2CudaDebug_((cudaError_t)(e), __FILE__, __LINE__, ##bAbort)

/**
* @def K2_CUDA_SAFE_CALL([cuda_runtime_api|kernel])
Expand All @@ -139,8 +153,10 @@ __host__ __device__ __forceinline__ cudaError_t _K2CudaDebug(
* or "kernels" return status.
*
* @details
* - If api return status is not cudaSuccess, print and exit.
* - The `cudaDeviceSynchronize` only happens when `NDEBUG` is not defined.
* - Use K2_CUDA_CHECK_ERROR(.., bAbort = true) to deal with the error.
*
* @param[in]
*
* @note
* Kernel launches do not return any error code, thus checking should after it.
Expand All @@ -152,17 +168,17 @@ __host__ __device__ __forceinline__ cudaError_t _K2CudaDebug(
* @endcode
*/
#ifndef NDEBUG
#define K2_CUDA_SAFE_CALL(...) \
do { \
(__VA_ARGS__); \
cudaDeviceSynchronize(); \
K2_CUDA_CHECK_ERROR(cudaGetLastError()); \
#define K2_CUDA_SAFE_CALL(...) \
do { \
(__VA_ARGS__); \
cudaDeviceSynchronize(); \
K2_CUDA_CHECK_ERROR(cudaGetLastError(), true); \
} while (0)
#else
#define K2_CUDA_SAFE_CALL(...) \
do { \
(__VA_ARGS__); \
K2_CUDA_CHECK_ERROR(cudaGetLastError()); \
#define K2_CUDA_SAFE_CALL(...) \
do { \
(__VA_ARGS__); \
K2_CUDA_CHECK_ERROR(cudaGetLastError(), true); \
} while (0)
#endif

Expand Down Expand Up @@ -208,14 +224,14 @@ __host__ __device__ __forceinline__ cudaError_t _K2CudaDebug(
* @endcode
*/
#ifdef K2_PARANOID
#define K2_PARANOID_ASSERT(exp, format, ...) \
do { \
if (exp) \
(void)0; \
else { \
K2_DLOG(" [%s, %d] " format, __FILE__, __LINE__, ##__VA_ARGS__); \
assert(exp); \
} \
#define K2_PARANOID_ASSERT(exp, format, ...) \
do { \
if (exp) \
(void)0; \
else { \
K2_DLOG("Error [%s, %d] " format, __FILE__, __LINE__, ##__VA_ARGS__); \
assert(exp); \
} \
} while (0)
#else
#define K2_PARANOID_ASSERT(exp, format, ...) ((void) 0)
Expand Down
18 changes: 6 additions & 12 deletions k2/csrc/cuda/debug_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

// See ../../LICENSE for clarification regarding multiple authors

#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include <gmock/gmock.h>

#ifndef K2_PARANOID
#define K2_PARANOID
Expand Down Expand Up @@ -56,7 +56,7 @@ __global__ void HelloCUDA(float f) {
}

// A vector add kernel definition
__global__ void VecAdd(float *A, float *B, float *C) {
__global__ void VecAdd(const float *A, const float *B, float *C) {
int i = threadIdx.x;
C[i] = A[i] + B[i];
K2_PARANOID_ASSERT(C[i] == A[i] + B[i],
Expand All @@ -80,7 +80,7 @@ TEST(DebugTest, K2CheckEq) {

TEST(DebugTest, K2CudaCheckError) {
::testing::FLAGS_gtest_death_test_style = "threadsafe";
ASSERT_DEATH(K2_CUDA_CHECK_ERROR(cudaErrorMemoryAllocation),
ASSERT_DEATH(K2_CUDA_CHECK_ERROR(cudaErrorMemoryAllocation, true),
"cudaErrorMemoryAllocation");
}

Expand Down Expand Up @@ -128,7 +128,8 @@ TEST(DebugTest, K2DLog) {
HelloCUDA<<<1, 5>>>(1.2345f);
FillContents<<<3, 2>>>(1, d_A);
cudaDeviceSynchronize();
K2_CUDA_CHECK_ERROR(cudaGetLastError());
auto error = K2_CUDA_CHECK_ERROR(cudaGetLastError());
EXPECT_EQ(error, cudaSuccess);
}

cudaFree(d_A);
Expand All @@ -137,19 +138,12 @@ TEST(DebugTest, K2DLog) {
TEST(DebugTest, K2ParanoidAssert) {
::testing::FLAGS_gtest_death_test_style = "threadsafe";

{

ASSERT_DEATH(
K2_PARANOID_ASSERT(2 < 1, "2 unexpectedly smaller than 1\n"),
"Assertion");
}

{
int a = 2;
int b = 1;
ASSERT_DEATH(
K2_PARANOID_ASSERT(a < b, "%d unexpectedly smaller than %d\n", a, b),
"Assertion");
"Assertion `a < b' failed");
}

{
Expand Down

0 comments on commit 4a301ac

Please sign in to comment.