From d34b0f03c12dca46e911cb96b74c2eeba9d1ca37 Mon Sep 17 00:00:00 2001 From: john li Date: Tue, 6 Jun 2023 15:31:48 -0400 Subject: [PATCH 1/3] include cublas error details when getting cublas handle fails --- csrc/includes/context.h | 6 ++++-- csrc/transformer/inference/includes/inference_context.h | 7 +++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/csrc/includes/context.h b/csrc/includes/context.h index 3a9067dc3b9f..ee37dd2b2598 100644 --- a/csrc/includes/context.h +++ b/csrc/includes/context.h @@ -50,8 +50,10 @@ class TrainingContext { { curandCreateGenerator(&_gen, CURAND_RNG_PSEUDO_DEFAULT); curandSetPseudoRandomGeneratorSeed(_gen, 123); - if (cublasCreate(&_cublasHandle) != CUBLAS_STATUS_SUCCESS) { - auto message = std::string("Fail to create cublas handle."); + cublasStatus_t stat = cublasCreate(&_cublasHandle); + if (stat != CUBLAS_STATUS_SUCCESS) { + auto message = std::string("Failed to create cublas handle: ") + + cublasGetStatusName(stat) + " " + cublasGetStatusString(stat); std::cerr << message << std::endl; throw std::runtime_error(message); } diff --git a/csrc/transformer/inference/includes/inference_context.h b/csrc/transformer/inference/includes/inference_context.h index 66596c9bfb82..74d40522c41d 100644 --- a/csrc/transformer/inference/includes/inference_context.h +++ b/csrc/transformer/inference/includes/inference_context.h @@ -60,8 +60,11 @@ class InferenceContext { { _workSpaceSize = 0; _workspace = 0; - if (cublasCreate(&_cublasHandle) != CUBLAS_STATUS_SUCCESS) { - auto message = std::string("Fail to create cublas handle."); + + cublasStatus_t stat = cublasCreate(&_cublasHandle); + if (stat != CUBLAS_STATUS_SUCCESS) { + auto message = std::string("Failed to create cublas handle: ") + + cublasGetStatusName(stat) + " " + cublasGetStatusString(stat); std::cerr << message << std::endl; throw std::runtime_error(message); } From 672c6223faf74a93d38841b4f2df9f3565f63439 Mon Sep 17 00:00:00 2001 From: john li Date: Wed, 7 Jun 2023 10:55:17 -0400 Subject: [PATCH 2/3] run clang-format --- csrc/includes/context.h | 4 ++-- csrc/transformer/inference/includes/inference_context.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/csrc/includes/context.h b/csrc/includes/context.h index ee37dd2b2598..2e39f3f8b3bd 100644 --- a/csrc/includes/context.h +++ b/csrc/includes/context.h @@ -52,8 +52,8 @@ class TrainingContext { curandSetPseudoRandomGeneratorSeed(_gen, 123); cublasStatus_t stat = cublasCreate(&_cublasHandle); if (stat != CUBLAS_STATUS_SUCCESS) { - auto message = std::string("Failed to create cublas handle: ") - + cublasGetStatusName(stat) + " " + cublasGetStatusString(stat); + auto message = std::string("Failed to create cublas handle: ") + + cublasGetStatusName(stat) + " " + cublasGetStatusString(stat); std::cerr << message << std::endl; throw std::runtime_error(message); } diff --git a/csrc/transformer/inference/includes/inference_context.h b/csrc/transformer/inference/includes/inference_context.h index 74d40522c41d..166611e0c400 100644 --- a/csrc/transformer/inference/includes/inference_context.h +++ b/csrc/transformer/inference/includes/inference_context.h @@ -63,8 +63,8 @@ class InferenceContext { cublasStatus_t stat = cublasCreate(&_cublasHandle); if (stat != CUBLAS_STATUS_SUCCESS) { - auto message = std::string("Failed to create cublas handle: ") - + cublasGetStatusName(stat) + " " + cublasGetStatusString(stat); + auto message = std::string("Failed to create cublas handle: ") + + cublasGetStatusName(stat) + " " + cublasGetStatusString(stat); std::cerr << message << std::endl; throw std::runtime_error(message); } From 32049b841e3a45e4dacb30eee9d7ceb8fde246a8 Mon Sep 17 00:00:00 2001 From: john li Date: Fri, 9 Jun 2023 16:24:45 -0400 Subject: [PATCH 3/3] just use raw enum value to avoid depending on minimum cuda version --- csrc/includes/context.h | 6 ++++-- csrc/transformer/inference/includes/inference_context.h | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/csrc/includes/context.h b/csrc/includes/context.h index 2e39f3f8b3bd..cd80f8fbeebe 100644 --- a/csrc/includes/context.h +++ b/csrc/includes/context.h @@ -52,8 +52,10 @@ class TrainingContext { curandSetPseudoRandomGeneratorSeed(_gen, 123); cublasStatus_t stat = cublasCreate(&_cublasHandle); if (stat != CUBLAS_STATUS_SUCCESS) { - auto message = std::string("Failed to create cublas handle: ") + - cublasGetStatusName(stat) + " " + cublasGetStatusString(stat); + // It would be nice to use cublasGetStatusName and + // cublasGetStatusString, but they were only added in CUDA 11.4.2. + auto message = std::string("Failed to create cublas handle: cublasStatus_t was ") + + std::to_string(stat); std::cerr << message << std::endl; throw std::runtime_error(message); } diff --git a/csrc/transformer/inference/includes/inference_context.h b/csrc/transformer/inference/includes/inference_context.h index 166611e0c400..aaf568553560 100644 --- a/csrc/transformer/inference/includes/inference_context.h +++ b/csrc/transformer/inference/includes/inference_context.h @@ -63,8 +63,10 @@ class InferenceContext { cublasStatus_t stat = cublasCreate(&_cublasHandle); if (stat != CUBLAS_STATUS_SUCCESS) { - auto message = std::string("Failed to create cublas handle: ") + - cublasGetStatusName(stat) + " " + cublasGetStatusString(stat); + // It would be nice to use cublasGetStatusName and + // cublasGetStatusString, but they were only added in CUDA 11.4.2. + auto message = std::string("Failed to create cublas handle: cublasStatus_t was ") + + std::to_string(stat); std::cerr << message << std::endl; throw std::runtime_error(message); }