Skip to content

Commit

Permalink
fix: CUDA 12.2 defines half operators for all arches
Browse files Browse the repository at this point in the history
  • Loading branch information
gedoensmax committed Aug 15, 2023
1 parent 0f232c5 commit cee050f
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion onnxruntime/core/providers/cuda/cu_inc/common.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ namespace onnxruntime {
namespace cuda {

// float16 arithmetic is supported natively (via intrinsics) on sm_53 and newer; CUDA does not provide a fallback for older architectures
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 530
// CUDA 12.2 no longer limits its half operator definitions to sm_53 and newer; it defines them for all architectures, so these fallbacks are only needed with older toolkits
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 530) && ((__CUDACC_VER_MAJOR__ < 12) || ((__CUDACC_VER_MAJOR__ == 12 ) && (__CUDACC_VER_MINOR__ < 2)))
// Fallback half addition: widen both operands to float, add in single
// precision, then convert the sum back to half.
__device__ __forceinline__ half operator+(const half& lh, const half& rh) {
  const float sum = static_cast<float>(lh) + static_cast<float>(rh);
  return half(sum);
}
// Fallback half subtraction: widen both operands to float, subtract in single
// precision, then convert the difference back to half.
__device__ __forceinline__ half operator-(const half& lh, const half& rh) {
  const float difference = static_cast<float>(lh) - static_cast<float>(rh);
  return half(difference);
}
// Fallback half multiplication: widen both operands to float, multiply in
// single precision, then convert the product back to half.
__device__ __forceinline__ half operator*(const half& lh, const half& rh) {
  const float product = static_cast<float>(lh) * static_cast<float>(rh);
  return half(product);
}
Expand Down

0 comments on commit cee050f

Please sign in to comment.