From 9b077e51c778e7b05bee27fa52a5ecae62e00bb4 Mon Sep 17 00:00:00 2001 From: Jacob Faibussowitsch Date: Thu, 24 Oct 2024 02:03:22 -0500 Subject: [PATCH] Fix CUDA driver type stub definitions (#511) closes https://github.com/rapidsai/kvikio/issues/510 Authors: - Jacob Faibussowitsch (https://github.com/Jacobfaib) Approvers: - Mads R. B. Kristensen (https://github.com/madsbk) URL: https://github.com/rapidsai/kvikio/pull/511 --- cpp/include/kvikio/shim/cuda_h_wrapper.hpp | 59 +++++++++++++--------- 1 file changed, 35 insertions(+), 24 deletions(-) diff --git a/cpp/include/kvikio/shim/cuda_h_wrapper.hpp b/cpp/include/kvikio/shim/cuda_h_wrapper.hpp index 0740c99f31..f5f30f9407 100644 --- a/cpp/include/kvikio/shim/cuda_h_wrapper.hpp +++ b/cpp/include/kvikio/shim/cuda_h_wrapper.hpp @@ -27,13 +27,24 @@ #else // If CUDA isn't defined, we define some of the data types here. -// Notice, this doesn't need to be ABI compatible with the CUDA definitions. +// Notice, the functions and constant values don't need to match the CUDA +// definitions, but the types *do*, since downstream libraries dlsym()-ing +// the symbols at runtime rely on accurate type definitions. If we mismatch +// here, then those libraries will get "mismatched type alias redefinition" +// errors when they include our headers. -using CUresult = int; +#if defined(_WIN64) || defined(__LP64__) +// Don't use uint64_t, we want to match the driver headers exactly using CUdeviceptr = unsigned long long; -using CUdevice = int; -using CUcontext = void*; -using CUstream = void*; +#else +using CUdeviceptr = unsigned int; +#endif +static_assert(sizeof(CUdeviceptr) == sizeof(void*)); + +using CUresult = int; +using CUdevice = int; +using CUcontext = struct CUctx_st*; +using CUstream = struct CUstream_st*; #define CUDA_ERROR_STUB_LIBRARY 0 #define CUDA_SUCCESS 0 @@ -44,24 +55,24 @@ using CUstream = void*; #define CU_MEMHOSTREGISTER_PORTABLE 0 #define CU_STREAM_DEFAULT 0 -CUresult cuInit(...); -CUresult cuMemHostAlloc(...); -CUresult cuMemFreeHost(...); -CUresult cuMemcpyHtoDAsync(...); -CUresult cuMemcpyDtoHAsync(...); -CUresult cuPointerGetAttribute(...); -CUresult cuPointerGetAttributes(...); -CUresult cuCtxPushCurrent(...); -CUresult cuCtxPopCurrent(...); -CUresult cuCtxGetCurrent(...); -CUresult cuMemGetAddressRange(...); -CUresult cuGetErrorName(...); -CUresult cuGetErrorString(...); -CUresult cuDeviceGet(...); -CUresult cuDevicePrimaryCtxRetain(...); -CUresult cuDevicePrimaryCtxRelease(...); -CUresult cuStreamCreate(...); -CUresult cuStreamDestroy(...); -CUresult cuStreamSynchronize(...); +#define cuInit(...) ((CUresult)CUDA_SUCCESS) +#define cuMemHostAlloc(...) ((CUresult)CUDA_SUCCESS) +#define cuMemFreeHost(...) ((CUresult)CUDA_SUCCESS) +#define cuMemcpyHtoDAsync(...) ((CUresult)CUDA_SUCCESS) +#define cuMemcpyDtoHAsync(...) ((CUresult)CUDA_SUCCESS) +#define cuPointerGetAttribute(...) ((CUresult)CUDA_SUCCESS) +#define cuPointerGetAttributes(...) ((CUresult)CUDA_SUCCESS) +#define cuCtxPushCurrent(...) ((CUresult)CUDA_SUCCESS) +#define cuCtxPopCurrent(...) ((CUresult)CUDA_SUCCESS) +#define cuCtxGetCurrent(...) ((CUresult)CUDA_SUCCESS) +#define cuMemGetAddressRange(...) ((CUresult)CUDA_SUCCESS) +#define cuGetErrorName(...) ((CUresult)CUDA_SUCCESS) +#define cuGetErrorString(...) ((CUresult)CUDA_SUCCESS) +#define cuDeviceGet(...) ((CUresult)CUDA_SUCCESS) +#define cuDevicePrimaryCtxRetain(...) ((CUresult)CUDA_SUCCESS) +#define cuDevicePrimaryCtxRelease(...) ((CUresult)CUDA_SUCCESS) +#define cuStreamCreate(...) ((CUresult)CUDA_SUCCESS) +#define cuStreamDestroy(...) ((CUresult)CUDA_SUCCESS) +#define cuStreamSynchronize(...) ((CUresult)CUDA_SUCCESS) #endif