[Fix] Clean up Runtime API.
lshqqytiger committed May 21, 2024
1 parent 11cc584 commit 2ad9ad6
Showing 3 changed files with 62 additions and 47 deletions.
21 changes: 20 additions & 1 deletion hip_runtime-sys/src/hip_runtime_api.rs
@@ -7154,9 +7154,17 @@ extern "C" {
 extern "C" {
     #[must_use]
     pub fn __hipRegisterFatBinary(
-        data: *const ::std::os::raw::c_void,
+        data: *mut ::std::os::raw::c_void,
     ) -> *mut *mut ::std::os::raw::c_void;
 }
+/*
+extern "C" {
+    #[must_use]
+    pub fn __hipRegisterFatBinaryEnd(
+        fatCubinHandle: *mut *mut ::std::os::raw::c_void,
+    ) -> ::std::os::raw::c_void;
+}
+*/
 extern "C" {
     #[must_use]
     pub fn __hipRegisterFunction(
@@ -7172,6 +7180,17 @@ extern "C" {
         wSize: *mut ::std::os::raw::c_int,
     ) -> ::std::os::raw::c_void;
 }
+/*
+extern "C" {
+    #[must_use]
+    pub fn __hipRegisterHostVar(
+        fatCubinHandle: *mut *mut ::std::os::raw::c_void,
+        deviceName: *const ::std::os::raw::c_char,
+        hostVar: *mut ::std::os::raw::c_char,
+        size: usize,
+    ) -> ::std::os::raw::c_void;
+}
+*/
 extern "C" {
     #[must_use]
     pub fn __hipRegisterManagedVar(
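The only behavioral change in this file is `__hipRegisterFatBinary` now taking `*mut` instead of `*const` data; the `__hipRegisterFatBinaryEnd` and `__hipRegisterHostVar` declarations are added but left commented out. A minimal sketch of adapting a caller to the updated binding (the `register_fat_binary` wrapper below is illustrative only, not part of hip_runtime-sys):

```rust
use std::os::raw::c_void;

extern "C" {
    #[must_use]
    fn __hipRegisterFatBinary(data: *mut c_void) -> *mut *mut c_void;
}

// Illustrative wrapper only: callers that previously passed `*const c_void`
// now need a mutable pointer to the fat-binary wrapper they register.
unsafe fn register_fat_binary(data: *mut c_void) -> *mut *mut c_void {
    __hipRegisterFatBinary(data)
}
```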
15 changes: 6 additions & 9 deletions zluda_runtime/src/cudart.rs
@@ -3565,10 +3565,7 @@ pub unsafe extern "system" fn cudaGetDeviceProperties(
     prop: *mut cudaDeviceProp,
     device: ::std::os::raw::c_int,
 ) -> cudaError_t {
-    crate::get_device_properties(
-        prop,
-        device,
-    )
+    crate::unsupported()
 }

#[doc = " \\brief Returns information about the device\n\n Returns in \\p *value the integer value of the attribute \\p attr on device\n \\p device. The supported attributes are:\n - ::cudaDevAttrMaxThreadsPerBlock: Maximum number of threads per block\n - ::cudaDevAttrMaxBlockDimX: Maximum x-dimension of a block\n - ::cudaDevAttrMaxBlockDimY: Maximum y-dimension of a block\n - ::cudaDevAttrMaxBlockDimZ: Maximum z-dimension of a block\n - ::cudaDevAttrMaxGridDimX: Maximum x-dimension of a grid\n - ::cudaDevAttrMaxGridDimY: Maximum y-dimension of a grid\n - ::cudaDevAttrMaxGridDimZ: Maximum z-dimension of a grid\n - ::cudaDevAttrMaxSharedMemoryPerBlock: Maximum amount of shared memory\n available to a thread block in bytes\n - ::cudaDevAttrTotalConstantMemory: Memory available on device for\n __constant__ variables in a CUDA C kernel in bytes\n - ::cudaDevAttrWarpSize: Warp size in threads\n - ::cudaDevAttrMaxPitch: Maximum pitch in bytes allowed by the memory copy\n functions that involve memory regions allocated through ::cudaMallocPitch()\n - ::cudaDevAttrMaxTexture1DWidth: Maximum 1D texture width\n - ::cudaDevAttrMaxTexture1DLinearWidth: Maximum width for a 1D texture bound\n to linear memory\n - ::cudaDevAttrMaxTexture1DMipmappedWidth: Maximum mipmapped 1D texture width\n - ::cudaDevAttrMaxTexture2DWidth: Maximum 2D texture width\n - ::cudaDevAttrMaxTexture2DHeight: Maximum 2D texture height\n - ::cudaDevAttrMaxTexture2DLinearWidth: Maximum width for a 2D texture\n bound to linear memory\n - ::cudaDevAttrMaxTexture2DLinearHeight: Maximum height for a 2D texture\n bound to linear memory\n - ::cudaDevAttrMaxTexture2DLinearPitch: Maximum pitch in bytes for a 2D\n texture bound to linear memory\n - ::cudaDevAttrMaxTexture2DMipmappedWidth: Maximum mipmapped 2D texture\n width\n - ::cudaDevAttrMaxTexture2DMipmappedHeight: Maximum mipmapped 2D texture\n height\n - ::cudaDevAttrMaxTexture3DWidth: Maximum 3D texture width\n - ::cudaDevAttrMaxTexture3DHeight: Maximum 3D texture height\n - ::cudaDevAttrMaxTexture3DDepth: Maximum 3D texture depth\n - ::cudaDevAttrMaxTexture3DWidthAlt: Alternate maximum 3D texture width,\n 0 if no alternate maximum 3D texture size is supported\n - ::cudaDevAttrMaxTexture3DHeightAlt: Alternate maximum 3D texture height,\n 0 if no alternate maximum 3D texture size is supported\n - ::cudaDevAttrMaxTexture3DDepthAlt: Alternate maximum 3D texture depth,\n 0 if no alternate maximum 3D texture size is supported\n - ::cudaDevAttrMaxTextureCubemapWidth: Maximum cubemap texture width or\n height\n - ::cudaDevAttrMaxTexture1DLayeredWidth: Maximum 1D layered texture width\n - ::cudaDevAttrMaxTexture1DLayeredLayers: Maximum layers in a 1D layered\n texture\n - ::cudaDevAttrMaxTexture2DLayeredWidth: Maximum 2D layered texture width\n - ::cudaDevAttrMaxTexture2DLayeredHeight: Maximum 2D layered texture height\n - ::cudaDevAttrMaxTexture2DLayeredLayers: Maximum layers in a 2D layered\n texture\n - ::cudaDevAttrMaxTextureCubemapLayeredWidth: Maximum cubemap layered\n texture width or height\n - ::cudaDevAttrMaxTextureCubemapLayeredLayers: Maximum layers in a cubemap\n layered texture\n - ::cudaDevAttrMaxSurface1DWidth: Maximum 1D surface width\n - ::cudaDevAttrMaxSurface2DWidth: Maximum 2D surface width\n - ::cudaDevAttrMaxSurface2DHeight: Maximum 2D surface height\n - ::cudaDevAttrMaxSurface3DWidth: Maximum 3D surface width\n - ::cudaDevAttrMaxSurface3DHeight: Maximum 3D surface height\n - ::cudaDevAttrMaxSurface3DDepth: Maximum 3D surface depth\n - 
::cudaDevAttrMaxSurface1DLayeredWidth: Maximum 1D layered surface width\n - ::cudaDevAttrMaxSurface1DLayeredLayers: Maximum layers in a 1D layered\n surface\n - ::cudaDevAttrMaxSurface2DLayeredWidth: Maximum 2D layered surface width\n - ::cudaDevAttrMaxSurface2DLayeredHeight: Maximum 2D layered surface height\n - ::cudaDevAttrMaxSurface2DLayeredLayers: Maximum layers in a 2D layered\n surface\n - ::cudaDevAttrMaxSurfaceCubemapWidth: Maximum cubemap surface width\n - ::cudaDevAttrMaxSurfaceCubemapLayeredWidth: Maximum cubemap layered\n surface width\n - ::cudaDevAttrMaxSurfaceCubemapLayeredLayers: Maximum layers in a cubemap\n layered surface\n - ::cudaDevAttrMaxRegistersPerBlock: Maximum number of 32-bit registers\n available to a thread block\n - ::cudaDevAttrClockRate: Peak clock frequency in kilohertz\n - ::cudaDevAttrTextureAlignment: Alignment requirement; texture base\n addresses aligned to ::textureAlign bytes do not need an offset applied\n to texture fetches\n - ::cudaDevAttrTexturePitchAlignment: Pitch alignment requirement for 2D\n texture references bound to pitched memory\n - ::cudaDevAttrGpuOverlap: 1 if the device can concurrently copy memory\n between host and device while executing a kernel, or 0 if not\n - ::cudaDevAttrMultiProcessorCount: Number of multiprocessors on the device\n - ::cudaDevAttrKernelExecTimeout: 1 if there is a run time limit for kernels\n executed on the device, or 0 if not\n - ::cudaDevAttrIntegrated: 1 if the device is integrated with the memory\n subsystem, or 0 if not\n - ::cudaDevAttrCanMapHostMemory: 1 if the device can map host memory into\n the CUDA address space, or 0 if not\n - ::cudaDevAttrComputeMode: Compute mode is the compute mode that the device\n is currently in. Available modes are as follows:\n - ::cudaComputeModeDefault: Default mode - Device is not restricted and\n multiple threads can use ::cudaSetDevice() with this device.\n - ::cudaComputeModeExclusive: Compute-exclusive mode - Only one thread will\n be able to use ::cudaSetDevice() with this device.\n - ::cudaComputeModeProhibited: Compute-prohibited mode - No threads can use\n ::cudaSetDevice() with this device.\n - ::cudaComputeModeExclusiveProcess: Compute-exclusive-process mode - Many\n threads in one process will be able to use ::cudaSetDevice() with this\n device.\n - ::cudaDevAttrConcurrentKernels: 1 if the device supports executing\n multiple kernels within the same context simultaneously, or 0 if\n not. It is not guaranteed that multiple kernels will be resident on the\n device concurrently so this feature should not be relied upon for\n correctness.\n - ::cudaDevAttrEccEnabled: 1 if error correction is enabled on the device,\n 0 if error correction is disabled or not supported by the device\n - ::cudaDevAttrPciBusId: PCI bus identifier of the device\n - ::cudaDevAttrPciDeviceId: PCI device (also known as slot) identifier of\n the device\n - ::cudaDevAttrTccDriver: 1 if the device is using a TCC driver. TCC is only\n available on Tesla hardware running Windows Vista or later.\n - ::cudaDevAttrMemoryClockRate: Peak memory clock frequency in kilohertz\n - ::cudaDevAttrGlobalMemoryBusWidth: Global memory bus width in bits\n - ::cudaDevAttrL2CacheSize: Size of L2 cache in bytes. 
0 if the device\n doesn't have L2 cache.\n - ::cudaDevAttrMaxThreadsPerMultiProcessor: Maximum resident threads per\n multiprocessor\n - ::cudaDevAttrUnifiedAddressing: 1 if the device shares a unified address\n space with the host, or 0 if not\n - ::cudaDevAttrComputeCapabilityMajor: Major compute capability version\n number\n - ::cudaDevAttrComputeCapabilityMinor: Minor compute capability version\n number\n - ::cudaDevAttrStreamPrioritiesSupported: 1 if the device supports stream\n priorities, or 0 if not\n - ::cudaDevAttrGlobalL1CacheSupported: 1 if device supports caching globals\n in L1 cache, 0 if not\n - ::cudaDevAttrLocalL1CacheSupported: 1 if device supports caching locals\n in L1 cache, 0 if not\n - ::cudaDevAttrMaxSharedMemoryPerMultiprocessor: Maximum amount of shared memory\n available to a multiprocessor in bytes; this amount is shared by all\n thread blocks simultaneously resident on a multiprocessor\n - ::cudaDevAttrMaxRegistersPerMultiprocessor: Maximum number of 32-bit registers\n available to a multiprocessor; this number is shared by all thread blocks\n simultaneously resident on a multiprocessor\n - ::cudaDevAttrManagedMemory: 1 if device supports allocating\n managed memory, 0 if not\n - ::cudaDevAttrIsMultiGpuBoard: 1 if device is on a multi-GPU board, 0 if not\n - ::cudaDevAttrMultiGpuBoardGroupID: Unique identifier for a group of devices on the\n same multi-GPU board\n - ::cudaDevAttrHostNativeAtomicSupported: 1 if the link between the device and the\n host supports native atomic operations\n - ::cudaDevAttrSingleToDoublePrecisionPerfRatio: Ratio of single precision performance\n (in floating-point operations per second) to double precision performance\n - ::cudaDevAttrPageableMemoryAccess: 1 if the device supports coherently accessing\n pageable memory without calling cudaHostRegister on it, and 0 otherwise\n - ::cudaDevAttrConcurrentManagedAccess: 1 if the device can coherently access managed\n memory concurrently with the CPU, and 0 otherwise\n - ::cudaDevAttrComputePreemptionSupported: 1 if the device supports\n Compute Preemption, 0 if not\n - ::cudaDevAttrCanUseHostPointerForRegisteredMem: 1 if the device can access host\n registered memory at the same virtual address as the CPU, and 0 otherwise\n - ::cudaDevAttrCooperativeLaunch: 1 if the device supports launching cooperative kernels\n via ::cudaLaunchCooperativeKernel, and 0 otherwise\n - ::cudaDevAttrCooperativeMultiDeviceLaunch: 1 if the device supports launching cooperative\n kernels via ::cudaLaunchCooperativeKernelMultiDevice, and 0 otherwise\n - ::cudaDevAttrCanFlushRemoteWrites: 1 if the device supports flushing of outstanding\n remote writes, and 0 otherwise\n - ::cudaDevAttrHostRegisterSupported: 1 if the device supports host memory registration\n via ::cudaHostRegister, and 0 otherwise\n - ::cudaDevAttrPageableMemoryAccessUsesHostPageTables: 1 if the device accesses pageable memory via the\n host's page tables, and 0 otherwise\n - ::cudaDevAttrDirectManagedMemAccessFromHost: 1 if the host can directly access managed memory on the device\n without migration, and 0 otherwise\n - ::cudaDevAttrMaxSharedMemoryPerBlockOptin: Maximum per block shared memory size on the device. 
This value can\n be opted into when using ::cudaFuncSetAttribute\n - ::cudaDevAttrMaxBlocksPerMultiprocessor: Maximum number of thread blocks that can reside on a multiprocessor\n - ::cudaDevAttrMaxPersistingL2CacheSize: Maximum L2 persisting lines capacity setting in bytes\n - ::cudaDevAttrMaxAccessPolicyWindowSize: Maximum value of cudaAccessPolicyWindow::num_bytes\n - ::cudaDevAttrReservedSharedMemoryPerBlock: Shared memory reserved by CUDA driver per block in bytes\n - ::cudaDevAttrSparseCudaArraySupported: 1 if the device supports sparse CUDA arrays and sparse CUDA mipmapped arrays.\n - ::cudaDevAttrHostRegisterReadOnlySupported: Device supports using the ::cudaHostRegister flag cudaHostRegisterReadOnly\n to register memory that must be mapped as read-only to the GPU\n - ::cudaDevAttrMemoryPoolsSupported: 1 if the device supports using the cudaMallocAsync and cudaMemPool family of APIs, and 0 otherwise\n - ::cudaDevAttrGPUDirectRDMASupported: 1 if the device supports GPUDirect RDMA APIs, and 0 otherwise\n - ::cudaDevAttrGPUDirectRDMAFlushWritesOptions: bitmask to be interpreted according to the ::cudaFlushGPUDirectRDMAWritesOptions enum\n - ::cudaDevAttrGPUDirectRDMAWritesOrdering: see the ::cudaGPUDirectRDMAWritesOrdering enum for numerical values\n - ::cudaDevAttrMemoryPoolSupportedHandleTypes: Bitmask of handle types supported with mempool based IPC\n\n \\param value - Returned device attribute value\n \\param attr - Device attribute to query\n \\param device - Device number to query\n\n \\return\n ::cudaSuccess,\n ::cudaErrorInvalidDevice,\n ::cudaErrorInvalidValue\n \\notefnerr\n \\note_init_rt\n \\note_callback\n\n \\sa ::cudaGetDeviceCount, ::cudaGetDevice, ::cudaSetDevice, ::cudaChooseDevice,\n ::cudaGetDeviceProperties,\n ::cuDeviceGetAttribute"]
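In the first hunk above, `cudaGetDeviceProperties` no longer has a dedicated implementation and is routed to the crate's generic `unsupported()` stub instead. The stub's body is not part of this diff; a self-contained mock of the usual pattern (assuming it simply reports an error code, with a stand-in for the bindgen-generated `cudaError_t`) could look like this:

```rust
// Self-contained mock, not ZLUDA's actual helper: a stub such as
// `unsupported()` typically just returns an error code for entry points the
// shim does not implement. The enum below is a stand-in for the bindgen type.
#[allow(non_camel_case_types, dead_code)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(i32)]
enum cudaError_t {
    cudaSuccess = 0,
    cudaErrorNotSupported = 801,
}

fn unsupported() -> cudaError_t {
    cudaError_t::cudaErrorNotSupported
}

fn main() {
    // Every stubbed entry point reports the same failure to the caller.
    assert_eq!(unsupported(), cudaError_t::cudaErrorNotSupported);
    println!("stubbed API returned {:?}", unsupported());
}
```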
@@ -7444,7 +7441,7 @@ pub unsafe extern "system" fn __cudaPopCallConfiguration(
     gridDim: *mut dim3,
     blockDim: *mut dim3,
     sharedMem: *mut usize,
-    stream: *mut ::std::os::raw::c_void,
+    stream: *mut cudaStream_t,
 ) -> cudaError_t {
     crate::pop_call_configuration(
         gridDim,
@@ -7459,8 +7456,8 @@ pub unsafe extern "system" fn __cudaPushCallConfiguration(
     gridDim: dim3,
     blockDim: dim3,
     sharedMem: usize,
-    stream: *mut ::std::os::raw::c_void,
-) -> ::std::os::raw::c_uint {
+    stream: cudaStream_t,
+) -> cudaError_t {
     crate::push_call_configuration(
         gridDim,
         blockDim,
@@ -7477,10 +7474,10 @@ pub unsafe extern "system" fn __cudaRegisterFatBinary(
 }
 
 #[no_mangle]
-pub extern "system" fn __cudaRegisterFatBinaryEnd(
+pub unsafe extern "system" fn __cudaRegisterFatBinaryEnd(
     fatCubinHandle: *mut *mut ::std::os::raw::c_void,
 ) -> () {
-
+    crate::register_fat_binary_end(fatCubinHandle)
 }
 
 #[no_mangle]
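Both call-configuration shims now take a properly typed stream (`cudaStream_t` instead of `*mut c_void`) and return `cudaError_t`, and `__cudaRegisterFatBinaryEnd` forwards to `crate::register_fat_binary_end` rather than doing nothing. The push/pop pair backs CUDA's `<<<grid, block, sharedMem, stream>>>` launch syntax: compiler-generated host code pushes the configuration and the kernel stub pops it before the actual launch. A self-contained mock of that handshake with simplified stand-in types (not ZLUDA's implementation):

```rust
use std::cell::RefCell;
use std::ffi::c_void;

// Simplified stand-ins; the real types come from the bindgen-generated bindings.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct dim3 { x: u32, y: u32, z: u32 }
type cudaStream_t = *mut c_void;

#[derive(Clone, Copy)]
struct CallConfig {
    grid: dim3,
    block: dim3,
    shared_mem: usize,
    stream: cudaStream_t,
}

thread_local! {
    // One configuration stack per thread, mirroring how the runtime keeps
    // launch configurations between push and pop.
    static CONFIG_STACK: RefCell<Vec<CallConfig>> = RefCell::new(Vec::new());
}

fn push_call_configuration(grid: dim3, block: dim3, shared_mem: usize, stream: cudaStream_t) {
    CONFIG_STACK.with(|s| s.borrow_mut().push(CallConfig { grid, block, shared_mem, stream }));
}

fn pop_call_configuration() -> Option<CallConfig> {
    CONFIG_STACK.with(|s| s.borrow_mut().pop())
}

fn main() {
    // Host code generated for `kernel<<<16, 256>>>()` would push first ...
    push_call_configuration(
        dim3 { x: 16, y: 1, z: 1 },
        dim3 { x: 256, y: 1, z: 1 },
        0,
        std::ptr::null_mut(),
    );
    // ... and the kernel stub pops the configuration before the actual launch.
    let cfg = pop_call_configuration().expect("a configuration was pushed");
    assert_eq!(cfg.block, dim3 { x: 256, y: 1, z: 1 });
}
```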
(The diff for the third changed file is not shown here.)
