Skip to content

Commit

Permalink
Implement reverse lookup (Ptr->Tuple) for CUDNN descriptors. (JuliaGP…
Browse files Browse the repository at this point in the history
  • Loading branch information
RomeoV authored Aug 19, 2023
1 parent dc16d92 commit 4b87ec0
Showing 1 changed file with 40 additions and 4 deletions.
44 changes: 40 additions & 4 deletions lib/cudnn/src/convolution.jl
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,28 @@ function cudnnConvolutionForwardAD(w, x, bias, z; y, activation, convDesc, wDesc
return y
end

"""
    cudnnGetConvolutionDescriptor(d::cudnnConvolutionDescriptor)

Read the settings stored in the convolution descriptor `d` back into plain Julia values.

Returns the tuple `(T, mode, N, padding, stride, dilation)` where `T` is the result of
`juliaDataType` on the reported data type, `mode` is the reported convolution mode, `N` is
the array length reported by `cudnnGetConvolutionNdDescriptor`, and `padding`, `stride`
and `dilation` are `N`-element tuples of `Cint`.

NOTE(review): only the fields filled in by `cudnnGetConvolutionNdDescriptor` are returned.
If the descriptor carries further settings (e.g. group count or math type), they are not
part of this tuple -- confirm whether callers that compare or hash the result need them.
"""
function cudnnGetConvolutionDescriptor(d::cudnnConvolutionDescriptor)
    # The dimensionality of `d` is not known up front, so every buffer is sized for the
    # largest possible case; the query reports how many entries are actually in use.
    maxdims = CUDNN_DIM_MAX - 2
    ndimsref = Ref{Cint}(maxdims)
    moderef = Ref{cuDNN.cudnnConvolutionMode_t}(CUDNN_CONVOLUTION)
    dtyperef = Ref{cuDNN.cudnnDataType_t}(cuDNN.CUDNN_DATA_FLOAT)
    padbuf = Vector{Cint}(undef, maxdims)
    stridebuf = Vector{Cint}(undef, maxdims)
    dilationbuf = Vector{Cint}(undef, maxdims)

    cudnnGetConvolutionNdDescriptor(
        d, maxdims, ndimsref, padbuf, stridebuf, dilationbuf, moderef, dtyperef
    )

    # Keep only the leading entries that the query reported as valid.
    used = 1:ndimsref[]
    astuple(buf) = Tuple(view(buf, used))
    return (
        juliaDataType(dtyperef[]),
        moderef[],
        ndimsref[],
        astuple(padbuf),
        astuple(stridebuf),
        astuple(dilationbuf),
    )
end

# Helper for cudnnConvolutionDescriptor
function cudnnSetConvolutionDescriptor(
ptr::cudnnConvolutionDescriptor_t,
Expand Down Expand Up @@ -179,9 +201,15 @@ const cudnnConvolutionFwdAlgoPerfCacheLock = ReentrantLock()
It can be set to false when beta is zero to save an allocation and must otherwise be set to true.
"""
function cudnnConvolutionFwdAlgoPerf(xDesc, x, wDesc, w, convDesc, yDesc, y, biasDesc, activation, allocateTmpBuf=true)
key = (xDesc, wDesc, convDesc, biasDesc, activation)
xDesc_native = cudnnGetTensorDescriptor(xDesc)
wDesc_native = cudnnGetFilterDescriptor(wDesc)
convDesc_native = cudnnGetConvolutionDescriptor(convDesc)
biasDesc_native = (isnothing(biasDesc) ? nothing
: cudnnGetTensorDescriptor(biasDesc))

key = (xDesc_native, wDesc_native, convDesc_native, biasDesc, activation)
val = lock(cudnnConvolutionFwdAlgoPerfCacheLock) do
get(cudnnConvolutionFwdAlgoPerfCache, key, nothing)
get(cudnnConvolutionFwdAlgoPerfCache, key, nothing)
end
if val === nothing
requestedAlgoCount = Int(CUDNN_CONVOLUTION_FWD_ALGO_COUNT)
Expand Down Expand Up @@ -210,7 +238,11 @@ const cudnnConvolutionBwdDataAlgoPerfCacheLock = ReentrantLock()
It can be set to false when beta is zero to save an allocation and must otherwise be set to true.
"""
function cudnnConvolutionBwdDataAlgoPerf(wDesc, w, dyDesc, dy, convDesc, dxDesc, dx, allocateTmpBuf=true)
key = (wDesc, dyDesc, convDesc)
wDesc_native = cudnnGetFilterDescriptor(wDesc)
dyDesc_native = cudnnGetTensorDescriptor(dyDesc)
convDesc_native = cudnnGetConvolutionDescriptor(convDesc)

key = (wDesc_native, dyDesc_native, convDesc_native)
val = lock(cudnnConvolutionBwdDataAlgoPerfCacheLock) do
get(cudnnConvolutionBwdDataAlgoPerfCache, key, nothing)
end
Expand Down Expand Up @@ -241,7 +273,11 @@ const cudnnConvolutionBwdFilterAlgoPerfCacheLock = ReentrantLock()
It can be set to false when beta is zero to save an allocation and must otherwise be set to true.
"""
function cudnnConvolutionBwdFilterAlgoPerf(xDesc, x, dyDesc, dy, convDesc, dwDesc, dw, allocateTmpBuf=true)
key = (xDesc, dyDesc, convDesc)
xDesc_native = cudnnGetTensorDescriptor(xDesc)
dyDesc_native = cudnnGetTensorDescriptor(dyDesc)
convDesc_native = cudnnGetConvolutionDescriptor(convDesc)

key = (xDesc_native, dyDesc_native, convDesc_native)
val = lock(cudnnConvolutionBwdFilterAlgoPerfCacheLock) do
get(cudnnConvolutionBwdFilterAlgoPerfCache, (xDesc, dyDesc, convDesc), nothing)
end
Expand Down

0 comments on commit 4b87ec0

Please sign in to comment.