diff --git a/ext/CUDAExt/Project.toml b/ext/CUDAExt/Project.toml
index de5f24667..46f7ec4ec 100644
--- a/ext/CUDAExt/Project.toml
+++ b/ext/CUDAExt/Project.toml
@@ -11,7 +11,7 @@ Statistics = "10745b16-79ce-11e8-05f9-87f94e1ad6d2"
 
 [compat]
 CUDA = "3.3.1"
-NNlib = "0.7.23"
+NNlib = "0.7.25"
 julia = "1.6"
 
 [extras]
diff --git a/ext/CUDAExt/src/cudnn/conv.jl b/ext/CUDAExt/src/cudnn/conv.jl
index 027d594ee..a4f23c137 100644
--- a/ext/CUDAExt/src/cudnn/conv.jl
+++ b/ext/CUDAExt/src/cudnn/conv.jl
@@ -2,7 +2,7 @@
 using NNlib: DenseConvDims
 import NNlib: conv!, ∇conv_filter!, ∇conv_data!, conv_bias_act!
 
-using CUDA.CUDNN: scalingParameter, CUDNN_CONVOLUTION, convdims, 
+using CUDA.CUDNN: scalingParameter, CUDNN_CONVOLUTION, convdims,
                   cudnnConvolutionDescriptor, cudnnConvolutionBwdDataAlgoPerf,
                   cudnnConvolutionForward!, cudnnConvolutionBwdFilterAlgoPerf,
                   cudnnConvolutionBackwardData, cudnnConvolutionBackwardFilter,
@@ -19,7 +19,7 @@ function cudnnConvolutionDescriptor(cdims::DenseConvDims, x::DenseCuArray{T}) wh
                                cudnnDataType(T),
                                math_mode(),
                                CUDNN_DEFAULT_REORDER,
-                               Cint(1))
+                               Cint(NNlib.groupcount(cdims)))
 end
 
 function conv!(y::DenseCuArray{T}, x::DenseCuArray{T}, w::DenseCuArray{T}, cdims::DenseConvDims;
@@ -34,7 +34,7 @@ function conv!(y::DenseCuArray{T}, x::DenseCuArray{T}, w::DenseCuArray{T}, cdims
     cudnnConvolutionForward!(y, w, x, d; alpha, beta, z=y)
 end
 
-function conv_bias_act!(y::DenseCuArray{T}, x::DenseCuArray{T}, w::DenseCuArray{T}, 
+function conv_bias_act!(y::DenseCuArray{T}, x::DenseCuArray{T}, w::DenseCuArray{T},
                         cdims::DenseConvDims, bias::DenseCuArray{T}, σ=identity;
                         z::DenseCuArray{T}=y, alpha=1, beta=0, algo=-1) where T<:CUDNNFloat
     if cudnnversion() < v"6"
@@ -42,7 +42,7 @@ function conv_bias_act!(y::DenseCuArray{
     end
     if algo != -1
         @warn "The algo option has been deprecated, the fastest algo is computed automatically" maxlog=1
-    end 
+    end
     d = cudnnConvolutionDescriptor(cdims, x)
     # only relu and identity are supported by cudnnConvolutionForward!
     activation = (σ == NNlib.relu ? CUDNN_ACTIVATION_RELU : CUDNN_ACTIVATION_IDENTITY)
@@ -60,7 +60,7 @@ function ∇conv_data!(dx::DenseCuArray{T}, dy::DenseCuArray{T}, w::DenseCuArray
     end
     if algo != -1
         @warn "The algo option has been deprecated, the fastest algo is computed automatically" maxlog=1
-    end 
+    end
     alpha, beta = scalingParameter(T,alpha), scalingParameter(T,beta);
     xDesc, yDesc, wDesc = cudnnTensorDescriptor(dx), cudnnTensorDescriptor(dy), cudnnFilterDescriptor(w)
     convDesc = cudnnConvolutionDescriptor(cdims, dx)
@@ -78,7 +78,7 @@ function ∇conv_filter!(dw::DenseCuArray{T}, x::DenseCuArray{T}, dy::DenseCuArr
     end
     if algo != -1
         @warn "The algo option has been deprecated, the fastest algo is computed automatically" maxlog=1
-    end 
+    end
     alpha, beta = scalingParameter(T,alpha), scalingParameter(T,beta);
     xDesc, yDesc, wDesc = cudnnTensorDescriptor(x), cudnnTensorDescriptor(dy), cudnnFilterDescriptor(dw)
     convDesc = cudnnConvolutionDescriptor(cdims, x)
diff --git a/ext/CUDAExt/test/conv.jl b/ext/CUDAExt/test/conv.jl
index e4abd3024..f605519ea 100644
--- a/ext/CUDAExt/test/conv.jl
+++ b/ext/CUDAExt/test/conv.jl
@@ -9,21 +9,23 @@ using NNlib: DenseConvDims
     @test ∇conv_filter(a, c, cdims) ≈ collect(∇conv_filter(da, dc, cdims))
 
     # Test for agreement between CPU NNlib and CuDNN versions, across a variety of kwargs
-    for num_spatial_dims in (1, 2, 3)
+    options = Dict{Any, Any}.((
+        (), (:dilation => 2), (:flipkernel => true), (:stride => 2),
+        (:padding => 1),
+    ))
+    C_in_ = 3
+    C_out = 4
+    batch_size = 1
+
+    for groups in (1, 2, 4), num_spatial_dims in (1, 2, 3)
+        # Make `C_in = C_out` when using grouped convolution.
+        C_in = groups == 1 ? C_in_ : C_out
         # Initialize data we'll run our tests over
-        C_in = 3
-        C_out = 4
-        batch_size = 1
         x = rand(Float64, fill(8, num_spatial_dims)..., C_in, batch_size)
-        w = rand(Float64, fill(2, num_spatial_dims)..., C_in, C_out)
-        b = rand(Float64, fill(1, num_spatial_dims)..., C_in, C_out)
-        options = (Dict(), Dict(:dilation => 2), Dict(:flipkernel => true), Dict(:stride => 2), Dict(:padding => 1))
-
-        # @denizyuret: algo option deprecated for nnlib, handling in cudnn
-        # algos = (1, 0, 1, 1,)
-        # for (opts, algo) in zip(options, algos)
+        w = rand(Float64, fill(2, num_spatial_dims)..., C_in ÷ groups, C_out)
 
-        for opts in options
+        for opts in options
+            opts[:groups] = groups
             cdims = DenseConvDims(x, w; opts...)
             y = NNlib.conv(x, w, cdims)
 
@@ -36,19 +38,11 @@ using NNlib: DenseConvDims
 
             gputest((x, w) -> NNlib.conv(x, w, cdims; alpha=2.0), x, w, checkgrad=false) # TODO
             gputest((y, w) -> NNlib.∇conv_data(y, w, cdims; alpha=2.0), y, w, checkgrad=false) # TODO
             gputest((x, y) -> NNlib.∇conv_filter(x, y, cdims; alpha=2.0), x, y, checkgrad=false) # TODO
-            
+
             gputest((y, x, w) -> NNlib.conv!(copy(y), x, w, cdims; beta=2.0), y, x, w, checkgrad=false) # TODO
             # @test_broken gputest((x, y, w) -> NNlib.∇conv_data!(copy(x), y, w, cdims; beta=2.0), x, y, w, checkgrad=false) #TODO
             gputest((w, x, y) -> NNlib.∇conv_filter!(copy(w), x, y, cdims; beta=2.0), w, x, y, checkgrad=false) # TODO
         end
-
-        # CPU implementation of ∇conv_bias!
-        db = zeros(Float64, 1, 1, 3, 1)
-        dy = randn(Float64, 8, 8, 3, 1)
-        function NNlibCUDA.∇conv_bias!(db, dy)
-            db .= sum(dy, dims=1:(ndims(dy)-2))
-            return db
-        end
-        gputest(NNlibCUDA.∇conv_bias!, db, dy, checkgrad=false)
     end
+end
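The substantive change above is passing `NNlib.groupcount(cdims)` instead of a hard-coded `Cint(1)` as the group count in the cuDNN convolution descriptor, which is what enables grouped convolutions on the GPU path. As a minimal sketch of how the shapes line up under this change (illustrative only, not part of the patch; the sizes mirror the test file, where the filter's input-channel dimension is `C_in ÷ groups` while `C_out` is unchanged):

```julia
using NNlib
using NNlib: DenseConvDims

x = rand(Float32, 8, 8, 4, 1)          # W × H × C_in × N, with C_in = 4
w = rand(Float32, 2, 2, 2, 4)          # kW × kH × (C_in ÷ groups) × C_out, groups = 2
cdims = DenseConvDims(x, w; groups=2)  # requires NNlib ≥ 0.7.25, hence the compat bump
NNlib.groupcount(cdims) == 2           # the value now forwarded to the cuDNN descriptor
y = NNlib.conv(x, w, cdims)            # size(y) == (7, 7, 4, 1)
```

With a `CuArray` input, `conv!` builds the descriptor through the patched `cudnnConvolutionDescriptor` above, so the same `groups` value reaches cuDNN; the test loop over `groups in (1, 2, 4)` checks that this agrees with the CPU NNlib result.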