diff --git a/ext/CUDAExt/Project.toml b/ext/CUDAExt/Project.toml
index de5f24667..46f7ec4ec 100644
--- a/ext/CUDAExt/Project.toml
+++ b/ext/CUDAExt/Project.toml
@@ -11,7 +11,7 @@ Statistics = "10745b16-79ce-11e8-05f9-87f94e1ad6d2"
 
 [compat]
 CUDA = "3.3.1"
-NNlib = "0.7.23"
+NNlib = "0.7.25"
 julia = "1.6"
 
 [extras]
diff --git a/ext/CUDAExt/src/cudnn/conv.jl b/ext/CUDAExt/src/cudnn/conv.jl
index 027d594ee..a4f23c137 100644
--- a/ext/CUDAExt/src/cudnn/conv.jl
+++ b/ext/CUDAExt/src/cudnn/conv.jl
@@ -2,7 +2,7 @@
 using NNlib: DenseConvDims
 import NNlib: conv!, ∇conv_filter!, ∇conv_data!, conv_bias_act!
 
-using CUDA.CUDNN: scalingParameter, CUDNN_CONVOLUTION, convdims, 
+using CUDA.CUDNN: scalingParameter, CUDNN_CONVOLUTION, convdims,
                   cudnnConvolutionDescriptor, cudnnConvolutionBwdDataAlgoPerf,
                   cudnnConvolutionForward!, cudnnConvolutionBwdFilterAlgoPerf,
                   cudnnConvolutionBackwardData, cudnnConvolutionBackwardFilter,
@@ -19,7 +19,7 @@ function cudnnConvolutionDescriptor(cdims::DenseConvDims, x::DenseCuArray{T}) wh
                                cudnnDataType(T),
                                math_mode(),
                                CUDNN_DEFAULT_REORDER,
-                               Cint(1))
+                               Cint(NNlib.groupcount(cdims)))
 end
 
 function conv!(y::DenseCuArray{T}, x::DenseCuArray{T}, w::DenseCuArray{T}, cdims::DenseConvDims;
@@ -34,7 +34,7 @@ function conv!(y::DenseCuArray{T}, x::DenseCuArray{T}, w::DenseCuArray{T}, cdims
     cudnnConvolutionForward!(y, w, x, d; alpha, beta, z=y)
 end
 
-function conv_bias_act!(y::DenseCuArray{T}, x::DenseCuArray{T}, w::DenseCuArray{T}, 
+function conv_bias_act!(y::DenseCuArray{T}, x::DenseCuArray{T}, w::DenseCuArray{T},
                         cdims::DenseConvDims, bias::DenseCuArray{T}, σ=identity;
                         z::DenseCuArray{T}=y, alpha=1, beta=0, algo=-1) where T<:CUDNNFloat
     if cudnnversion() < v"6"
@@ -42,7 +42,7 @@ function conv_bias_act!(y::DenseCuArray{
     end
     if algo != -1
         @warn "The algo option has been deprecated, the fastest algo is computed automatically" maxlog=1
-    end 
+    end
     d = cudnnConvolutionDescriptor(cdims, x)
     # only relu and identity are supported by cudnnConvolutionForward!
     activation = (σ == NNlib.relu ? CUDNN_ACTIVATION_RELU : CUDNN_ACTIVATION_IDENTITY)
@@ -60,7 +60,7 @@ function ∇conv_data!(dx::DenseCuArray{T}, dy::DenseCuArray{T}, w::DenseCuArray
     end
     if algo != -1
         @warn "The algo option has been deprecated, the fastest algo is computed automatically" maxlog=1
-    end 
+    end
     alpha, beta = scalingParameter(T,alpha), scalingParameter(T,beta);
     xDesc, yDesc, wDesc = cudnnTensorDescriptor(dx), cudnnTensorDescriptor(dy), cudnnFilterDescriptor(w)
     convDesc = cudnnConvolutionDescriptor(cdims, dx)
@@ -78,7 +78,7 @@ function ∇conv_filter!(dw::DenseCuArray{T}, x::DenseCuArray{T}, dy::DenseCuArr
     end
     if algo != -1
         @warn "The algo option has been deprecated, the fastest algo is computed automatically" maxlog=1
-    end 
+    end
     alpha, beta = scalingParameter(T,alpha), scalingParameter(T,beta);
     xDesc, yDesc, wDesc = cudnnTensorDescriptor(x), cudnnTensorDescriptor(dy), cudnnFilterDescriptor(dw)
     convDesc = cudnnConvolutionDescriptor(cdims, x)
diff --git a/ext/CUDAExt/test/conv.jl b/ext/CUDAExt/test/conv.jl
index e4abd3024..f605519ea 100644
--- a/ext/CUDAExt/test/conv.jl
+++ b/ext/CUDAExt/test/conv.jl
@@ -9,21 +9,23 @@ using NNlib: DenseConvDims
     @test ∇conv_filter(a, c, cdims) ≈ collect(∇conv_filter(da, dc, cdims))
 
     # Test for agreement between CPU NNlib and CuDNN versions, across a variety of kwargs
-    for num_spatial_dims in (1, 2, 3)
+    options = Dict{Any, Any}.((
+        (), (:dilation => 2), (:flipkernel => true), (:stride => 2),
+        (:padding => 1),
+    ))
+    C_in_ = 3
+    C_out = 4
+    batch_size = 1
+
+    for groups in (1, 2, 4), num_spatial_dims in (1, 2, 3)
+        # Make `C_in = C_out` when using grouped convolution.
+        C_in = groups == 1 ? C_in_ : C_out
         # Initialize data we'll run our tests over
-        C_in = 3
-        C_out = 4
-        batch_size = 1
         x = rand(Float64, fill(8, num_spatial_dims)..., C_in, batch_size)
-        w = rand(Float64, fill(2, num_spatial_dims)..., C_in, C_out)
-        b = rand(Float64, fill(1, num_spatial_dims)..., C_in, C_out)
-        options = (Dict(), Dict(:dilation => 2), Dict(:flipkernel => true), Dict(:stride => 2), Dict(:padding => 1))
-
-        # @denizyuret: algo option deprecated for nnlib, handling in cudnn
-        # algos = (1, 0, 1, 1,)
-        # for (opts, algo) in zip(options, algos)
+        w = rand(Float64, fill(2, num_spatial_dims)..., C_in ÷ groups, C_out)
 
-        for opts in options
+        for opts in options
+            opts[:groups] = groups
             cdims = DenseConvDims(x, w; opts...)
             y = NNlib.conv(x, w, cdims)
 
@@ -36,19 +38,11 @@ using NNlib: DenseConvDims
 
             gputest((x, w) -> NNlib.conv(x, w, cdims; alpha=2.0), x, w, checkgrad=false) # TODO
             gputest((y, w) -> NNlib.∇conv_data(y, w, cdims; alpha=2.0), y, w, checkgrad=false) # TODO
             gputest((x, y) -> NNlib.∇conv_filter(x, y, cdims; alpha=2.0), x, y, checkgrad=false) # TODO
-            
+
             gputest((y, x, w) -> NNlib.conv!(copy(y), x, w, cdims; beta=2.0), y, x, w, checkgrad=false) # TODO
             # @test_broken gputest((x, y, w) -> NNlib.∇conv_data!(copy(x), y, w, cdims; beta=2.0), x, y, w, checkgrad=false) #TODO
             gputest((w, x, y) -> NNlib.∇conv_filter!(copy(w), x, y, cdims; beta=2.0), w, x, y, checkgrad=false) # TODO
         end
-
-        # CPU implementation of ∇conv_bias!
-        db = zeros(Float64, 1, 1, 3, 1)
-        dy = randn(Float64, 8, 8, 3, 1)
-        function NNlibCUDA.∇conv_bias!(db, dy)
-            db .= sum(dy, dims=1:(ndims(dy)-2))
-            return db
-        end
-        gputest(NNlibCUDA.∇conv_bias!, db, dy, checkgrad=false)
     end
+end
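The substantive change above is passing `NNlib.groupcount(cdims)` instead of a hard-coded `Cint(1)` as the group count in the cuDNN convolution descriptor, which is what enables grouped convolutions on the GPU path. As a minimal sketch of how the shapes line up under this change (illustrative only, not part of the patch; the sizes mirror the test file, where the filter's input-channel dimension is `C_in ÷ groups` while `C_out` is unchanged):

```julia
using NNlib
using NNlib: DenseConvDims

x = rand(Float32, 8, 8, 4, 1)          # W × H × C_in × N, with C_in = 4
w = rand(Float32, 2, 2, 2, 4)          # kW × kH × (C_in ÷ groups) × C_out, groups = 2
cdims = DenseConvDims(x, w; groups=2)  # requires NNlib ≥ 0.7.25, hence the compat bump
NNlib.groupcount(cdims) == 2           # the value now forwarded to the cuDNN descriptor
y = NNlib.conv(x, w, cdims)            # size(y) == (7, 7, 4, 1)
```

With a `CuArray` input, `conv!` builds the descriptor through the patched `cudnnConvolutionDescriptor` above, so the same `groups` value reaches cuDNN; the test loop over `groups in (1, 2, 4)` checks that this agrees with the CPU NNlib result.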