diff --git a/Project.toml b/Project.toml
index 1181f429..baa710d2 100644
--- a/Project.toml
+++ b/Project.toml
@@ -32,21 +32,21 @@ LuxLibTrackercuDNNExt = ["CUDA", "Tracker", "cuDNN"]
 LuxLibcuDNNExt = ["CUDA", "cuDNN"]
 
 [compat]
-AMDGPU = "0.8"
-Aqua = "0.8"
+AMDGPU = "0.8.4"
+Aqua = "0.8.7"
 CUDA = "5.2"
 ChainRulesCore = "1.20"
 ComponentArrays = "0.15.8"
 ExplicitImports = "1.4.1"
 FastClosures = "0.3.2"
 ForwardDiff = "0.10.36"
-KernelAbstractions = "0.9.2"
+KernelAbstractions = "0.9.15"
 LuxAMDGPU = "0.2.1"
 LuxCUDA = "0.3.1"
 LuxCore = "0.1.13"
 LuxTestUtils = "0.1.15"
 Markdown = "1.9"
-NNlib = "0.9.9"
+NNlib = "0.9.10"
 PrecompileTools = "1.2"
 Random = "1.9"
 ReTestItems = "1"
@@ -55,7 +55,7 @@ ReverseDiff = "1.15"
 StableRNGs = "1"
 Statistics = "1.9"
 Test = "1.9"
-Tracker = "0.2.26"
+Tracker = "0.2.31"
 Zygote = "0.6.69"
 cuDNN = "1.3"
 julia = "1.9"
diff --git a/ext/LuxLibcuDNNExt/batchnorm.jl b/ext/LuxLibcuDNNExt/batchnorm.jl
index e3787220..668c8812 100644
--- a/ext/LuxLibcuDNNExt/batchnorm.jl
+++ b/ext/LuxLibcuDNNExt/batchnorm.jl
@@ -1,8 +1,7 @@
-# NOTE: This can be upstreamed to LuxCUDA once we drop support for v1.6
 # Difference from the NNlib version: We expose the mean and inv_variance computed in the
 # cudnn call, since they can be used at other places like forward mode AD
 @inline function _wsize(x::AbstractArray{T, N}) where {T, N}
-    return ntuple(i -> ifelse(i == N - 1, size(x, N - 1), 1), N)
+    return ntuple(i -> i == N - 1 ? size(x, N - 1 : 1), N)
 end
 
 function LuxLib.batchnorm_cudnn(γ::Nothing, β::Nothing, x::DenseCuArray, args...; kwargs...)
diff --git a/test/api/batchnorm_tests.jl b/test/api/batchnorm_tests.jl
index 5453ff9f..d533746e 100644
--- a/test/api/batchnorm_tests.jl
+++ b/test/api/batchnorm_tests.jl
@@ -2,13 +2,13 @@
     rng = get_stable_rng(12345)
 
     function _setup_batchnorm(aType, T, sz; affine::Bool=true, track_stats::Bool)
-        x = randn(T, sz) |> aType
-        scale = affine ? aType(randn(T, sz[end - 1])) : nothing
-        bias = affine ? aType(randn(T, sz[end - 1])) : nothing
+        x = __generate_fixed_array(T, sz) |> aType
+        scale = affine ? aType(__generate_fixed_array(T, sz[end - 1])) : nothing
+        bias = affine ? aType(__generate_fixed_array(T, sz[end - 1])) : nothing
 
         if track_stats
-            running_mean = randn(T, sz[end - 1]) |> aType
-            running_var = abs2.(randn(T, sz[end - 1])) |> aType
+            running_mean = __generate_fixed_array(T, sz[end - 1]) |> aType
+            running_var = abs2.(__generate_fixed_array(T, sz[end - 1])) |> aType
             return x, scale, bias, running_mean, running_var
         else
             return x, scale, bias, nothing, nothing
diff --git a/test/api/groupnorm_tests.jl b/test/api/groupnorm_tests.jl
index 3f4e03f4..26284846 100644
--- a/test/api/groupnorm_tests.jl
+++ b/test/api/groupnorm_tests.jl
@@ -1,10 +1,16 @@
 @testsetup module GroupNormSetup
 using LuxLib
 
+@inline __generate_fixed_array(::Type{T}, sz...) where {T} = __generate_fixed_array(T, sz)
+@inline function __generate_fixed_array(::Type{T}, sz) where {T}
+    return reshape(T.(collect(1:prod(sz)) ./ prod(sz)), sz...)
+end
+@inline __generate_fixed_array(::Type{T}, sz::Int) where {T} = T.(collect(1:sz) ./ sz)
+
 function _setup_groupnorm(aType, T, sz, groups)
-    x = randn(T, sz) |> aType
-    scale = randn(T, sz[end - 1]) |> aType
-    bias = randn(T, sz[end - 1]) |> aType
+    x = __generate_fixed_array(T, sz) |> aType
+    scale = __generate_fixed_array(T, sz[end - 1]) |> aType
+    bias = __generate_fixed_array(T, sz[end - 1]) |> aType
     return x, scale, bias
 end
 
@@ -27,8 +33,6 @@ end
             sz in ((16, 16, 6, 4), (32, 32, 6, 4), (64, 64, 12, 4)),
             groups in (2, 3)
 
-            T === Float16 && mode == "AMDGPU" && continue
-
             _f = (args...) -> groupnorm(args...; groups, epsilon)
 
             epsilon = T(1e-5)
@@ -40,8 +44,7 @@ end
 
             @inferred groupnorm(x, scale, bias; groups, epsilon)
 
-            # @jet _f(x, scale, bias)  # test_call throws exception
-            LuxTestUtils.JET.@test_opt target_modules=(LuxLib,) _f(x, scale, bias)
+            @jet _f(x, scale, bias)
 
             @test y isa aType{T, length(sz)}
             @test size(y) == sz
@@ -55,14 +58,14 @@ end
 
             # The KA implementation reorders operations manually for maximal
             # performance. Hence equality cannot be guaranteed.
-            @test check_approx(y, y_; atol=1.0f-3, rtol=1.0f-3)
-            @test check_approx(gs_x, gs_x_; atol=1.0f-3, rtol=1.0f-3)
-            @test check_approx(gs_scale, gs_scale_; atol=1.0f-3, rtol=1.0f-3)
-            @test check_approx(gs_bias, gs_bias_; atol=1.0f-3, rtol=1.0f-3)
+            @test check_approx(y, y_; atol=1.0f-1, rtol=1.0f-1)
+            @test check_approx(gs_x, gs_x_; atol=1.0f-1, rtol=1.0f-1)
+            @test check_approx(gs_scale, gs_scale_; atol=1.0f-1, rtol=1.0f-1)
+            @test check_approx(gs_bias, gs_bias_; atol=1.0f-1, rtol=1.0f-1)
 
             fp16 = T == Float16
             __f = (args...) -> sum(groupnorm(x, args...; groups, epsilon))
-            @eval @test_gradients $__f $scale $bias gpu_testing=$on_gpu atol=1.0f-3 rtol=1.0f-3 soft_fail=$fp16
+            @eval @test_gradients $__f $scale $bias gpu_testing=$on_gpu atol=1.0f-2 rtol=1.0f-2 soft_fail=$fp16
         end
     end
 end
diff --git a/test/api/instancenorm_tests.jl b/test/api/instancenorm_tests.jl
index b601e227..128feaa2 100644
--- a/test/api/instancenorm_tests.jl
+++ b/test/api/instancenorm_tests.jl
@@ -4,9 +4,9 @@
     rng = get_stable_rng(12345)
 
     function _setup_instancenorm(aType, T, sz; affine::Bool=true)
-        x = randn(T, sz) |> aType
-        scale = affine ? aType(ones(T, sz[end - 1])) : nothing
-        bias = affine ? aType(zeros(T, sz[end - 1])) : nothing
+        x = __generate_fixed_array(T, sz) |> aType
+        scale = affine ? aType(__generate_fixed_array(T, sz[end - 1])) : nothing
+        bias = affine ? aType(__generate_fixed_array(T, sz[end - 1])) : nothing
         return x, scale, bias
     end
 
@@ -31,8 +31,10 @@
             @test size(y) == sz
 
             _target_std = ones(ntuple(_ -> 1, length(sz) - 2)..., size(x)[(end - 1):end]...)
-            @eval @test check_approx(std(Array($y); dims=1:($(length(sz) - 2))),
-                $_target_std; atol=0.2, rtol=0.2)
+            if !affine
+                @eval @test check_approx(std(Array($y); dims=1:($(length(sz) - 2))),
+                    $_target_std; atol=0.2, rtol=0.2)
+            end
             @test std(y; dims=1:(length(sz) - 2)) != std(x; dims=1:(length(sz) - 2))
 
             if __istraining(training) && affine
diff --git a/test/api/layernorm_tests.jl b/test/api/layernorm_tests.jl
index 4cd2d9d4..8aa39671 100644
--- a/test/api/layernorm_tests.jl
+++ b/test/api/layernorm_tests.jl
@@ -2,10 +2,10 @@
     using Statistics
 
     function _setup_layernorm(aType, T, x_size, affine_shape)
-        x = randn(T, x_size) |> aType
+        x = __generate_fixed_array(T, x_size) |> aType
         if affine_shape !== nothing
-            scale = randn(T, affine_shape..., 1) |> aType
-            bias = randn(T, affine_shape..., 1) |> aType
+            scale = __generate_fixed_array(T, (affine_shape..., 1)) |> aType
+            bias = __generate_fixed_array(T, (affine_shape..., 1)) |> aType
             return x, scale, bias
         else
             return x, nothing, nothing
diff --git a/test/shared_testsetup.jl b/test/shared_testsetup.jl
index 886b20d6..acff5d77 100644
--- a/test/shared_testsetup.jl
+++ b/test/shared_testsetup.jl
@@ -28,6 +28,12 @@ get_stable_rng(seed=12345) = StableRNG(seed)
 
 __istraining(::Val{training}) where {training} = training
 
+@inline __generate_fixed_array(::Type{T}, sz...) where {T} = __generate_fixed_array(T, sz)
+@inline function __generate_fixed_array(::Type{T}, sz) where {T}
+    return reshape(T.(collect(1:prod(sz)) ./ prod(sz)), sz...)
+end
+@inline __generate_fixed_array(::Type{T}, sz::Int) where {T} = T.(collect(1:sz) ./ sz)
+
 export cpu_testing, cuda_testing, amdgpu_testing, MODES, get_stable_rng, __istraining,
-       check_approx, @jet, @test_gradients
+       check_approx, @jet, @test_gradients, __generate_fixed_array
 end