This repository has been archived by the owner on Nov 4, 2024. It is now read-only.

Commit

fix: update to use test_gradients macro
avik-pal committed Sep 18, 2024
1 parent 0df09fa commit 2c77ccb
Showing 11 changed files with 34 additions and 33 deletions.
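
The change is mechanical across the test suite: the LuxTestUtils compat bound is raised from 1.1.2 to 1.2, and every call to the test_gradients function becomes a call to the @test_gradients macro with the same arguments. A minimal sketch of the pattern, assuming LuxTestUtils 1.2 is installed and using a made-up function f and input x purely for illustration:

using LuxTestUtils

f(x) = sum(abs2, x)       # hypothetical function under test
x = randn(Float32, 3, 4)  # hypothetical input

# Old call form (LuxTestUtils 1.1.x), replaced throughout this commit:
# test_gradients(f, x; atol=1.0f-3, rtol=1.0f-3)

# New macro form used in the updated tests:
@test_gradients(f, x; atol=1.0f-3, rtol=1.0f-3)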
2 changes: 1 addition & 1 deletion test/Project.toml
@@ -44,7 +44,7 @@ ForwardDiff = "0.10.36"
 Hwloc = "3.2"
 InteractiveUtils = "<0.0.1, 1"
 JLArrays = "0.1.5"
-LuxTestUtils = "1.1.2"
+LuxTestUtils = "1.2"
 MKL = "0.7"
 MLDataDevices = "1.0.0"
 NNlib = "0.9.21"
6 changes: 3 additions & 3 deletions test/common_ops/activation_tests.jl
@@ -39,9 +39,9 @@
 end
 @test @inferred(Zygote.gradient(apply_act_fast2, f, x)) isa Any

-test_gradients(Base.Fix1(apply_act, f), x; atol, rtol)
-test_gradients(Base.Fix1(apply_act_fast, f), x; atol, rtol)
-test_gradients(Base.Fix1(apply_act_fast2, f), x; atol, rtol)
+@test_gradients(Base.Fix1(apply_act, f), x; atol, rtol)
+@test_gradients(Base.Fix1(apply_act_fast, f), x; atol, rtol)
+@test_gradients(Base.Fix1(apply_act_fast2, f), x; atol, rtol)

 ∂x1 = Zygote.gradient(apply_act, f, x)[2]
 ∂x2 = Zygote.gradient(apply_act_fast, f, x)[2]
6 changes: 3 additions & 3 deletions test/common_ops/bias_act_tests.jl
@@ -50,11 +50,11 @@
 @test_broken @inferred(Zygote.gradient(bias_act_loss3, act, x, b)) isa Any
 end

-test_gradients(__Fix1(bias_act_loss1, act), x, b; atol, rtol,
+@test_gradients(__Fix1(bias_act_loss1, act), x, b; atol, rtol,
 soft_fail=fp16 ? [AutoFiniteDiff()] : [])
-test_gradients(__Fix1(bias_act_loss2, act), x, b; atol, rtol,
+@test_gradients(__Fix1(bias_act_loss2, act), x, b; atol, rtol,
 soft_fail=fp16 ? [AutoFiniteDiff()] : [])
-test_gradients(__Fix1(bias_act_loss3, act), x, b; atol, rtol,
+@test_gradients(__Fix1(bias_act_loss3, act), x, b; atol, rtol,
 soft_fail=fp16 ? [AutoFiniteDiff()] : [])

 ∂x1, ∂b1 = Zygote.gradient(__Fix1(bias_act_loss1, act), x, b)
2 changes: 1 addition & 1 deletion test/common_ops/conv_tests.jl
@@ -68,7 +68,7 @@ function run_conv_testing(gen_f::Function, activation, kernel, stride, padding,
 mp && push!(skip_backends, AutoReverseDiff())
 ((mp && ongpu) || (mode == "amdgpu" && (Tx == Float64 || Tw == Float64))) &&
 push!(skip_backends, AutoTracker())
-test_gradients(__f_grad, weight, x, bias; atol, rtol, skip_backends, soft_fail=fp16)
+@test_gradients(__f_grad, weight, x, bias; atol, rtol, skip_backends, soft_fail=fp16)
 end

 anonact = x -> gelu(x)
2 changes: 1 addition & 1 deletion test/common_ops/dense_tests.jl
@@ -46,7 +46,7 @@ function run_dense_testing(Tw, Tx, M, N, hasbias, activation, aType, mode, ongpu
 __f_grad = let activation = activation
 (w, x, b) -> __f(activation, w, x, b)
 end
-test_gradients(__f_grad, w, x, bias; atol, rtol, skip_backends, soft_fail=fp16)
+@test_gradients(__f_grad, w, x, bias; atol, rtol, skip_backends, soft_fail=fp16)

 y_simple = dense_simple(activation, w, x, bias)
 y_zyg = fused_dense_bias_activation(activation, w, x, bias)
9 changes: 5 additions & 4 deletions test/common_ops/dropout_tests.jl
@@ -27,7 +27,7 @@
 __f = let rng = rng, T = T
 x -> sum(first(dropout(rng, x, T(0.5), Val(true), T(2), dims)))
 end
-test_gradients(__f, x; atol=1.0f-3, rtol=1.0f-3,
+@test_gradients(__f, x; atol=1.0f-3, rtol=1.0f-3,
 soft_fail=(T == Float16 ? [AutoFiniteDiff()] : []),
 broken_backends=(T == Float16 && Sys.iswindows() ? [AutoEnzyme()] : []))

@@ -74,7 +74,8 @@ end
 __f = let rng = rng, mask = mask, p = T(0.5), invp = T(2)
 x -> sum(first(dropout(rng, x, mask, p, Val(true), Val(true), invp, :)))
 end
-test_gradients(__f, x; atol=1.0f-3, rtol=1.0f-3,
+@test_gradients(__f, x; atol=1.0f-3,
+rtol=1.0f-3,
 soft_fail=(T == Float16 ? [AutoFiniteDiff()] : []))

 @jet sum(first(dropout(
@@ -105,7 +106,7 @@ end
 soft_fail = T == Float16 ? Any[AutoFiniteDiff()] : []
 skip_backends = length(x_shape) == 5 ? [AutoEnzyme()] : []

-test_gradients(__f, x; atol=1.0f-3, rtol=1.0f-3, soft_fail, skip_backends)
+@test_gradients(__f, x; atol=1.0f-3, rtol=1.0f-3, soft_fail, skip_backends)

 @jet sum(first(dropout(
 rng, x, mask, T(0.5), Val(true), Val(false), T(2), :)))
@@ -154,7 +155,7 @@ end
 __f = let rng = rng
 x -> sum(first(alpha_dropout(rng, x, T(0.5), Val(true))))
 end
-test_gradients(__f, x; atol=1.0f-3, rtol=1.0f-3,
+@test_gradients(__f, x; atol=1.0f-3, rtol=1.0f-3,
 soft_fail=(T == Float16 ? [AutoFiniteDiff()] : []),
 broken_backends=(T == Float16 && Sys.iswindows() ? [AutoEnzyme()] : []))
6 changes: 3 additions & 3 deletions test/normalization/batchnorm_tests.jl
@@ -98,8 +98,8 @@ function run_batchnorm_testing(

 __f = (args...) -> sum(first(batchnorm(
 args..., rm, rv, training, act, T(0.9), epsilon)))
-test_gradients(
-__f, x, scale, bias; atol, rtol, skip_backends, soft_fail, broken_backends)
+@test_gradients(__f, x, scale, bias; atol, rtol, skip_backends, soft_fail,
+broken_backends)
 end

 if anonact !== act
@@ -183,6 +183,6 @@ end

 __f = (args...) -> sum(first(batchnorm(
 args..., running_mean, running_var, Val(true), identity, 0.9f0, 1.0f-5)))
-test_gradients(__f, x, scale, bias; atol=1.0f-3, rtol=1.0f-3)
+@test_gradients(__f, x, scale, bias; atol=1.0f-3, rtol=1.0f-3)
 end
 end
2 changes: 1 addition & 1 deletion test/normalization/groupnorm_tests.jl
@@ -74,7 +74,7 @@ function run_groupnorm_testing(T, sz, groups, affine, act, aType, mode, ongpu)

 if affine
 __f = (args...) -> sum(groupnorm(args..., groups, act, epsilon))
-test_gradients(__f, x, scale, bias; atol, rtol, soft_fail)
+@test_gradients(__f, x, scale, bias; atol, rtol, soft_fail)
 end
 end
4 changes: 2 additions & 2 deletions test/normalization/instancenorm_tests.jl
@@ -39,7 +39,7 @@ function run_instancenorm_testing(gen_f, T, sz, training, act, aType, mode, ongp
 if is_training(training)
 __f = (args...) -> sum(first(instancenorm(args..., training, act, epsilon)))
 soft_fail = fp16 ? fp16 : [AutoFiniteDiff()]
-test_gradients(__f, x, scale, bias; atol, rtol, soft_fail)
+@test_gradients(__f, x, scale, bias; atol, rtol, soft_fail)
 end

 # Now test with running stats
@@ -67,7 +67,7 @@ function run_instancenorm_testing(gen_f, T, sz, training, act, aType, mode, ongp
 args..., rm, rv, training, act, T(0.1), epsilon)))
 soft_fail = fp16 ? fp16 : [AutoFiniteDiff()]
 skip_backends = (Sys.iswindows() && fp16) ? [AutoEnzyme()] : []
-test_gradients(__f, x, scale, bias; atol, rtol, soft_fail, skip_backends)
+@test_gradients(__f, x, scale, bias; atol, rtol, soft_fail, skip_backends)
 end
 end
4 changes: 2 additions & 2 deletions test/normalization/layernorm_tests.jl
@@ -58,10 +58,10 @@ function run_layernorm_testing_core(
 soft_fail = fp16 ? fp16 : [AutoFiniteDiff()]
 if affine_shape !== nothing
 __f = (args...) -> sum(_f(args...))
-test_gradients(__f, x, scale, bias; atol, rtol, soft_fail)
+@test_gradients(__f, x, scale, bias; atol, rtol, soft_fail)
 else
 __f = x -> sum(_f(x, scale, bias))
-test_gradients(__f, x; atol, rtol, soft_fail)
+@test_gradients(__f, x; atol, rtol, soft_fail)
 end

 if anonact !== act
24 changes: 12 additions & 12 deletions test/others/bmm_tests.jl
@@ -264,36 +264,36 @@ end
 B = 3

 @testset "Two 3-arrays" begin
-test_gradients(fn, aType(randn(rng, M, P, B)),
+@test_gradients(fn, aType(randn(rng, M, P, B)),
 aType(randn(rng, P, Q, B)); atol=1e-3, rtol=1e-3)
-test_gradients(fn, batched_adjoint(aType(randn(rng, P, M, B))),
+@test_gradients(fn, batched_adjoint(aType(randn(rng, P, M, B))),
 aType(randn(rng, P, Q, B)); atol=1e-3, rtol=1e-3)
-test_gradients(fn, aType(randn(rng, M, P, B)),
+@test_gradients(fn, aType(randn(rng, M, P, B)),
 batched_transpose(aType(randn(rng, Q, P, B))); atol=1e-3, rtol=1e-3)
 end

 @testset "One a matrix..." begin
-test_gradients(fn, aType(randn(rng, M, P)),
+@test_gradients(fn, aType(randn(rng, M, P)),
 aType(randn(rng, P, Q, B)); atol=1e-3, rtol=1e-3)
-test_gradients(fn, adjoint(aType(randn(rng, P, M))),
+@test_gradients(fn, adjoint(aType(randn(rng, P, M))),
 aType(randn(rng, P, Q, B)); atol=1e-3, rtol=1e-3)
-test_gradients(fn, aType(randn(rng, M, P)),
+@test_gradients(fn, aType(randn(rng, M, P)),
 batched_adjoint(aType(randn(rng, Q, P, B))); atol=1e-3, rtol=1e-3)

-test_gradients(fn, aType(randn(rng, M, P)),
+@test_gradients(fn, aType(randn(rng, M, P)),
 aType(randn(rng, P, Q, B)); atol=1e-3, rtol=1e-3)
-test_gradients(fn, adjoint(aType(randn(rng, P, M))),
+@test_gradients(fn, adjoint(aType(randn(rng, P, M))),
 aType(randn(rng, P, Q, B)); atol=1e-3, rtol=1e-3)
-test_gradients(fn, aType(randn(rng, M, P)),
+@test_gradients(fn, aType(randn(rng, M, P)),
 batched_adjoint(aType(randn(rng, Q, P, B))); atol=1e-3, rtol=1e-3)
 end

 @testset "... or equivalent to a matrix" begin
-test_gradients(fn, aType(randn(rng, M, P, 1)),
+@test_gradients(fn, aType(randn(rng, M, P, 1)),
 aType(randn(rng, P, Q, B)); atol=1e-3, rtol=1e-3)
-test_gradients(fn, batched_transpose(aType(randn(rng, P, M, 1))),
+@test_gradients(fn, batched_transpose(aType(randn(rng, P, M, 1))),
 aType(randn(rng, P, Q, B)); atol=1e-3, rtol=1e-3)
-test_gradients(fn, aType(randn(rng, M, P, 1)),
+@test_gradients(fn, aType(randn(rng, M, P, 1)),
 batched_transpose(aType(randn(rng, Q, P, B))); atol=1e-3, rtol=1e-3)
 end
 end
