From a9a3c8b7cc9a92a9421f1f6d7c31f0a75d7b140f Mon Sep 17 00:00:00 2001
From: Avik Pal
Date: Sun, 14 Apr 2024 11:03:04 -0400
Subject: [PATCH] Try making the tests deterministic

---
 .buildkite/pipeline.yml              |  3 ++-
 .github/workflows/Downgrade.yml      |  2 +-
 Project.toml                         | 22 +++++++++++-----------
 ext/LuxLibcuDNNExt/LuxLibcuDNNExt.jl |  3 +--
 ext/LuxLibcuDNNExt/batchnorm.jl      |  3 +--
 test/api/batchnorm_tests.jl          | 10 +++++-----
 test/api/groupnorm_tests.jl          | 27 +++++++++++++++------------
 test/api/instancenorm_tests.jl       | 15 +++++++++------
 test/api/layernorm_tests.jl          |  6 +++---
 test/shared_testsetup.jl             |  8 +++++++-
 10 files changed, 55 insertions(+), 44 deletions(-)

diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
index dfdd6637..c3bbdb8a 100644
--- a/.buildkite/pipeline.yml
+++ b/.buildkite/pipeline.yml
@@ -18,6 +18,7 @@ steps:
           cuda: "*"
         env:
           GROUP: "CUDA"
+          RETESTITEMS_NWORKERS: 0 # Distributed is causing stalling issues with CUDA
         if: build.message !~ /\[skip tests\]/
         timeout_in_minutes: 60
         matrix:
@@ -160,6 +161,6 @@ steps:
               - "Boltz"

 env:
-  RETESTITEMS_NWORKERS: 2
+  RETESTITEMS_NWORKERS: 4
   RETESTITEMS_NWORKER_THREADS: 2
   SECRET_CODECOV_TOKEN: "wMpDLaAVEHe6EJAc+LZBl4jF3wADVN6F+15vr/ONJHOv/XXbtYovuc1PCQwhz0AzZjWpSO12IDTyKfwVgYvqaGYfQ9yGyplJtSu2MiL2k44B/IY+wEZhsfkBIhXlG89si5A/I+/f8T8QuwxBqBLh8fYq7oxC+gNzKhbj8vIT4n5hCusvYYGufgKRC2U9P4ij0Sf40egQ5B+StaTykqJNq1163UARjNBypHIVDbYE0HUHiF7WB4eI5LxBBzlcHmsUkuGp6ZlqAu/8C83k65lwDnyHDfjvBM24q9GQTDFA5r7RUfYKHElQEBPk3GhoJn7XGIfD2pC0VNcw5jYCwsX2mw==;U2FsdGVkX1+euKMib66zno5Kkw7OxXo6v4RnkAA/HElJM46qfX17VgZ9iVLg45jOOWRgghmyYuy2WQ8RcVbuOg=="
diff --git a/.github/workflows/Downgrade.yml b/.github/workflows/Downgrade.yml
index 04cbe75e..c89327b2 100644
--- a/.github/workflows/Downgrade.yml
+++ b/.github/workflows/Downgrade.yml
@@ -15,7 +15,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        version: ['1.9']
+        version: ['1.10']
     steps:
       - uses: actions/checkout@v4
       - uses: julia-actions/setup-julia@v2
diff --git a/Project.toml b/Project.toml
index 1181f429..925e361c 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "LuxLib"
 uuid = "82251201-b29d-42c6-8e01-566dec8acb11"
 authors = ["Avik Pal and contributors"]
-version = "0.3.11"
+version = "0.3.12"

 [deps]
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
@@ -32,33 +32,33 @@ LuxLibTrackercuDNNExt = ["CUDA", "Tracker", "cuDNN"]
 LuxLibcuDNNExt = ["CUDA", "cuDNN"]

 [compat]
-AMDGPU = "0.8"
-Aqua = "0.8"
+AMDGPU = "0.8.4"
+Aqua = "0.8.7"
 CUDA = "5.2"
 ChainRulesCore = "1.20"
 ComponentArrays = "0.15.8"
 ExplicitImports = "1.4.1"
 FastClosures = "0.3.2"
 ForwardDiff = "0.10.36"
-KernelAbstractions = "0.9.2"
+KernelAbstractions = "0.9.15"
 LuxAMDGPU = "0.2.1"
 LuxCUDA = "0.3.1"
 LuxCore = "0.1.13"
 LuxTestUtils = "0.1.15"
-Markdown = "1.9"
-NNlib = "0.9.9"
+Markdown = "1.10"
+NNlib = "0.9.10"
 PrecompileTools = "1.2"
-Random = "1.9"
+Random = "1.10"
 ReTestItems = "1"
 Reexport = "1"
 ReverseDiff = "1.15"
 StableRNGs = "1"
-Statistics = "1.9"
-Test = "1.9"
-Tracker = "0.2.26"
+Statistics = "1.10"
+Test = "1.10"
+Tracker = "0.2.31"
 Zygote = "0.6.69"
 cuDNN = "1.3"
-julia = "1.9"
+julia = "1.10"

 [extras]
 AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
diff --git a/ext/LuxLibcuDNNExt/LuxLibcuDNNExt.jl b/ext/LuxLibcuDNNExt/LuxLibcuDNNExt.jl
index 3727b3b5..044929ea 100644
--- a/ext/LuxLibcuDNNExt/LuxLibcuDNNExt.jl
+++ b/ext/LuxLibcuDNNExt/LuxLibcuDNNExt.jl
@@ -19,11 +19,10 @@ const CUDNN_BN_ARRAY_TYPE = Union{
     CuArray{<:Union{Float32, Float64}, 5}}
 const BNParamType = Union{Nothing, CuVector{<:Union{Float32, Float64}}}

-function batchnorm(x::CUDNN_BN_ARRAY_TYPE, scale::BNParamType, bias::BNParamType,
+function LuxLib.batchnorm(x::CUDNN_BN_ARRAY_TYPE, scale::BNParamType, bias::BNParamType,
         running_mean::BNParamType, running_var::BNParamType; momentum::Real,
         training::Val, epsilon::Real)
     rm, rv = LuxLib._get_batchnorm_statistics(x, running_mean, running_var, training)
-
     x_ = first(LuxLib.batchnorm_cudnn(rm, rv, scale, bias, x, momentum, epsilon, training))
     return x_, (; running_mean=rm, running_var=rv)
 end
diff --git a/ext/LuxLibcuDNNExt/batchnorm.jl b/ext/LuxLibcuDNNExt/batchnorm.jl
index e3787220..aea36e21 100644
--- a/ext/LuxLibcuDNNExt/batchnorm.jl
+++ b/ext/LuxLibcuDNNExt/batchnorm.jl
@@ -1,8 +1,7 @@
-# NOTE: This can be upstreamed to LuxCUDA once we drop support for v1.6
 # Difference from the NNlib version: We expose the mean and inv_variance computed in the
 # cudnn call, since they can be used at other places like forward mode AD
 @inline function _wsize(x::AbstractArray{T, N}) where {T, N}
-    return ntuple(i -> ifelse(i == N - 1, size(x, N - 1), 1), N)
+    return ntuple(i -> i == N - 1 ? size(x, N - 1) : 1, N)
 end

 function LuxLib.batchnorm_cudnn(γ::Nothing, β::Nothing, x::DenseCuArray, args...; kwargs...)
diff --git a/test/api/batchnorm_tests.jl b/test/api/batchnorm_tests.jl
index 5453ff9f..d533746e 100644
--- a/test/api/batchnorm_tests.jl
+++ b/test/api/batchnorm_tests.jl
@@ -2,13 +2,13 @@
     rng = get_stable_rng(12345)

     function _setup_batchnorm(aType, T, sz; affine::Bool=true, track_stats::Bool)
-        x = randn(T, sz) |> aType
-        scale = affine ? aType(randn(T, sz[end - 1])) : nothing
-        bias = affine ? aType(randn(T, sz[end - 1])) : nothing
+        x = __generate_fixed_array(T, sz) |> aType
+        scale = affine ? aType(__generate_fixed_array(T, sz[end - 1])) : nothing
+        bias = affine ? aType(__generate_fixed_array(T, sz[end - 1])) : nothing

         if track_stats
-            running_mean = randn(T, sz[end - 1]) |> aType
-            running_var = abs2.(randn(T, sz[end - 1])) |> aType
+            running_mean = __generate_fixed_array(T, sz[end - 1]) |> aType
+            running_var = abs2.(__generate_fixed_array(T, sz[end - 1])) |> aType
             return x, scale, bias, running_mean, running_var
         else
             return x, scale, bias, nothing, nothing
diff --git a/test/api/groupnorm_tests.jl b/test/api/groupnorm_tests.jl
index 3f4e03f4..26284846 100644
--- a/test/api/groupnorm_tests.jl
+++ b/test/api/groupnorm_tests.jl
@@ -1,10 +1,16 @@
 @testsetup module GroupNormSetup
 using LuxLib

+@inline __generate_fixed_array(::Type{T}, sz...) where {T} = __generate_fixed_array(T, sz)
+@inline function __generate_fixed_array(::Type{T}, sz) where {T}
+    return reshape(T.(collect(1:prod(sz)) ./ prod(sz)), sz...)
+end
+@inline __generate_fixed_array(::Type{T}, sz::Int) where {T} = T.(collect(1:sz) ./ sz)
+
 function _setup_groupnorm(aType, T, sz, groups)
-    x = randn(T, sz) |> aType
-    scale = randn(T, sz[end - 1]) |> aType
-    bias = randn(T, sz[end - 1]) |> aType
+    x = __generate_fixed_array(T, sz) |> aType
+    scale = __generate_fixed_array(T, sz[end - 1]) |> aType
+    bias = __generate_fixed_array(T, sz[end - 1]) |> aType
     return x, scale, bias
 end

@@ -27,8 +33,6 @@
             sz in ((16, 16, 6, 4), (32, 32, 6, 4), (64, 64, 12, 4)),
             groups in (2, 3)

-            T === Float16 && mode == "AMDGPU" && continue
-
             _f = (args...) -> groupnorm(args...; groups, epsilon)

             epsilon = T(1e-5)
@@ -40,8 +44,7 @@

             @inferred groupnorm(x, scale, bias; groups, epsilon)

-            # @jet _f(x, scale, bias) # test_call throws exception
-            LuxTestUtils.JET.@test_opt target_modules=(LuxLib,) _f(x, scale, bias)
+            @jet _f(x, scale, bias)

             @test y isa aType{T, length(sz)}
             @test size(y) == sz
@@ -55,14 +58,14 @@

             # The KA implementation reorders operations manually for maximal
             # performance. Hence equality cannot be guaranteed.
-            @test check_approx(y, y_; atol=1.0f-3, rtol=1.0f-3)
-            @test check_approx(gs_x, gs_x_; atol=1.0f-3, rtol=1.0f-3)
-            @test check_approx(gs_scale, gs_scale_; atol=1.0f-3, rtol=1.0f-3)
-            @test check_approx(gs_bias, gs_bias_; atol=1.0f-3, rtol=1.0f-3)
+            @test check_approx(y, y_; atol=1.0f-1, rtol=1.0f-1)
+            @test check_approx(gs_x, gs_x_; atol=1.0f-1, rtol=1.0f-1)
+            @test check_approx(gs_scale, gs_scale_; atol=1.0f-1, rtol=1.0f-1)
+            @test check_approx(gs_bias, gs_bias_; atol=1.0f-1, rtol=1.0f-1)

             fp16 = T == Float16
             __f = (args...) -> sum(groupnorm(x, args...; groups, epsilon))
-            @eval @test_gradients $__f $scale $bias gpu_testing=$on_gpu atol=1.0f-3 rtol=1.0f-3 soft_fail=$fp16
+            @eval @test_gradients $__f $scale $bias gpu_testing=$on_gpu atol=1.0f-2 rtol=1.0f-2 soft_fail=$fp16
         end
     end
 end
diff --git a/test/api/instancenorm_tests.jl b/test/api/instancenorm_tests.jl
index b601e227..26a2dba0 100644
--- a/test/api/instancenorm_tests.jl
+++ b/test/api/instancenorm_tests.jl
@@ -4,9 +4,9 @@
     rng = get_stable_rng(12345)

     function _setup_instancenorm(aType, T, sz; affine::Bool=true)
-        x = randn(T, sz) |> aType
-        scale = affine ? aType(ones(T, sz[end - 1])) : nothing
-        bias = affine ? aType(zeros(T, sz[end - 1])) : nothing
+        x = __generate_fixed_array(T, sz) |> aType
+        scale = affine ? aType(__generate_fixed_array(T, sz[end - 1])) : nothing
+        bias = affine ? aType(__generate_fixed_array(T, sz[end - 1])) : nothing
         return x, scale, bias
     end

@@ -30,9 +30,12 @@
             @test y isa aType{T, length(sz)}
             @test size(y) == sz

-            _target_std = ones(ntuple(_ -> 1, length(sz) - 2)..., size(x)[(end - 1):end]...)
-            @eval @test check_approx(std(Array($y); dims=1:($(length(sz) - 2))),
-                $_target_std; atol=0.2, rtol=0.2)
+            if !affine
+                _target_std = ones(
+                    ntuple(_ -> 1, length(sz) - 2)..., size(x)[(end - 1):end]...)
+                @test check_approx(
+                    std(Array(y); dims=1:(length(sz) - 2)), _target_std; atol=0.2, rtol=0.2)
+            end
             @test std(y; dims=1:(length(sz) - 2)) != std(x; dims=1:(length(sz) - 2))

             if __istraining(training) && affine
diff --git a/test/api/layernorm_tests.jl b/test/api/layernorm_tests.jl
index 4cd2d9d4..8aa39671 100644
--- a/test/api/layernorm_tests.jl
+++ b/test/api/layernorm_tests.jl
@@ -2,10 +2,10 @@
     using Statistics

     function _setup_layernorm(aType, T, x_size, affine_shape)
-        x = randn(T, x_size) |> aType
+        x = __generate_fixed_array(T, x_size) |> aType
         if affine_shape !== nothing
-            scale = randn(T, affine_shape..., 1) |> aType
-            bias = randn(T, affine_shape..., 1) |> aType
+            scale = __generate_fixed_array(T, (affine_shape..., 1)) |> aType
+            bias = __generate_fixed_array(T, (affine_shape..., 1)) |> aType
             return x, scale, bias
         else
             return x, nothing, nothing
diff --git a/test/shared_testsetup.jl b/test/shared_testsetup.jl
index 886b20d6..acff5d77 100644
--- a/test/shared_testsetup.jl
+++ b/test/shared_testsetup.jl
@@ -28,6 +28,12 @@ get_stable_rng(seed=12345) = StableRNG(seed)

 __istraining(::Val{training}) where {training} = training

+@inline __generate_fixed_array(::Type{T}, sz...) where {T} = __generate_fixed_array(T, sz)
+@inline function __generate_fixed_array(::Type{T}, sz) where {T}
+    return reshape(T.(collect(1:prod(sz)) ./ prod(sz)), sz...)
+end
+@inline __generate_fixed_array(::Type{T}, sz::Int) where {T} = T.(collect(1:sz) ./ sz)
+
 export cpu_testing, cuda_testing, amdgpu_testing, MODES, get_stable_rng, __istraining,
-    check_approx, @jet, @test_gradients
+    check_approx, @jet, @test_gradients, __generate_fixed_array
 end
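
The common thread in the test changes above: every _setup_* helper that previously drew inputs from randn now calls __generate_fixed_array, which fills an n-element array with the fixed values 1/n, 2/n, ..., n/n, so repeated test runs see identical inputs. Below is a standalone sketch of that helper, copied from the test/shared_testsetup.jl hunk and evaluated on small example sizes; the sizes are illustrative and not taken from the test suite.

    # Deterministic stand-in for randn: n evenly spaced values in (0, 1],
    # reshaped to the requested size (copied from test/shared_testsetup.jl).
    @inline __generate_fixed_array(::Type{T}, sz...) where {T} = __generate_fixed_array(T, sz)
    @inline function __generate_fixed_array(::Type{T}, sz) where {T}
        return reshape(T.(collect(1:prod(sz)) ./ prod(sz)), sz...)
    end
    @inline __generate_fixed_array(::Type{T}, sz::Int) where {T} = T.(collect(1:sz) ./ sz)

    # Illustrative sizes only (not from the patch):
    x = __generate_fixed_array(Float32, (2, 3))  # 2x3 Matrix{Float32}; columns [1/6 2/6], [3/6 4/6], [5/6 6/6]
    bias = __generate_fixed_array(Float32, 4)    # Float32[0.25, 0.5, 0.75, 1.0]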