diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 78c1683f..fe6fae05 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -1,6 +1,6 @@ steps: - label: "Triggering Pipelines (Pull Request)" - if: "build.pull_request.base_branch == 'main'" + if: build.branch != "main" && build.tag == null agents: queue: "juliagpu" plugins: diff --git a/.buildkite/testing.yml b/.buildkite/testing.yml index 82a68ba5..2146ea94 100644 --- a/.buildkite/testing.yml +++ b/.buildkite/testing.yml @@ -24,55 +24,108 @@ steps: julia: - "1" - - group: ":telescope: Downstream CUDA" + - group: ":julia: AMD GPU" steps: - - label: ":julia: {{matrix.repo}} (Julia 1 + CUDA GPU)" + - label: ":julia: Julia: {{matrix.julia}} + AMD GPU" plugins: - JuliaCI/julia#v1: - version: "1" + version: "{{matrix.julia}}" + - JuliaCI/julia-test#v1: + test_args: "--quickfail" - JuliaCI/julia-coverage#v1: codecov: true dirs: - src - ext - command: julia --code-coverage=user --color=yes --project .buildkite/scripts/downstream.jl "{{matrix.repo}}" "CUDA" + env: + RETESTITEMS_NWORKERS: 2 + BACKEND_GROUP: "AMDGPU" agents: queue: "juliagpu" - cuda: "*" - if: build.message !~ /\[skip tests\]/ && build.message !~ /\[skip downstream\]/ && build.message !~ /\[skip ci\]/ && build.branch != "main" + rocm: "*" + rocmgpu: "*" + if: build.message !~ /\[skip tests\]/ && build.message !~ /\[skip ci\]/ timeout_in_minutes: 240 matrix: setup: - repo: - - "Boltz" - - "Lux" + julia: + - "1" - - group: ":julia: AMD GPU" + # - group: ":julia: Metal GPU" + # steps: + # - label: ":julia: Julia {{matrix.julia}} + Metal GPU" + # soft_fail: true + # plugins: + # - JuliaCI/julia#v1: + # version: "{{matrix.julia}}" + # - JuliaCI/julia-test#v1: + # test_args: "--quickfail" + # - JuliaCI/julia-coverage#v1: + # codecov: true + # dirs: + # - src + # - ext + # agents: + # queue: "juliaecosystem" + # os: "macos" + # arch: "aarch64" + # env: + # BACKEND_GROUP: "Metal" + # if: build.message !~ /\[skip tests\]/ && build.message !~ /\[skip ci\]/ + # timeout_in_minutes: 240 + # matrix: + # setup: + # julia: + # - "1" + + # - group: ":julia: oneAPI GPU" + # steps: + # - label: ":julia: Julia {{matrix.julia}} + oneAPI GPU" + # soft_fail: true + # plugins: + # - JuliaCI/julia#v1: + # version: "{{matrix.julia}}" + # - JuliaCI/julia-test#v1: + # test_args: "--quickfail" + # - JuliaCI/julia-coverage#v1: + # codecov: true + # dirs: + # - src + # - ext + # agents: + # queue: "juliagpu" + # intel: "*" + # env: + # BACKEND_GROUP: "oneAPI" + # if: build.message !~ /\[skip tests\]/ && build.message !~ /\[skip ci\]/ + # timeout_in_minutes: 240 + # matrix: + # setup: + # julia: + # - "1" + + - group: ":telescope: Downstream CUDA" steps: - - label: ":julia: Julia: {{matrix.julia}} + AMD GPU" + - label: ":julia: {{matrix.repo}} (Julia 1 + CUDA GPU)" plugins: - JuliaCI/julia#v1: - version: "{{matrix.julia}}" - - JuliaCI/julia-test#v1: - test_args: "--quickfail" + version: "1" - JuliaCI/julia-coverage#v1: codecov: true dirs: - src - ext - env: - RETESTITEMS_NWORKERS: 2 - BACKEND_GROUP: "AMDGPU" + command: julia --code-coverage=user --color=yes --project .buildkite/scripts/downstream.jl "{{matrix.repo}}" "CUDA" agents: queue: "juliagpu" - rocm: "*" - rocmgpu: "*" - if: build.message !~ /\[skip tests\]/ && build.message !~ /\[skip ci\]/ + cuda: "*" + if: build.message !~ /\[skip tests\]/ && build.message !~ /\[skip downstream\]/ && build.message !~ /\[skip ci\]/ && build.branch != "main" timeout_in_minutes: 240 matrix: setup: - julia: - - "1" + repo: + - "Boltz" + - 
"Lux" - group: ":telescope: Downstream AMD GPU" steps: diff --git a/Project.toml b/Project.toml index 37a4d383..2e3fb8ed 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "LuxLib" uuid = "82251201-b29d-42c6-8e01-566dec8acb11" authors = ["Avik Pal and contributors"] -version = "1.2.4" +version = "1.3.0" [deps] ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" diff --git a/benchmarks/setup.jl b/benchmarks/setup.jl index 06211e9d..53e0bd11 100644 --- a/benchmarks/setup.jl +++ b/benchmarks/setup.jl @@ -236,11 +236,6 @@ end function setup_batched_matmul_benchmarks!(suite::BenchmarkGroup, cpu_or_gpu::String, backend::String, dev::MLDataDevices.AbstractDevice) - if dev isa MetalDevice || dev isa oneAPIDevice - @warn "Skipping batched_matmul benchmarks for $(dev)..." - return - end - for N in [2, 16, 128, 512], Bsize in [4, 32, 128, 512] benchmark_name = "batchedmm($N, Bsize=$Bsize)" diff --git a/src/impl/Impl.jl b/src/impl/Impl.jl index bdd79cbf..c1818c77 100644 --- a/src/impl/Impl.jl +++ b/src/impl/Impl.jl @@ -21,8 +21,8 @@ using Random: Random, AbstractRNG, rand! using Statistics: Statistics, mean, var using LuxCore: LuxCore -using MLDataDevices: get_device_type, AMDGPUDevice, CUDADevice, AbstractGPUDevice, - AbstractDevice +using MLDataDevices: get_device_type, CPUDevice, AMDGPUDevice, CUDADevice, + AbstractGPUDevice, AbstractDevice using NNlib: NNlib, ConvDims using ..LuxLib: Optional, Numeric, ∂∅, internal_operation_mode, AbstractInternalArrayOpMode, diff --git a/src/impl/batched_mul.jl b/src/impl/batched_mul.jl index c5e3fdf3..87afb452 100644 --- a/src/impl/batched_mul.jl +++ b/src/impl/batched_mul.jl @@ -8,22 +8,26 @@ function batched_matmul(::GenericBroadcastOp, x::AbstractArray{xT, 3}, return NNlib.batched_mul(x, y) end -function batched_matmul(::GPUBroadcastOp{<:AbstractGPUDevice}, +for dev in (AMDGPUDevice, CUDADevice) + @eval function batched_matmul(::GPUBroadcastOp{$(dev)}, + x::AbstractArray{xT, 3}, y::AbstractArray{yT, 3}) where {xT, yT} + return NNlib.batched_mul(x, y) # GPU versions are well optimized + end +end + +function batched_matmul(opmode::GPUBroadcastOp{<:AbstractGPUDevice}, x::AbstractArray{xT, 3}, y::AbstractArray{yT, 3}) where {xT, yT} - return NNlib.batched_mul(x, y) # GPU versions are well optimized + if isconcretetype(Core.Compiler._return_type( + NNlib.batched_mul, Tuple{typeof(x), typeof(y)})) + return NNlib.batched_mul(x, y) # GPU versions are well optimized + end + return fallback_batched_matmul(opmode, x, y) end -function batched_matmul(::GPUBroadcastOp{AMDGPUDevice}, x::AbstractArray{<:Complex, 3}, +function batched_matmul( + opmode::GPUBroadcastOp{AMDGPUDevice}, x::AbstractArray{<:Complex, 3}, y::AbstractArray{<:Complex, 3}) - if (size(x, 3) != size(y, 3) && size(x, 3) != 1 && size(y, 3) != 1) || - (size(x, 2) != size(y, 1)) - throw(DimensionMismatch(lazy"size(x) = $(size(x)), size(y) = $(size(y)) inconsistent for batched_matmul.")) - end - @warn "Using fallback implementation of `batched_matmul` for complex numbers on \ - AMDGPUDevice" maxlog=1 - size(x, 3) == size(y, 3) && return stack(*, batchview(x), batchview(y)) - size(x, 3) == 1 && return stack(Base.Fix1(*, batchview(x, 1)), batchview(y)) - return stack(Base.Fix2(*, batchview(y, 1)), batchview(x)) + return fallback_batched_matmul(opmode, x, y) end function batched_matmul(opmode::LoopedArrayOp, x::AbstractArray{xT, 3}, @@ -73,6 +77,39 @@ function batched_matmul_loopvec_impl!( end end +function fallback_batched_matmul( + dev, x::AbstractArray{xT, 3}, y::AbstractArray{yT, 3}) where 
{xT, yT} + z = similar(x, promote_type(eltype(x), eltype(y)), size(x, 1), + size(y, 2), max(size(x, 3), size(y, 3))) + fallback_batched_matmul!(z, dev, x, y) + return z +end + +function fallback_batched_matmul!( + z::AbstractArray{zT, 3}, dev, x::AbstractArray{xT, 3}, + y::AbstractArray{yT, 3}) where {zT, xT, yT} + @warn "Using fallback Batched Matrix Multiply routine for $(dev) with A: size = \ + $(size(x)) eltype = $(xT) and B: size = $(size(y)) eltype = $(yT). This may be \ + slow." maxlog=1 + if (size(x, 3) != size(y, 3) && size(x, 3) != 1 && size(y, 3) != 1) || + (size(x, 2) != size(y, 1)) + throw(DimensionMismatch(lazy"size(x) = $(size(x)), size(y) = $(size(y)) inconsistent for batched_matmul.")) + end + if size(x, 3) == size(y, 3) + Threads.@threads for L in indices((x, y), 3) + mul!(batchview(z, L), batchview(x, L), batchview(y, L)) + end + elseif size(x, 3) == 1 + Threads.@threads for L in indices((x, y), 3) + mul!(batchview(z, L), batchview(x, 1), batchview(y, L)) + end + else # has to be size(y, 3) == 1 + Threads.@threads for L in indices((x, y), 3) + mul!(batchview(z, L), batchview(x, L), batchview(y, 1)) + end + end +end + function CRC.rrule(::typeof(batched_matmul), x::AbstractArray{xT, 3}, y::AbstractArray{yT, 3}) where {xT, yT} ∇batched_matmul = @closure Δ_ -> begin diff --git a/src/impl/conv.jl b/src/impl/conv.jl index 4cee0adc..f5181b65 100644 --- a/src/impl/conv.jl +++ b/src/impl/conv.jl @@ -31,7 +31,7 @@ function conv!(y::AbstractArray{yT, N}, ::Type{<:AbstractDevice}, NNlib.conv!(y, x, weight, cdims) return end -function conv!(y::AbstractArray{yT, N}, ::Type{<:AbstractGPUDevice}, +function conv!(y::AbstractArray{yT, N}, ::Type{<:Union{CUDADevice, AMDGPUDevice}}, x::AbstractArray{xT, N}, weight::AbstractArray{wT, N}, cdims::ConvDims) where {yT, xT, wT, N} if xT !== wT !== yT @@ -43,11 +43,53 @@ function conv!(y::AbstractArray{yT, N}, ::Type{<:AbstractGPUDevice}, contiguous(ofeltype_array(yT, weight)), cdims) return end +function conv!(y::AbstractArray{yT, N}, dev::Type{<:AbstractGPUDevice}, + x::AbstractArray{xT, N}, weight::AbstractArray{wT, N}, + cdims::ConvDims) where {yT, xT, wT, N} + if xT !== wT !== yT + safe_warning( + "Mixed Precision Inputs received for GPU convolution [weight: $(wT)] and \ + [x: $(xT)]. Promoting to $(yT).", 1) + end + x_cont = contiguous(ofeltype_array(yT, x)) + weight_cont = contiguous(ofeltype_array(yT, weight)) + fallback_slow_conv!(y, dev, x_cont, weight_cont, cdims) + return +end -function conv(x′, weight′, cdims::ConvDims) +function fallback_slow_conv!(y::AbstractArray{yT, N}, dev::Type{<:AbstractDevice}, + x::AbstractArray{xT, N}, weight::AbstractArray{wT, N}, + cdims::ConvDims) where {yT, xT, wT, N} + @warn "Falling back to slow convolution routine for $(dev) with x: size = \ + $(size(x)) eltype = $(xT) and weight: size = $(size(weight)) \ + eltype = $(wT)." maxlog=1 + # TODO: We should be able to reuse `y` for some part here for some efficiency + @assert NNlib.groupcount(cdims)==1 "Only groups=1 is supported for now." 
# FIXME + tmp = NNlib.unfold(x, cdims) + weight_compact = reshape(weight, :, size(weight, N), 1) + res = batched_matmul(tmp, weight_compact) + copyto!(y, reshape(res, size(y))) + return +end + +conv(x, weight, cdims::ConvDims) = conv(get_device_type((x, weight)), x, weight, cdims) + +function conv( + ::Type{<:Union{CPUDevice, CUDADevice, AMDGPUDevice}}, x′, weight′, cdims::ConvDims) x, weight = get_conv_input_weight(x′, weight′) return NNlib.conv(x, weight, cdims) end +function conv(dev::Type{<:AbstractDevice}, x′, weight′, cdims::ConvDims) + x, weight = get_conv_input_weight(dev, x′, weight′) + return fallback_slow_conv(dev, x, weight, cdims) +end + +function fallback_slow_conv(dev, x, weight, cdims::ConvDims) + y = similar(x, promote_type(eltype(x), eltype(weight)), NNlib.output_size(cdims)..., + NNlib.channels_out(cdims), size(x, ndims(x))) + fallback_slow_conv!(y, dev, x, weight, cdims) + return y +end function ∇conv_data(x′, weight′, cdims::ConvDims) x, weight = get_conv_input_weight(x′, weight′) diff --git a/src/impl/dropout.jl b/src/impl/dropout.jl index 264156a3..64d28fa5 100644 --- a/src/impl/dropout.jl +++ b/src/impl/dropout.jl @@ -190,6 +190,7 @@ function generate_dropout_mask_loop!(y::AbstractArray, p, invp) end function generate_dropout_mask_simd_loop!(y::AbstractArray{T}, p, invp) where {T} + p, invp = T(p), T(invp) @simd ivdep for I in indices(y) y[I] = (y[I] > p) * invp end @@ -197,7 +198,9 @@ end @enzyme_alternative generate_dropout_mask_loop! generate_dropout_mask_simd_loop! -function generate_dropout_mask!(y::AbstractArray, ::AbstractInternalArrayOpMode, p, invp) +function generate_dropout_mask!( + y::AbstractArray{T}, ::AbstractInternalArrayOpMode, p, invp) where {T} + p, invp = T(p), T(invp) @. y = (y > p) * invp return end diff --git a/test/Project.toml b/test/Project.toml index 51b229fc..ab1b5736 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -61,3 +61,9 @@ Statistics = "1.10" Test = "1.10" Tracker = "0.2.34" Zygote = "0.6.70" + +[extras] +CUDA_Driver_jll = "4ee394cb-3365-5eb0-8335-949819d2adfc" + +[preferences.CUDA_Driver_jll] +compat = false diff --git a/test/common_ops/activation_tests.jl b/test/common_ops/activation_tests.jl index a5c3e2f8..2045f20f 100644 --- a/test/common_ops/activation_tests.jl +++ b/test/common_ops/activation_tests.jl @@ -5,11 +5,13 @@ apply_act_fast(f::F, x) where {F} = sum(abs2, fast_activation!!(f, copy(x))) apply_act_fast2(f::F, x) where {F} = sum(abs2, fast_activation(f, x)) - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset "$mode" for (mode, aType, ongpu, fp64) in MODES @testset "$f: $T" for f in [identity, relu, sigmoid, sigmoid_fast, softplus, logsigmoid, gelu, swish, lisht, tanh, tanh_fast], T in [Float16, Float32, Float64] + !fp64 && T == Float64 && continue + x = rand(rng, T, 4, 3) |> aType y1 = apply_act(f, x) diff --git a/test/common_ops/bias_act_tests.jl b/test/common_ops/bias_act_tests.jl index 2bdbc830..1429c9b2 100644 --- a/test/common_ops/bias_act_tests.jl +++ b/test/common_ops/bias_act_tests.jl @@ -11,13 +11,15 @@ end (f::__Fix1)(x, b) = f.f(f.act, x, b) - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset "$mode" for (mode, aType, ongpu, fp64) in MODES @testset "$act, $T, $sz" for act in [ identity, relu, sigmoid, sigmoid_fast, softplus, logsigmoid, gelu, swish, lisht, tanh, tanh_fast], T in [Float16, Float32, Float64], sz in [(2, 2, 3, 4), (4, 5)] + !fp64 && T == Float64 && continue + x = rand(rng, T, sz) |> aType b = rand(rng, T, sz[end - 1]) |> aType diff --git 
a/test/common_ops/conv_tests.jl b/test/common_ops/conv_tests.jl index 5c208cd4..c7426b20 100644 --- a/test/common_ops/conv_tests.jl +++ b/test/common_ops/conv_tests.jl @@ -92,8 +92,9 @@ export expand, convfilter, calc_padding, anonact, TEST_BLOCKS, run_conv_testing end @testitem "Fused Conv: Group 1" tags=[:conv] setup=[SharedTestSetup, ConvSetup] begin - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset "$mode" for (mode, aType, ongpu, fp64) in MODES @testset "$(Tw) x $(Tx) hasbias: $(hasbias) activation: $(activation) kernel: $(kernel) padding: $(padding) stride: $(stride) groups: $(groups)" for ((Tx, Tw), hasbias, activation, (kernel, padding, stride, groups)) in TEST_BLOCKS[1] + !fp64 && (Tx == Float64 || Tw == Float64) && continue run_conv_testing(generate_fixed_array, activation, kernel, stride, padding, hasbias, groups, Tw, Tx, aType, mode, ongpu) end @@ -101,8 +102,9 @@ end end @testitem "Fused Conv: Group 2" tags=[:conv] setup=[SharedTestSetup, ConvSetup] begin - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset "$mode" for (mode, aType, ongpu, fp64) in MODES @testset "$(Tw) x $(Tx) hasbias: $(hasbias) activation: $(activation) kernel: $(kernel) padding: $(padding) stride: $(stride) groups: $(groups)" for ((Tx, Tw), hasbias, activation, (kernel, padding, stride, groups)) in TEST_BLOCKS[2] + !fp64 && (Tx == Float64 || Tw == Float64) && continue run_conv_testing(generate_fixed_array, activation, kernel, stride, padding, hasbias, groups, Tw, Tx, aType, mode, ongpu) end @@ -110,8 +112,9 @@ end end @testitem "Fused Conv: Group 3" tags=[:conv] setup=[SharedTestSetup, ConvSetup] begin - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset "$mode" for (mode, aType, ongpu, fp64) in MODES @testset "$(Tw) x $(Tx) hasbias: $(hasbias) activation: $(activation) kernel: $(kernel) padding: $(padding) stride: $(stride) groups: $(groups)" for ((Tx, Tw), hasbias, activation, (kernel, padding, stride, groups)) in TEST_BLOCKS[3] + !fp64 && (Tx == Float64 || Tw == Float64) && continue run_conv_testing(generate_fixed_array, activation, kernel, stride, padding, hasbias, groups, Tw, Tx, aType, mode, ongpu) end @@ -119,8 +122,9 @@ end end @testitem "Fused Conv: Group 4" tags=[:conv] setup=[SharedTestSetup, ConvSetup] begin - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset "$mode" for (mode, aType, ongpu, fp64) in MODES @testset "$(Tw) x $(Tx) hasbias: $(hasbias) activation: $(activation) kernel: $(kernel) padding: $(padding) stride: $(stride) groups: $(groups)" for ((Tx, Tw), hasbias, activation, (kernel, padding, stride, groups)) in TEST_BLOCKS[4] + !fp64 && (Tx == Float64 || Tw == Float64) && continue run_conv_testing(generate_fixed_array, activation, kernel, stride, padding, hasbias, groups, Tw, Tx, aType, mode, ongpu) end @@ -128,8 +132,9 @@ end end @testitem "Fused Conv: Group 5" tags=[:conv] setup=[SharedTestSetup, ConvSetup] begin - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset "$mode" for (mode, aType, ongpu, fp64) in MODES @testset "$(Tw) x $(Tx) hasbias: $(hasbias) activation: $(activation) kernel: $(kernel) padding: $(padding) stride: $(stride) groups: $(groups)" for ((Tx, Tw), hasbias, activation, (kernel, padding, stride, groups)) in TEST_BLOCKS[5] + !fp64 && (Tx == Float64 || Tw == Float64) && continue run_conv_testing(generate_fixed_array, activation, kernel, stride, padding, hasbias, groups, Tw, Tx, aType, mode, ongpu) end diff --git a/test/common_ops/dense_tests.jl b/test/common_ops/dense_tests.jl index a14906b6..e438647c 
100644 --- a/test/common_ops/dense_tests.jl +++ b/test/common_ops/dense_tests.jl @@ -79,40 +79,45 @@ export ALL_TEST_CONFIGS, TEST_BLOCKS, run_dense_testing end @testitem "Fused Dense: Group 1" tags=[:dense] setup=[SharedTestSetup, DenseSetup] begin - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset "$mode" for (mode, aType, ongpu, fp64) in MODES @testset "eltype $Tw x $Tx, size $M x $N, bias $hasbias, activation $activation" for ((Tx, Tw), M, N, hasbias, activation) in TEST_BLOCKS[1] + !fp64 && (Tx == Float64 || Tw == Float64) && continue run_dense_testing(Tw, Tx, M, N, hasbias, activation, aType, mode, ongpu) end end end @testitem "Fused Dense: Group 2" tags=[:dense] setup=[SharedTestSetup, DenseSetup] begin - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset "$mode" for (mode, aType, ongpu, fp64) in MODES @testset "eltype $Tw x $Tx, size $M x $N, bias $hasbias, activation $activation" for ((Tx, Tw), M, N, hasbias, activation) in TEST_BLOCKS[2] + !fp64 && (Tx == Float64 || Tw == Float64) && continue run_dense_testing(Tw, Tx, M, N, hasbias, activation, aType, mode, ongpu) end end end @testitem "Fused Dense: Group 3" tags=[:dense] setup=[SharedTestSetup, DenseSetup] begin - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset "$mode" for (mode, aType, ongpu, fp64) in MODES @testset "eltype $Tw x $Tx, size $M x $N, bias $hasbias, activation $activation" for ((Tx, Tw), M, N, hasbias, activation) in TEST_BLOCKS[3] + !fp64 && (Tx == Float64 || Tw == Float64) && continue run_dense_testing(Tw, Tx, M, N, hasbias, activation, aType, mode, ongpu) end end end @testitem "Fused Dense: Group 4" tags=[:dense] setup=[SharedTestSetup, DenseSetup] begin - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset "$mode" for (mode, aType, ongpu, fp64) in MODES @testset "eltype $Tw x $Tx, size $M x $N, bias $hasbias, activation $activation" for ((Tx, Tw), M, N, hasbias, activation) in TEST_BLOCKS[4] + !fp64 && (Tx == Float64 || Tw == Float64) && continue run_dense_testing(Tw, Tx, M, N, hasbias, activation, aType, mode, ongpu) end end end @testitem "Fused Dense: Group 5" tags=[:dense] setup=[SharedTestSetup, DenseSetup] begin - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset "$mode" for (mode, aType, ongpu, fp64) in MODES @testset "eltype $Tw x $Tx, size $M x $N, bias $hasbias, activation $activation" for ((Tx, Tw), M, N, hasbias, activation) in TEST_BLOCKS[5] + !fp64 && (Tx == Float64 || Tw == Float64) && continue run_dense_testing(Tw, Tx, M, N, hasbias, activation, aType, mode, ongpu) end end diff --git a/test/common_ops/dropout_tests.jl b/test/common_ops/dropout_tests.jl index 2dd6f5e2..45f8fd01 100644 --- a/test/common_ops/dropout_tests.jl +++ b/test/common_ops/dropout_tests.jl @@ -1,11 +1,13 @@ @testitem "Dropout" tags=[:other_ops] setup=[SharedTestSetup] begin rng = StableRNG(12345) - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset "$mode" for (mode, aType, ongpu, fp64) in MODES @testset "$T, $x_shape, $dims" for T in (Float16, Float32, Float64), x_shape in ((2, 3), (2, 2, 3), (2, 2, 3, 1), (2, 2, 1, 3, 1)), dims in (:, 1, (1, 2)) + !fp64 && T == Float64 && continue + x = randn(rng, T, x_shape) |> aType @test @inferred(dropout(rng, x, T(0.5), Val(true), T(2), dims)) isa Any @@ -46,10 +48,12 @@ end rng = StableRNG(12345) - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset "$mode" for (mode, aType, ongpu, fp64) in MODES @testset "$T: $x_shape" for T in (Float16, Float32, Float64), x_shape in ((2, 3), (2, 2, 3), (2, 2, 3, 1), (2, 
2, 1, 3, 1)) + !fp64 && T == Float64 && continue + x = randn(rng, T, x_shape) |> aType mask = rand(T, x_shape) |> aType @@ -133,10 +137,12 @@ end rng = StableRNG(12345) - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset "$mode" for (mode, aType, ongpu, fp64) in MODES @testset "$T: $x_shape" for T in (Float16, Float32, Float64), x_shape in ((2, 3), (2, 2, 3), (2, 2, 3, 1), (2, 2, 1, 3, 1)) + !fp64 && T == Float64 && continue + x = randn(rng, T, x_shape) |> aType @test @inferred(alpha_dropout(rng, x, T(0.5), Val(true))) isa Any diff --git a/test/normalization/batchnorm_tests.jl b/test/normalization/batchnorm_tests.jl index 3d935809..3936200a 100644 --- a/test/normalization/batchnorm_tests.jl +++ b/test/normalization/batchnorm_tests.jl @@ -123,8 +123,9 @@ export setup_batchnorm, ALL_TEST_CONFIGS, TEST_BLOCKS, run_batchnorm_testing end @testitem "Batch Norm: Group 1" tags=[:batch_norm] setup=[SharedTestSetup, BatchNormSetup] begin - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset "$mode" for (mode, aType, ongpu, fp64) in MODES @testset "eltype $T, size $sz, $act $affine $track_stats" for (T, sz, training, affine, track_stats, act) in TEST_BLOCKS[1] + !fp64 && T == Float64 && continue run_batchnorm_testing(generate_fixed_array, T, sz, training, affine, track_stats, act, aType, mode, ongpu) end @@ -132,8 +133,9 @@ end end @testitem "Batch Norm: Group 2" tags=[:batch_norm] setup=[SharedTestSetup, BatchNormSetup] begin - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset "$mode" for (mode, aType, ongpu, fp64) in MODES @testset "eltype $T, size $sz, $act $affine $track_stats" for (T, sz, training, affine, track_stats, act) in TEST_BLOCKS[2] + !fp64 && T == Float64 && continue run_batchnorm_testing(generate_fixed_array, T, sz, training, affine, track_stats, act, aType, mode, ongpu) end @@ -141,8 +143,9 @@ end end @testitem "Batch Norm: Group 3" tags=[:batch_norm] setup=[SharedTestSetup, BatchNormSetup] begin - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset "$mode" for (mode, aType, ongpu, fp64) in MODES @testset "eltype $T, size $sz, $act $affine $track_stats" for (T, sz, training, affine, track_stats, act) in TEST_BLOCKS[3] + !fp64 && T == Float64 && continue run_batchnorm_testing(generate_fixed_array, T, sz, training, affine, track_stats, act, aType, mode, ongpu) end @@ -150,8 +153,9 @@ end end @testitem "Batch Norm: Group 4" tags=[:batch_norm] setup=[SharedTestSetup, BatchNormSetup] begin - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset "$mode" for (mode, aType, ongpu, fp64) in MODES @testset "eltype $T, size $sz, $act $affine $track_stats" for (T, sz, training, affine, track_stats, act) in TEST_BLOCKS[4] + !fp64 && T == Float64 && continue run_batchnorm_testing(generate_fixed_array, T, sz, training, affine, track_stats, act, aType, mode, ongpu) end @@ -159,8 +163,9 @@ end end @testitem "Batch Norm: Group 5" tags=[:batch_norm] setup=[SharedTestSetup, BatchNormSetup] begin - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset "$mode" for (mode, aType, ongpu, fp64) in MODES @testset "eltype $T, size $sz, $act $affine $track_stats" for (T, sz, training, affine, track_stats, act) in TEST_BLOCKS[5] + !fp64 && T == Float64 && continue run_batchnorm_testing(generate_fixed_array, T, sz, training, affine, track_stats, act, aType, mode, ongpu) end @@ -168,7 +173,9 @@ end end @testitem "Batch Norm: Mixed Precision" tags=[:batch_norm] setup=[SharedTestSetup] begin - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset 
"$mode" for (mode, aType, ongpu, fp64) in MODES + !fp64 && aType == Float64 && continue + x = rand(Float64, 4, 4, 6, 2) |> aType scale = rand(Float32, 6) |> aType bias = rand(Float32, 6) |> aType diff --git a/test/normalization/groupnorm_tests.jl b/test/normalization/groupnorm_tests.jl index 3d5e821a..3c638885 100644 --- a/test/normalization/groupnorm_tests.jl +++ b/test/normalization/groupnorm_tests.jl @@ -93,40 +93,45 @@ export setup_groupnorm, ALL_TEST_CONFIGS, TEST_BLOCKS, run_groupnorm_testing end @testitem "Group Norm: Group 1" tags=[:group_norm] setup=[SharedTestSetup, GroupNormSetup] begin - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset "$mode" for (mode, aType, ongpu, fp64) in MODES @testset "eltype $T, size $sz, $groups, $affine, $act" for (T, sz, groups, affine, act) in TEST_BLOCKS[1] + !fp64 && T == Float64 && continue run_groupnorm_testing(T, sz, groups, affine, act, aType, mode, ongpu) end end end @testitem "Group Norm: Group 2" tags=[:group_norm] setup=[SharedTestSetup, GroupNormSetup] begin - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset "$mode" for (mode, aType, ongpu, fp64) in MODES @testset "eltype $T, size $sz, $groups, $affine, $act" for (T, sz, groups, affine, act) in TEST_BLOCKS[2] + !fp64 && T == Float64 && continue run_groupnorm_testing(T, sz, groups, affine, act, aType, mode, ongpu) end end end @testitem "Group Norm: Group 3" tags=[:group_norm] setup=[SharedTestSetup, GroupNormSetup] begin - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset "$mode" for (mode, aType, ongpu, fp64) in MODES @testset "eltype $T, size $sz, $groups, $affine, $act" for (T, sz, groups, affine, act) in TEST_BLOCKS[3] + !fp64 && T == Float64 && continue run_groupnorm_testing(T, sz, groups, affine, act, aType, mode, ongpu) end end end @testitem "Group Norm: Group 4" tags=[:group_norm] setup=[SharedTestSetup, GroupNormSetup] begin - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset "$mode" for (mode, aType, ongpu, fp64) in MODES @testset "eltype $T, size $sz, $groups, $affine, $act" for (T, sz, groups, affine, act) in TEST_BLOCKS[4] + !fp64 && T == Float64 && continue run_groupnorm_testing(T, sz, groups, affine, act, aType, mode, ongpu) end end end @testitem "Group Norm: Group 5" tags=[:group_norm] setup=[SharedTestSetup, GroupNormSetup] begin - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset "$mode" for (mode, aType, ongpu, fp64) in MODES @testset "eltype $T, size $sz, $groups, $affine, $act" for (T, sz, groups, affine, act) in TEST_BLOCKS[5] + !fp64 && T == Float64 && continue run_groupnorm_testing(T, sz, groups, affine, act, aType, mode, ongpu) end end diff --git a/test/normalization/instancenorm_tests.jl b/test/normalization/instancenorm_tests.jl index a48a502d..ff166cfa 100644 --- a/test/normalization/instancenorm_tests.jl +++ b/test/normalization/instancenorm_tests.jl @@ -84,8 +84,9 @@ end @testitem "Instance Norm: Group 1" tags=[:instance_norm] setup=[ SharedTestSetup, InstanceNormSetup] begin - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset "$mode" for (mode, aType, ongpu, fp64) in MODES @testset "eltype $T, size $sz, $training $act" for (T, sz, training, act) in TEST_BLOCKS[1] + !fp64 && T == Float64 && continue run_instancenorm_testing( generate_fixed_array, T, sz, training, act, aType, mode, ongpu) end @@ -94,8 +95,9 @@ end @testitem "Instance Norm: Group 2" tags=[:instance_norm] setup=[ SharedTestSetup, InstanceNormSetup] begin - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset 
"$mode" for (mode, aType, ongpu, fp64) in MODES @testset "eltype $T, size $sz, $training $act" for (T, sz, training, act) in TEST_BLOCKS[2] + !fp64 && T == Float64 && continue run_instancenorm_testing( generate_fixed_array, T, sz, training, act, aType, mode, ongpu) end @@ -104,8 +106,9 @@ end @testitem "Instance Norm: Group 3" tags=[:instance_norm] setup=[ SharedTestSetup, InstanceNormSetup] begin - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset "$mode" for (mode, aType, ongpu, fp64) in MODES @testset "eltype $T, size $sz, $training $act" for (T, sz, training, act) in TEST_BLOCKS[3] + !fp64 && T == Float64 && continue run_instancenorm_testing( generate_fixed_array, T, sz, training, act, aType, mode, ongpu) end @@ -114,8 +117,9 @@ end @testitem "Instance Norm: Group 4" tags=[:instance_norm] setup=[ SharedTestSetup, InstanceNormSetup] begin - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset "$mode" for (mode, aType, ongpu, fp64) in MODES @testset "eltype $T, size $sz, $training $act" for (T, sz, training, act) in TEST_BLOCKS[4] + !fp64 && T == Float64 && continue run_instancenorm_testing( generate_fixed_array, T, sz, training, act, aType, mode, ongpu) end @@ -124,8 +128,9 @@ end @testitem "Instance Norm: Group 5" tags=[:instance_norm] setup=[ SharedTestSetup, InstanceNormSetup] begin - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset "$mode" for (mode, aType, ongpu, fp64) in MODES @testset "eltype $T, size $sz, $training $act" for (T, sz, training, act) in TEST_BLOCKS[5] + !fp64 && T == Float64 && continue run_instancenorm_testing( generate_fixed_array, T, sz, training, act, aType, mode, ongpu) end diff --git a/test/normalization/layernorm_tests.jl b/test/normalization/layernorm_tests.jl index bdfccb47..37ca3c70 100644 --- a/test/normalization/layernorm_tests.jl +++ b/test/normalization/layernorm_tests.jl @@ -90,8 +90,9 @@ export ALL_TEST_CONFIGS, TEST_BLOCKS, run_layernorm_testing end @testitem "Layer Norm: Group 1" tags=[:layer_norm] setup=[SharedTestSetup, LayerNormSetup] begin - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset "$mode" for (mode, aType, ongpu, fp64) in MODES @testset "eltype $T, size $x_shape, $act" for (T, x_shape, affine_shape, act) in TEST_BLOCKS[1] + !fp64 && T == Float64 && continue run_layernorm_testing( generate_fixed_array, aType, T, x_shape, affine_shape, act, ongpu, mode) end @@ -99,8 +100,9 @@ end end @testitem "Layer Norm: Group 2" tags=[:layer_norm] setup=[SharedTestSetup, LayerNormSetup] begin - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset "$mode" for (mode, aType, ongpu, fp64) in MODES @testset "eltype $T, size $x_shape, $act" for (T, x_shape, affine_shape, act) in TEST_BLOCKS[2] + !fp64 && T == Float64 && continue run_layernorm_testing( generate_fixed_array, aType, T, x_shape, affine_shape, act, ongpu, mode) end @@ -108,8 +110,9 @@ end end @testitem "Layer Norm: Group 3" tags=[:layer_norm] setup=[SharedTestSetup, LayerNormSetup] begin - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset "$mode" for (mode, aType, ongpu, fp64) in MODES @testset "eltype $T, size $x_shape, $act" for (T, x_shape, affine_shape, act) in TEST_BLOCKS[3] + !fp64 && T == Float64 && continue run_layernorm_testing( generate_fixed_array, aType, T, x_shape, affine_shape, act, ongpu, mode) end @@ -117,8 +120,9 @@ end end @testitem "Layer Norm: Group 4" tags=[:layer_norm] setup=[SharedTestSetup, LayerNormSetup] begin - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset "$mode" for (mode, aType, 
ongpu, fp64) in MODES @testset "eltype $T, size $x_shape, $act" for (T, x_shape, affine_shape, act) in TEST_BLOCKS[4] + !fp64 && T == Float64 && continue run_layernorm_testing( generate_fixed_array, aType, T, x_shape, affine_shape, act, ongpu, mode) end @@ -126,8 +130,9 @@ end end @testitem "Layer Norm: Group 5" tags=[:layer_norm] setup=[SharedTestSetup, LayerNormSetup] begin - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset "$mode" for (mode, aType, ongpu, fp64) in MODES @testset "eltype $T, size $x_shape, $act" for (T, x_shape, affine_shape, act) in TEST_BLOCKS[5] + !fp64 && T == Float64 && continue run_layernorm_testing( generate_fixed_array, aType, T, x_shape, affine_shape, act, ongpu, mode) end @@ -135,7 +140,9 @@ end end @testitem "Layer Norm: Error Checks" tags=[:layer_norm] setup=[SharedTestSetup] begin - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset "$mode" for (mode, aType, ongpu, fp64) in MODES + !fp64 && continue + x = rand(2, 3) |> aType @test_throws ArgumentError layernorm(x, nothing, nothing, identity, nothing, 1e-5) diff --git a/test/others/bmm_tests.jl b/test/others/bmm_tests.jl index ea847568..2b89b0ef 100644 --- a/test/others/bmm_tests.jl +++ b/test/others/bmm_tests.jl @@ -46,8 +46,10 @@ end @testitem "batched_mul" tags=[:batched_ops] setup=[SharedTestSetup, BatchedMMSetup] begin rng = StableRNG(1234) - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset "$mode" for (mode, aType, ongpu, fp64) in MODES @testset "batched_mul: Float64 × $(TB)" for TB in [Float64, Float32] + !fp64 && continue + @testset "real" begin A = randn(rng, 7, 5, 3) |> aType B = randn(rng, TB, 5, 7, 3) |> aType @@ -131,7 +133,9 @@ end SharedTestSetup, BatchedMMSetup] begin rng = StableRNG(1234) - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset "$mode" for (mode, aType, ongpu, fp64) in MODES + !fp64 && continue + @testset "Float64 × $(TB)" for TB in [Float64, ComplexF64] @testset "trivial dimensions & unit strides" begin @testset "$tA(rand$((sA...,3))) ⊠ $tB(rand$((sB...,3)))" for tA in [ @@ -228,7 +232,9 @@ end SharedTestSetup, BatchedMMSetup] begin rng = StableRNG(1234) - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset "$mode" for (mode, aType, ongpu, fp64) in MODES + !fp64 && continue + @testset "Float64 × $(TB)" for TB in [Float64, ComplexF64] A = randn(rng, 3, 3, 3) |> aType M = aType(rand(rng, TB, 3, 3)) .+ im @@ -259,42 +265,44 @@ end fn(A, B) = sum(batched_matmul(A, B)) fn_vec(A, B) = sum(batched_vec(A, B)) - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset "$mode" for (mode, aType, ongpu, fp64) in MODES M, P, Q = 13, 7, 11 B = 3 @testset "Two 3-arrays" begin - @test_gradients(fn, aType(randn(rng, M, P, B)), - aType(randn(rng, P, Q, B)); atol=1e-3, rtol=1e-3) - @test_gradients(fn, batched_adjoint(aType(randn(rng, P, M, B))), - aType(randn(rng, P, Q, B)); atol=1e-3, rtol=1e-3) - @test_gradients(fn, aType(randn(rng, M, P, B)), - batched_transpose(aType(randn(rng, Q, P, B))); atol=1e-3, rtol=1e-3) + @test_gradients(fn, aType(randn(rng, Float32, M, P, B)), + aType(randn(rng, Float32, P, Q, B)); atol=1e-3, rtol=1e-3) + @test_gradients(fn, batched_adjoint(aType(randn(rng, Float32, P, M, B))), + aType(randn(rng, Float32, P, Q, B)); atol=1e-3, rtol=1e-3) + @test_gradients(fn, aType(randn(rng, Float32, M, P, B)), + batched_transpose(aType(randn(rng, Float32, Q, P, B))); atol=1e-3, + rtol=1e-3) end @testset "One a matrix..." 
begin - @test_gradients(fn, aType(randn(rng, M, P)), - aType(randn(rng, P, Q, B)); atol=1e-3, rtol=1e-3) - @test_gradients(fn, adjoint(aType(randn(rng, P, M))), - aType(randn(rng, P, Q, B)); atol=1e-3, rtol=1e-3) - @test_gradients(fn, aType(randn(rng, M, P)), - batched_adjoint(aType(randn(rng, Q, P, B))); atol=1e-3, rtol=1e-3) - - @test_gradients(fn, aType(randn(rng, M, P)), - aType(randn(rng, P, Q, B)); atol=1e-3, rtol=1e-3) - @test_gradients(fn, adjoint(aType(randn(rng, P, M))), - aType(randn(rng, P, Q, B)); atol=1e-3, rtol=1e-3) - @test_gradients(fn, aType(randn(rng, M, P)), - batched_adjoint(aType(randn(rng, Q, P, B))); atol=1e-3, rtol=1e-3) + @test_gradients(fn, aType(randn(rng, Float32, M, P)), + aType(randn(rng, Float32, P, Q, B)); atol=1e-3, rtol=1e-3) + @test_gradients(fn, adjoint(aType(randn(rng, Float32, P, M))), + aType(randn(rng, Float32, P, Q, B)); atol=1e-3, rtol=1e-3) + @test_gradients(fn, aType(randn(rng, Float32, M, P)), + batched_adjoint(aType(randn(rng, Float32, Q, P, B))); atol=1e-3, rtol=1e-3) + + @test_gradients(fn, aType(randn(rng, Float32, M, P)), + aType(randn(rng, Float32, P, Q, B)); atol=1e-3, rtol=1e-3) + @test_gradients(fn, adjoint(aType(randn(rng, Float32, P, M))), + aType(randn(rng, Float32, P, Q, B)); atol=1e-3, rtol=1e-3) + @test_gradients(fn, aType(randn(rng, Float32, M, P)), + batched_adjoint(aType(randn(rng, Float32, Q, P, B))); atol=1e-3, rtol=1e-3) end @testset "... or equivalent to a matrix" begin - @test_gradients(fn, aType(randn(rng, M, P, 1)), - aType(randn(rng, P, Q, B)); atol=1e-3, rtol=1e-3) - @test_gradients(fn, batched_transpose(aType(randn(rng, P, M, 1))), - aType(randn(rng, P, Q, B)); atol=1e-3, rtol=1e-3) - @test_gradients(fn, aType(randn(rng, M, P, 1)), - batched_transpose(aType(randn(rng, Q, P, B))); atol=1e-3, rtol=1e-3) + @test_gradients(fn, aType(randn(rng, Float32, M, P, 1)), + aType(randn(rng, Float32, P, Q, B)); atol=1e-3, rtol=1e-3) + @test_gradients(fn, batched_transpose(aType(randn(rng, Float32, P, M, 1))), + aType(randn(rng, Float32, P, Q, B)); atol=1e-3, rtol=1e-3) + @test_gradients(fn, aType(randn(rng, Float32, M, P, 1)), + batched_transpose(aType(randn(rng, Float32, Q, P, B))); atol=1e-3, + rtol=1e-3) end end end diff --git a/test/others/forwarddiff_tests.jl b/test/others/forwarddiff_tests.jl index 23c279e8..228aa7d3 100644 --- a/test/others/forwarddiff_tests.jl +++ b/test/others/forwarddiff_tests.jl @@ -38,7 +38,7 @@ end end - @testset "$(mode): Jacobian Vector Products" for (mode, aType, ongpu) in MODES + @testset "$(mode): Jacobian Vector Products" for (mode, aType, ongpu, fp64) in MODES @testset "$(op)(; flipped = $flipped)" for flipped in (true, false), op in (depthwiseconv, conv) @@ -98,7 +98,7 @@ end rng = StableRNG(12345) - @testset "$mode: dropout" for (mode, aType, ongpu) in MODES + @testset "$mode: dropout" for (mode, aType, ongpu, fp64) in MODES x = randn(rng, Float32, 10, 2) |> aType x_dual = ForwardDiff.Dual.(x) diff --git a/test/others/misc_tests.jl b/test/others/misc_tests.jl index 6943de74..6e046eea 100644 --- a/test/others/misc_tests.jl +++ b/test/others/misc_tests.jl @@ -1,5 +1,5 @@ @testitem "internal_operation_mode: Wrapped Arrays" tags=[:others] setup=[SharedTestSetup] begin - @testset "$mode" for (mode, aType, ongpu) in MODES + @testset "$mode" for (mode, aType, ongpu, fp64) in MODES x = rand(Float32, 4, 3) |> aType retval = ongpu ? 
LuxLib.GPUBroadcastOp : LuxLib.LoopedArrayOp @test LuxLib.internal_operation_mode(x) isa retval diff --git a/test/runtests.jl b/test/runtests.jl index 799d0c2b..54223a63 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -14,6 +14,8 @@ const LUXLIB_BLAS_BACKEND = lowercase(get(ENV, "LUXLIB_BLAS_BACKEND", "default") (BACKEND_GROUP == "all" || BACKEND_GROUP == "cuda") && push!(EXTRA_PKGS, "LuxCUDA") (BACKEND_GROUP == "all" || BACKEND_GROUP == "amdgpu") && push!(EXTRA_PKGS, "AMDGPU") +(BACKEND_GROUP == "all" || BACKEND_GROUP == "oneapi") && push!(EXTRA_PKGS, "oneAPI") +(BACKEND_GROUP == "all" || BACKEND_GROUP == "metal") && push!(EXTRA_PKGS, "Metal") if !isempty(EXTRA_PKGS) @info "Installing Extra Packages for testing" EXTRA_PKGS=EXTRA_PKGS diff --git a/test/shared_testsetup.jl b/test/shared_testsetup.jl index 4cf27cfb..487a50d5 100644 --- a/test/shared_testsetup.jl +++ b/test/shared_testsetup.jl @@ -33,6 +33,14 @@ if BACKEND_GROUP == "all" || BACKEND_GROUP == "amdgpu" using AMDGPU end +if BACKEND_GROUP == "all" || BACKEND_GROUP == "oneapi" + using oneAPI +end + +if BACKEND_GROUP == "all" || BACKEND_GROUP == "metal" + using Metal +end + cpu_testing() = BACKEND_GROUP == "all" || BACKEND_GROUP == "cpu" function cuda_testing() return (BACKEND_GROUP == "all" || BACKEND_GROUP == "cuda") && @@ -42,12 +50,22 @@ function amdgpu_testing() return (BACKEND_GROUP == "all" || BACKEND_GROUP == "amdgpu") && MLDataDevices.functional(AMDGPUDevice) end +function oneapi_testing() + return (BACKEND_GROUP == "all" || BACKEND_GROUP == "oneapi") && + MLDataDevices.functional(oneAPIDevice) +end +function metal_testing() + return (BACKEND_GROUP == "all" || BACKEND_GROUP == "metal") && + MLDataDevices.functional(MetalDevice) +end const MODES = begin modes = [] - cpu_testing() && push!(modes, ("cpu", Array, false)) - cuda_testing() && push!(modes, ("cuda", CuArray, true)) - amdgpu_testing() && push!(modes, ("amdgpu", ROCArray, true)) + cpu_testing() && push!(modes, ("cpu", Array, false, true)) + cuda_testing() && push!(modes, ("cuda", CuArray, true, true)) + amdgpu_testing() && push!(modes, ("amdgpu", ROCArray, true, true)) + oneapi_testing() && push!(modes, ("oneapi", oneArray, true, false)) + metal_testing() && push!(modes, ("metal", MtlArray, true, false)) modes end
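
Note on the new `fallback_batched_matmul!` above: for backends without a tuned `NNlib.batched_mul` kernel (the generic `GPUBroadcastOp` method only falls back when `Core.Compiler._return_type` reports that `NNlib.batched_mul` does not infer to a concrete type), the batched product is computed slice by slice with `mul!`, broadcasting a singleton batch dimension on either operand. The following is a self-contained sketch of that strategy using plain `view`s instead of LuxLib's internal `batchview` helper; it is illustrative only, not the package's implementation:

    using LinearAlgebra

    # Multiply each batch slice with mul!, broadcasting a singleton batch dimension.
    function naive_batched_matmul(x::AbstractArray{<:Number, 3}, y::AbstractArray{<:Number, 3})
        size(x, 2) == size(y, 1) ||
            throw(DimensionMismatch("inner dimensions of x and y must match"))
        (size(x, 3) == size(y, 3) || size(x, 3) == 1 || size(y, 3) == 1) ||
            throw(DimensionMismatch("batch dimensions must match or be 1"))
        B = max(size(x, 3), size(y, 3))
        z = similar(x, promote_type(eltype(x), eltype(y)), size(x, 1), size(y, 2), B)
        Threads.@threads for L in 1:B
            xL = view(x, :, :, size(x, 3) == 1 ? 1 : L)   # reuse slice 1 if x has batch size 1
            yL = view(y, :, :, size(y, 3) == 1 ? 1 : L)   # likewise for y
            mul!(view(z, :, :, L), xL, yL)
        end
        return z
    end

    x, y = randn(Float32, 4, 3, 8), randn(Float32, 3, 5, 1)  # y's singleton batch dim broadcasts
    @assert size(naive_batched_matmul(x, y)) == (4, 5, 8)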
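
Similarly, `fallback_slow_conv!` reduces convolution to a matrix product: `NNlib.unfold` lays every sliding window of `x` out as a row (im2col), so multiplying by the flattened kernel and reshaping reproduces `NNlib.conv`. A sketch of that identity under the same assumption as the `@assert` in the diff (groups = 1); `flipkernel=true` is chosen here only to keep the example unambiguous:

    using NNlib

    x = randn(Float32, 8, 8, 3, 2)                     # W × H × C_in × batch
    w = randn(Float32, 3, 3, 3, 4)                     # kW × kH × C_in × C_out
    cdims = DenseConvDims(x, w; flipkernel=true)       # groups = 1

    cols = NNlib.unfold(x, cdims)                      # (out_W*out_H, kW*kH*C_in, batch)
    wmat = reshape(w, :, size(w, 4), 1)                # (kW*kH*C_in, C_out, 1)
    res  = NNlib.batched_mul(cols, wmat)               # (out_W*out_H, C_out, batch)
    y    = reshape(res, NNlib.output_size(cdims)..., size(w, 4), size(x, 4))

    @assert y ≈ NNlib.conv(x, w, cdims)                # same identity the fallback relies on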
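
Finally, each `MODES` entry now carries a fourth `fp64` flag (true for CPU/CUDA/AMDGPU, false for oneAPI and Metal, which lack usable Float64), and every test suite guards its double-precision configurations on it. A minimal sketch of the pattern the test files follow; the `MODES` value below is a stand-in, not the real SharedTestSetup:

    using Test

    # Stand-in for SharedTestSetup's MODES: (name, array type, on GPU?, supports Float64?)
    MODES = [("cpu", Array, false, true), ("no_fp64_backend", Array, true, false)]

    @testset "$mode" for (mode, aType, ongpu, fp64) in MODES
        @testset "eltype $T" for T in (Float16, Float32, Float64)
            !fp64 && T == Float64 && continue      # skip Float64 where the backend lacks it
            x = aType(rand(T, 4, 3))
            @test eltype(x) == T
        end
    end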