From ed856518ca12c9860d04f560f292b4b32e5e0503 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Sat, 21 Sep 2024 11:34:35 -0400 Subject: [PATCH] ci(buildkite): add GPU testing for Metal and oneAPI --- .buildkite/testing.yml | 85 ++++++++++++++++++++++++++++++++-------- test/runtests.jl | 2 + test/shared_testsetup.jl | 18 +++++++++ 3 files changed, 89 insertions(+), 16 deletions(-) diff --git a/.buildkite/testing.yml b/.buildkite/testing.yml index 82a68ba5..2e0a587f 100644 --- a/.buildkite/testing.yml +++ b/.buildkite/testing.yml @@ -24,32 +24,64 @@ steps: julia: - "1" - - group: ":telescope: Downstream CUDA" + - group: ":julia: AMD GPU" steps: - - label: ":julia: {{matrix.repo}} (Julia 1 + CUDA GPU)" + - label: ":julia: Julia: {{matrix.julia}} + AMD GPU" plugins: - JuliaCI/julia#v1: - version: "1" + version: "{{matrix.julia}}" + - JuliaCI/julia-test#v1: + test_args: "--quickfail" - JuliaCI/julia-coverage#v1: codecov: true dirs: - src - ext - command: julia --code-coverage=user --color=yes --project .buildkite/scripts/downstream.jl "{{matrix.repo}}" "CUDA" + env: + RETESTITEMS_NWORKERS: 2 + BACKEND_GROUP: "AMDGPU" agents: queue: "juliagpu" - cuda: "*" - if: build.message !~ /\[skip tests\]/ && build.message !~ /\[skip downstream\]/ && build.message !~ /\[skip ci\]/ && build.branch != "main" + rocm: "*" + rocmgpu: "*" + if: build.message !~ /\[skip tests\]/ && build.message !~ /\[skip ci\]/ timeout_in_minutes: 240 matrix: setup: - repo: - - "Boltz" - - "Lux" + julia: + - "1" - - group: ":julia: AMD GPU" + - group: ":julia: Metal GPU" steps: - - label: ":julia: Julia: {{matrix.julia}} + AMD GPU" + - label: ":julia: Julia {{matrix.julia}} + Metal GPU" + soft_fail: true + plugins: + - JuliaCI/julia#v1: + version: "{{matrix.julia}}" + - JuliaCI/julia-test#v1: + test_args: "--quickfail" + # - JuliaCI/julia-coverage#v1: + # codecov: true + # dirs: + # - src + # - ext + agents: + queue: "juliaecosystem" + os: "macos" + arch: "aarch64" + env: + BACKEND_GROUP: "Metal" + if: build.message !~ /\[skip tests\]/ && build.message !~ /\[skip ci\]/ + timeout_in_minutes: 240 + matrix: + setup: + julia: + - "1" + + - group: ":julia: oneAPI (Intel) GPU" + steps: + - label: ":julia: Julia {{matrix.julia}} + oneAPI (Intel) GPU" + soft_fail: true plugins: - JuliaCI/julia#v1: version: "{{matrix.julia}}" @@ -60,13 +92,11 @@ steps: dirs: - src - ext - env: - RETESTITEMS_NWORKERS: 2 - BACKEND_GROUP: "AMDGPU" agents: queue: "juliagpu" - rocm: "*" - rocmgpu: "*" + intel: "*" + env: + BACKEND_GROUP: "oneAPI" if: build.message !~ /\[skip tests\]/ && build.message !~ /\[skip ci\]/ timeout_in_minutes: 240 matrix: @@ -74,6 +104,29 @@ steps: julia: - "1" + - group: ":telescope: Downstream CUDA" + steps: + - label: ":julia: {{matrix.repo}} (Julia 1 + CUDA GPU)" + plugins: + - JuliaCI/julia#v1: + version: "1" + - JuliaCI/julia-coverage#v1: + codecov: true + dirs: + - src + - ext + command: julia --code-coverage=user --color=yes --project .buildkite/scripts/downstream.jl "{{matrix.repo}}" "CUDA" + agents: + queue: "juliagpu" + cuda: "*" + if: build.message !~ /\[skip tests\]/ && build.message !~ /\[skip downstream\]/ && build.message !~ /\[skip ci\]/ && build.branch != "main" + timeout_in_minutes: 240 + matrix: + setup: + repo: + - "Boltz" + - "Lux" + - group: ":telescope: Downstream AMD GPU" steps: - label: ":julia: {{matrix.repo}} (Julia 1 + AMD GPU)" diff --git a/test/runtests.jl b/test/runtests.jl index 799d0c2b..54223a63 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -14,6 +14,8 @@ const LUXLIB_BLAS_BACKEND = lowercase(get(ENV, "LUXLIB_BLAS_BACKEND", "default") (BACKEND_GROUP == "all" || BACKEND_GROUP == "cuda") && push!(EXTRA_PKGS, "LuxCUDA") (BACKEND_GROUP == "all" || BACKEND_GROUP == "amdgpu") && push!(EXTRA_PKGS, "AMDGPU") +(BACKEND_GROUP == "all" || BACKEND_GROUP == "oneapi") && push!(EXTRA_PKGS, "oneAPI") +(BACKEND_GROUP == "all" || BACKEND_GROUP == "metal") && push!(EXTRA_PKGS, "Metal") if !isempty(EXTRA_PKGS) @info "Installing Extra Packages for testing" EXTRA_PKGS=EXTRA_PKGS diff --git a/test/shared_testsetup.jl b/test/shared_testsetup.jl index 4cf27cfb..fb7bb9c3 100644 --- a/test/shared_testsetup.jl +++ b/test/shared_testsetup.jl @@ -33,6 +33,14 @@ if BACKEND_GROUP == "all" || BACKEND_GROUP == "amdgpu" using AMDGPU end +if BACKEND_GROUP == "all" || BACKEND_GROUP == "oneapi" + using oneAPI +end + +if BACKEND_GROUP == "all" || BACKEND_GROUP == "metal" + using Metal +end + cpu_testing() = BACKEND_GROUP == "all" || BACKEND_GROUP == "cpu" function cuda_testing() return (BACKEND_GROUP == "all" || BACKEND_GROUP == "cuda") && @@ -42,12 +50,22 @@ function amdgpu_testing() return (BACKEND_GROUP == "all" || BACKEND_GROUP == "amdgpu") && MLDataDevices.functional(AMDGPUDevice) end +function oneapi_testing() + return (BACKEND_GROUP == "all" || BACKEND_GROUP == "oneapi") && + MLDataDevices.functional(oneAPIDevice) +end +function metal_testing() + return (BACKEND_GROUP == "all" || BACKEND_GROUP == "metal") && + MLDataDevices.functional(MetalDevice) +end const MODES = begin modes = [] cpu_testing() && push!(modes, ("cpu", Array, false)) cuda_testing() && push!(modes, ("cuda", CuArray, true)) amdgpu_testing() && push!(modes, ("amdgpu", ROCArray, true)) + oneapi_testing() && push!(modes, ("oneapi", oneArray, true)) + metal_testing() && push!(modes, ("metal", MtlArray, true)) modes end