Summary
Jobs
metadata
amd64
bump-manifest
build-base
test-distribution (extra-only-distribution.sh)
test-distribution (mirror-only-distribution.sh)
test-distribution (upstream-only-distribution.sh)
build-jax
build-pallas
build-maxtext
build-levanter
build-upstream-t5x
build-upstream-pax
launch-slurm-runner
jax-unit-test (V100)
jax-unit-test (A100)
pallas-unit-test
levanter-unit-test
launch-slurm-runner
jax-unit-test
pallas-unit-test (V100)
pallas-unit-test (A100)
levanter-unit-test
launch-slurm-runner
jax-unit-test
pallas-unit-test
levanter-unit-test (V100)
levanter-unit-test (A100)
build-rosetta
t5x-multi-gpu (1)
t5x-multi-gpu (2)
t5x-multi-gpu (4)
t5x-multi-gpu (8)
t5x-multi-node (1, 1)
t5x-multi-node (1, 2)
t5x-multi-node (2, 1)
t5x-multi-node (2, 2)
t5x-multi-node (4, 1)
t5x-multi-node (4, 2)
t5x-multi-node (8, 1)
t5x-multi-node (8, 2)
metrics
summary
sitrep
outcome
build-rosetta
te-unit-tests
te-multi-gpu (1)
te-multi-gpu (2)
single-process-multi-device (1, 8, 1, 1)
single-process-multi-device (1, 1, 2, 4)
pax-multi-node (1, 1, 1, 1)
pax-multi-node (1, 8, 1, 1)
pax-multi-node (1, 1, 8, 1)
pax-multi-node (4, 2, 1, 1)
pax-multi-node (1, 4, 1, 2)
pax-multi-node (1, 16, 1, 1)
pax-multi-node (4, 2, 1, 2)
single-process-evaluation (1, 8, 1, 1)
metrics
summary
sitrep
outcome
single-process-multi-device (1P1G_te-1, 1, --gin.train/utils.DatasetConfig.pack=False --gin.train...
single-process-multi-device (1P1G_te-0, 1, --enable-te 0)
single-process-multi-device (1P8G_te-1, 8, --gin.train/utils.DatasetConfig.pack=False --gin.train...
multi-gpu-multi-node (1N1G-te-1, 1, 1, --gin.train/utils.DatasetConfig.pack=False --gin.train_eva...
multi-gpu-multi-node (1N8G-te-1, 8, 1, --gin.train/utils.DatasetConfig.pack=False --gin.train_eva...
multi-gpu-multi-node (2N8G-te-1, 8, 2, --gin.train/utils.DatasetConfig.pack=False --gin.train_eva...
multi-gpu-multi-node (2N2G_te-0, 2, 2, --enable-te 0)
vit-single-process-multi-device (8)
vit-multi-gpu-multi-node (1, 1)
vit-multi-gpu-multi-node (1, 2)
vit-multi-gpu-multi-node (8, 1)
vit-multi-gpu-multi-node (8, 2)
publish
summary
outcome
collect-docker-tags
single-process-multi-device-te (1, 8, 1, 1)
single-process-multi-device-te (1, 1, 2, 4)
rosetta-pax-multi-node-te (1DP1FSDP1TP1PP_TE, 1, 1, 1, 1, 4)
rosetta-pax-multi-node-te (8DP1FSDP1TP1PP_TE, 1, 8, 1, 1, 4)
rosetta-pax-multi-node-te (1DP8FSDP1TP1PP_TE, 1, 1, 8, 1, 4)
rosetta-pax-multi-node-te (4DP1FSDP2TP1PP_TE, 1, 4, 1, 2, 4)
rosetta-pax-multi-node-te (16DP1FSDP1TP1PP_TE, 1, 16, 1, 1, 4)
rosetta-pax-multi-node-te (5B_fused_attn_1, 1, 1, 8, 1, 2, --run-5b --enable-fused-attn)
rosetta-pax-multi-node-te (5B_fused_attn_0, 1, 1, 8, 1, 2, --run-5b)
rosetta-pax-multi-node (1, 8, 1, 1)
rosetta-pax-multi-node (1, 4, 1, 2)
rosetta-pax-multi-node (4, 2, 1, 1)
rosetta-pax-multi-node (4, 2, 1, 2)
rosetta-pax-single-node-dropout-te (1, 8, 1, 1)
single-process-evaluation-te (1, 8, 1, 1)
metrics
summary
publish
outcome
arm64
make-publish-configs
finalize
publish-containers (jax, jax-mealkit, 500, ghcr.io/nvidia/jax-toolbox-internal:8029667858-jax-amd...
publish-containers (pallas, jax-mealkit, 500, ghcr.io/nvidia/jax-toolbox-internal:8029667858-pall...
publish-containers (maxtext, jax-mealkit, 500, ghcr.io/nvidia/jax-toolbox-internal:8029667858-max...
publish-containers (levanter, jax-mealkit, 500, ghcr.io/nvidia/jax-toolbox-internal:8029667858-le...
publish-containers (upstream-t5x, jax-mealkit, 500, ghcr.io/nvidia/jax-toolbox-internal:802966785...
publish-containers (upstream-pax, jax-mealkit, 500, ghcr.io/nvidia/jax-toolbox-internal:802966785...
publish-containers (t5x, jax-mealkit, 500, ghcr.io/nvidia/jax-toolbox-internal:8029667858-t5x-amd...
publish-containers (pax, jax-mealkit, 500, ghcr.io/nvidia/jax-toolbox-internal:8029667858-pax-amd...
publish-containers (base, jax, 800, ghcr.io/nvidia/jax-toolbox-internal:8029667858-base-amd64, gh...
publish-containers (jax, jax, 1000, ghcr.io/nvidia/jax-toolbox-internal:8029667858-jax-amd64, ghc...
publish-containers (pallas, jax, 900, ghcr.io/nvidia/jax-toolbox-internal:8029667858-pallas-amd64)
publish-containers (maxtext, jax, 900, ghcr.io/nvidia/jax-toolbox-internal:8029667858-maxtext-amd64)
publish-containers (levanter, jax, 900, ghcr.io/nvidia/jax-toolbox-internal:8029667858-levanter-a...
publish-containers (upstream-t5x, jax, 900, ghcr.io/nvidia/jax-toolbox-internal:8029667858-upstre...
publish-containers (upstream-pax, jax, 900, ghcr.io/nvidia/jax-toolbox-internal:8029667858-upstre...
publish-containers (t5x, jax, 900, ghcr.io/nvidia/jax-toolbox-internal:8029667858-t5x-amd64)
publish-containers (pax, jax, 900, ghcr.io/nvidia/jax-toolbox-internal:8029667858-pax-amd64, ghcr...
The logs for this run have expired and are no longer available.
You can’t perform that action at this time.