Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into 32B
Browse files Browse the repository at this point in the history
  • Loading branch information
dirkgr committed Dec 10, 2024
2 parents f516f09 + 5de774f commit 49264f5
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 39 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ jobs:
matrix:
task:
- name: Test (GPU)
image: olmo-core
image: olmo-core-tch251cu124
gpus: 2
run: |
pytest -v --color=yes --durations=3 -m gpu \
Expand All @@ -118,14 +118,14 @@ jobs:
src/test/
- name: Test checkpoint (GPU)
image: olmo-core
image: olmo-core-tch251cu124
gpus: 2
run: |
pytest -v --color=yes --durations=3 -m gpu \
src/test/distributed/checkpoint*
- name: Test MoE (GPU)
image: olmo-core
image: olmo-core-tch251cu124
gpus: 1
run: |
pytest -v --color=yes --durations=3 -m gpu \
Expand Down
59 changes: 32 additions & 27 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
CUDA_VERSION = "12.1"
CUDA_VERSION = "12.4"
TORCH_CUDA_VERSION = $(shell echo $(CUDA_VERSION) | tr -d .)
TORCH_VERSION = "2.5.1"
TORCH_VERSION_SHORT = $(shell echo $(TORCH_VERSION) | tr -d .)
# NOTE: when upgrading the nightly version you also need to upgrade the torch version specification
# in 'pyproject.toml' to include that nightly version.
TORCH_NIGHTLY_VERSION = "2.6.0.dev20241009"
TORCHAO_VERSION = "0.5.0"
TORCH_NIGHTLY_VERSION = "2.6.0.dev20241209"
TORCH_NIGHTLY_VERSION_SHORT = $(shell echo $(TORCH_NIGHTLY_VERSION) | tr -d .)
TORCHAO_VERSION = "0.6.1"
MEGABLOCKS_VERSION = "megablocks[gg] @ git+https://[email protected]/epwalsh/megablocks.git@epwalsh/deps"
FLASH_ATTN_WHEEL = https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.0.post2/flash_attn-2.7.0.post2+cu12torch2.5cxx11abiFALSE-cp311-cp311-linux_x86_64.whl
FLASH_ATTN_WHEEL = https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu12torch2.5cxx11abiFALSE-cp311-cp311-linux_x86_64.whl

VERSION = $(shell python src/olmo_core/version.py)
VERSION_SHORT = $(shell python src/olmo_core/version.py short)
IMAGE_BASENAME = olmo-core
STABLE_IMAGE = tch$(TORCH_VERSION_SHORT)cu$(TORCH_CUDA_VERSION)
NIGHTLY_IMAGE = tch$(TORCH_NIGHTLY_VERSION_SHORT)cu$(TORCH_CUDA_VERSION)
BEAKER_WORKSPACE = ai2/OLMo-core
BEAKER_USER = $(shell beaker account whoami --format=json | jq -r '.[0].name')

Expand Down Expand Up @@ -56,8 +59,8 @@ stable-image :
--build-arg TORCHAO_VERSION=$(TORCHAO_VERSION) \
--target stable \
--progress plain \
-t $(IMAGE_BASENAME) .
echo "Built image '$(IMAGE_BASENAME)', size: $$(docker inspect -f '{{ .Size }}' $(IMAGE_BASENAME) | numfmt --to=si)"
-t olmo-core:$(STABLE_IMAGE) .
echo "Built image 'olmo-core:$(STABLE_IMAGE)', size: $$(docker inspect -f '{{ .Size }}' olmo-core:$(STABLE_IMAGE) | numfmt --to=si)"

.PHONY : nightly-image
nightly-image :
Expand All @@ -72,38 +75,40 @@ nightly-image :
--build-arg TORCH_NIGHTLY_VERSION=$(TORCH_NIGHTLY_VERSION) \
--target nightly \
--progress plain \
-t $(IMAGE_BASENAME)-nightly .
echo "Built image '$(IMAGE_BASENAME)-nightly', size: $$(docker inspect -f '{{ .Size }}' $(IMAGE_BASENAME)-nightly | numfmt --to=si)"
-t olmo-core:$(NIGHTLY_IMAGE) .
echo "Built image 'olmo-core:$(NIGHTLY_IMAGE)', size: $$(docker inspect -f '{{ .Size }}' olmo-core:$(NIGHTLY_IMAGE) | numfmt --to=si)"

.PHONY : ghcr-image-stable
ghcr-image-stable : stable-image
docker tag $(IMAGE_BASENAME) ghcr.io/allenai/$(IMAGE_BASENAME)
docker push ghcr.io/allenai/$(IMAGE_BASENAME)
docker tag $(IMAGE_BASENAME) ghcr.io/allenai/$(IMAGE_BASENAME)-v$(VERSION_SHORT)
docker push ghcr.io/allenai/$(IMAGE_BASENAME)-v$(VERSION_SHORT)
docker tag $(IMAGE_BASENAME) ghcr.io/allenai/$(IMAGE_BASENAME)-v$(VERSION)
docker push ghcr.io/allenai/$(IMAGE_BASENAME)-v$(VERSION)
docker tag olmo-core:$(STABLE_IMAGE) ghcr.io/allenai/olmo-core:$(STABLE_IMAGE)
docker push ghcr.io/allenai/olmo-core:$(STABLE_IMAGE)
docker tag olmo-core:$(STABLE_IMAGE) ghcr.io/allenai/olmo-core:$(STABLE_IMAGE)-v$(VERSION_SHORT)
docker push ghcr.io/allenai/olmo-core:$(STABLE_IMAGE)-v$(VERSION_SHORT)
docker tag olmo-core:$(STABLE_IMAGE) ghcr.io/allenai/olmo-core:$(STABLE_IMAGE)-v$(VERSION)
docker push ghcr.io/allenai/olmo-core:$(STABLE_IMAGE)-v$(VERSION)
docker tag olmo-core:$(STABLE_IMAGE) ghcr.io/allenai/olmo-core:latest
docker push ghcr.io/allenai/olmo-core:latest

.PHONY : beaker-image-stable
beaker-image-stable : stable-image
./src/scripts/beaker/create_beaker_image.sh $(IMAGE_BASENAME) $(IMAGE_BASENAME) $(BEAKER_WORKSPACE)
./src/scripts/beaker/create_beaker_image.sh $(IMAGE_BASENAME) $(IMAGE_BASENAME)-v$(VERSION_SHORT) $(BEAKER_WORKSPACE)
./src/scripts/beaker/create_beaker_image.sh $(IMAGE_BASENAME) $(IMAGE_BASENAME)-v$(VERSION) $(BEAKER_WORKSPACE)
./src/scripts/beaker/create_beaker_image.sh olmo-core:$(STABLE_IMAGE) olmo-core-$(STABLE_IMAGE) $(BEAKER_WORKSPACE)
./src/scripts/beaker/create_beaker_image.sh olmo-core:$(STABLE_IMAGE) olmo-core-$(STABLE_IMAGE)-v$(VERSION_SHORT) $(BEAKER_WORKSPACE)
./src/scripts/beaker/create_beaker_image.sh olmo-core:$(STABLE_IMAGE) olmo-core-$(STABLE_IMAGE)-v$(VERSION) $(BEAKER_WORKSPACE)

.PHONY : ghcr-image-nightly
ghcr-image-nightly : nightly-image
docker tag $(IMAGE_BASENAME)-nightly ghcr.io/allenai/$(IMAGE_BASENAME)-nightly
docker push ghcr.io/allenai/$(IMAGE_BASENAME)-nightly
docker tag $(IMAGE_BASENAME)-nightly ghcr.io/allenai/$(IMAGE_BASENAME)-v$(VERSION_SHORT)-nightly
docker push ghcr.io/allenai/$(IMAGE_BASENAME)-v$(VERSION_SHORT)-nightly
docker tag $(IMAGE_BASENAME)-nightly ghcr.io/allenai/$(IMAGE_BASENAME)-v$(VERSION)-nightly
docker push ghcr.io/allenai/$(IMAGE_BASENAME)-v$(VERSION)-nightly
docker tag olmo-core:$(NIGHTLY_IMAGE) ghcr.io/allenai/olmo-core:$(NIGHTLY_IMAGE)
docker push ghcr.io/allenai/olmo-core:$(NIGHTLY_IMAGE)
docker tag olmo-core:$(NIGHTLY_IMAGE) ghcr.io/allenai/olmo-core:$(NIGHTLY_IMAGE)-v$(VERSION_SHORT)
docker push ghcr.io/allenai/olmo-core:$(NIGHTLY_IMAGE)-v$(VERSION_SHORT)
docker tag olmo-core:$(NIGHTLY_IMAGE) ghcr.io/allenai/olmo-core:$(NIGHTLY_IMAGE)-v$(VERSION)
docker push ghcr.io/allenai/olmo-core:$(NIGHTLY_IMAGE)-v$(VERSION)

.PHONY : beaker-image-nightly
beaker-image-nightly : nightly-image
./src/scripts/beaker/create_beaker_image.sh $(IMAGE_BASENAME)-nightly $(IMAGE_BASENAME)-nightly $(BEAKER_WORKSPACE)
./src/scripts/beaker/create_beaker_image.sh $(IMAGE_BASENAME)-nightly $(IMAGE_BASENAME)-v$(VERSION_SHORT)-nightly $(BEAKER_WORKSPACE)
./src/scripts/beaker/create_beaker_image.sh $(IMAGE_BASENAME)-nightly $(IMAGE_BASENAME)-v$(VERSION)-nightly $(BEAKER_WORKSPACE)
./src/scripts/beaker/create_beaker_image.sh olmo-core:$(NIGHTLY_IMAGE) olmo-core-$(NIGHTLY_IMAGE) $(BEAKER_WORKSPACE)
./src/scripts/beaker/create_beaker_image.sh olmo-core:$(NIGHTLY_IMAGE) olmo-core-$(NIGHTLY_IMAGE)-v$(VERSION_SHORT) $(BEAKER_WORKSPACE)
./src/scripts/beaker/create_beaker_image.sh olmo-core:$(NIGHTLY_IMAGE) olmo-core-$(NIGHTLY_IMAGE)-v$(VERSION) $(BEAKER_WORKSPACE)

.PHONY : get-beaker-workspace
get-beaker-workspace :
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ There are a number of optional dependencies that must be installed to use certai

The published [Docker images](https://github.com/orgs/allenai/packages?repo_name=OLMo-core) contain all core and optional dependencies, and are regularly tested on our in-house H100 clusters.
But there are several things to keep in mind if you intend to use these images:
- They do not come with the OLMo-core package installed, only its dependencies, to accommodate regular code changes. Therefore the `*-nightly` tags indicate that they come with PyTorch nightly, not some nightly version of OLMo-core.
- They do not come with the OLMo-core package installed, only its dependencies, to accommodate regular code changes.
- They may not work on your own cluster if you have different hardware or driver/CUDA versions.

If the published images do not work for your use-case for any of the above reasons, you could adapt our [Dockerfile](https://github.com/allenai/OLMo-core/blob/main/src/Dockerfile) to build your own images.
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ requires-python = ">=3.9"
license = { file = "LICENSE" }
dependencies = [
"numpy<2.0",
"torch>=2.4,<=2.6.0.dev20241009",
"torch>=2.5.1",
"cached-path",
"requests",
"packaging",
Expand Down
11 changes: 6 additions & 5 deletions src/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# NOTE: make sure CUDA_VERSION and TORCH_CUDA_VERSION always match, except for punctuation
ARG CUDA_VERSION="12.1"
ARG TORCH_CUDA_VERSION="121"
ARG CUDA_VERSION="12.4"
ARG TORCH_CUDA_VERSION="124"
ARG TORCH_VERSION="2.5.1"

#########################################################################
Expand Down Expand Up @@ -31,7 +31,7 @@ ARG MEGABLOCKS_VERSION="megablocks[gg] @ git+https://[email protected]/epwalsh/mega
RUN pip wheel --no-build-isolation --no-cache-dir "${MEGABLOCKS_VERSION}"

# Build flash-attn.
ARG FLASH_ATTN_WHEEL=https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.0.post2/flash_attn-2.7.0.post2+cu12torch2.5cxx11abiFALSE-cp311-cp311-linux_x86_64.whl
ARG FLASH_ATTN_WHEEL=https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu12torch2.5cxx11abiFALSE-cp311-cp311-linux_x86_64.whl
RUN wget ${FLASH_ATTN_WHEEL}

# Only keep the target wheels and dependencies with CUDA extensions.
Expand Down Expand Up @@ -73,7 +73,7 @@ RUN pip install --upgrade --no-cache-dir pip wheel packaging

# Install torchao.
ARG TORCH_CUDA_VERSION
ARG TORCHAO_VERSION="0.5.0"
ARG TORCHAO_VERSION="0.6.1"
RUN pip install --no-cache-dir \
--extra-index-url https://download.pytorch.org/whl/cu${TORCH_CUDA_VERSION} \
torchao==${TORCHAO_VERSION}
Expand All @@ -90,6 +90,7 @@ RUN pip install --no-cache-dir '.[all]' && \
pip uninstall -y ai2-olmo-core && \
rm -rf *

LABEL org.opencontainers.image.source https://github.com/allenai/OLMo-core
WORKDIR /app/olmo-core

#########################################################################
Expand All @@ -99,7 +100,7 @@ WORKDIR /app/olmo-core
FROM stable as nightly

ARG TORCH_CUDA_VERSION
ARG TORCH_NIGHTLY_VERSION="2.6.0.dev20241009"
ARG TORCH_NIGHTLY_VERSION="2.6.0.dev20241209"
RUN pip install --no-cache-dir --pre \
--index-url https://download.pytorch.org/whl/nightly/cu${TORCH_CUDA_VERSION} \
torch==${TORCH_NIGHTLY_VERSION}+cu${TORCH_CUDA_VERSION}
4 changes: 2 additions & 2 deletions src/olmo_core/launch/beaker.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,12 @@ class OLMoCoreBeakerImage(StrEnum):
includes *versioned* images that are published with each release of the OLMo-core package.
"""

stable = "olmo-core"
stable = "olmo-core-tch251cu124"
"""
Built with the latest compatible stable version of PyTorch.
"""

nightly = "olmo-core-nightly"
nightly = "olmo-core-tch260dev20241209cu124"
"""
Built with the latest compatible nightly version of PyTorch.
"""
Expand Down

0 comments on commit 49264f5

Please sign in to comment.