diff --git a/training/Makefile b/training/Makefile index b073a846..abe16502 100644 --- a/training/Makefile +++ b/training/Makefile @@ -8,12 +8,16 @@ help: @echo " - make instruct-nvidia" @echo " - make instruct-vllm" @echo + @echo "Once instruct images created, create advanced training containers, deepspeed only for nvidia" + @echo + @echo " - make deepspeed" + @echo " - make vllm" + @echo @echo "Once instruct images are created, creat bootc container images" @echo - @echo " - make bootc-amd" - @echo " - make bootc-intel" - @echo " - make bootc-nvidia" - @echo " - make bootc-vllm" + @echo " - make amd" + @echo " - make intel" + @echo " - make nvidia" @echo @echo "If these images are going to be used on a cloud, you might want to add cloud-init." @echo @@ -36,6 +40,14 @@ instruct-nvidia: .PHONY: instruct: instruct-amd instruct-nvidia +.PHONY: deepspeed +deepspeed: + make -C deepspeed/ image + +.PHONY: vllm +vllm: + make -C vllm/ image + # # Create bootc container images prepared for AI # @@ -46,8 +58,6 @@ intel: make -C intel-bootc/ bootc nvidia: make -C nvidia-bootc/ dtk bootc -vllm: - make -C vllm/ image # # Make Bootc container images preinstalled with cloud-init diff --git a/training/amd-bootc/Makefile b/training/amd-bootc/Makefile index 04b26652..4d6dd322 100644 --- a/training/amd-bootc/Makefile +++ b/training/amd-bootc/Makefile @@ -4,6 +4,8 @@ IMAGE_NAME ?= $(VENDOR)-bootc include ../common/Makefile.common +default: bootc + .PHONY: bootc bootc: prepare-files growfs "${CONTAINER_TOOL}" build \ diff --git a/training/deepspeed/Containerfile b/training/deepspeed/Containerfile index 854864de..c757037e 100644 --- a/training/deepspeed/Containerfile +++ b/training/deepspeed/Containerfile @@ -4,11 +4,7 @@ FROM nvcr.io/nvidia/cuda:12.1.1-cudnn8-devel-ubi9 RUN dnf install -y python python-devel git RUN python -m ensurepip --upgrade RUN pip3 install torch==2.1.2 --index-url https://download.pytorch.org/whl/cu121 -RUN pip3 install packaging wheel -RUN pip3 install flash-attn==2.5.7 -RUN pip3 install deepspeed==0.14.2 -RUN pip3 install transformers==4.40.1 -RUN pip3 install ipdb jupyterlab gpustat matplotlib hydra-core datasets rich numba +RUN pip3 install packaging wheel flash-attn==2.5.7 deepspeed==0.14.2 transformers==4.40.1 ipdb jupyterlab gpustat matplotlib hydra-core datasets rich numba RUN git clone https://github.com/instructlab/training.git RUN mkdir -p /ilab-data/training_output diff --git a/training/deepspeed/Makefile b/training/deepspeed/Makefile index 5f08903b..6ef500f6 100644 --- a/training/deepspeed/Makefile +++ b/training/deepspeed/Makefile @@ -1,15 +1,14 @@ -REGISTRY ?= quay.io -REGISTRY_ORG ?= ai-lab IMAGE_NAME ?= deepspeed-trainer -IMAGE_TAG ?= latest - CONTAINER_TOOL ?= podman default: image .PHONY: image image: + @mkdir -p ../build "${CONTAINER_TOOL}" build \ $(ARCH:%=--platform linux/%) \ --file Containerfile \ - --tag "${REGISTRY}/${REGISTRY_ORG}/${IMAGE_NAME}:${IMAGE_TAG}" \ \ No newline at end of file + --layers=false \ + --squash-all \ + --tag oci:../build/deepspeed-trainer . diff --git a/training/instructlab/Makefile b/training/instructlab/Makefile index 4e19e334..1dd0f088 100644 --- a/training/instructlab/Makefile +++ b/training/instructlab/Makefile @@ -6,7 +6,7 @@ IMAGE_TAG ?= latest .PHONY: instructlab instructlab: - mkdir -p ../build + @mkdir -p ../build git clone https://github.com/instructlab/instructlab.git 2> /dev/null || true (cd instructlab; git pull origin main) diff --git a/training/intel-bootc/Makefile b/training/intel-bootc/Makefile index 6e23b461..97c56578 100644 --- a/training/intel-bootc/Makefile +++ b/training/intel-bootc/Makefile @@ -2,6 +2,8 @@ IMAGE_NAME ?= intel-bootc include ../common/Makefile.common +default: bootc + .PHONY: bootc bootc: growfs "${CONTAINER_TOOL}" build \ diff --git a/training/nvidia-bootc/Makefile b/training/nvidia-bootc/Makefile index 5870d594..6361a0ac 100644 --- a/training/nvidia-bootc/Makefile +++ b/training/nvidia-bootc/Makefile @@ -10,6 +10,8 @@ ENABLE_RT ?= include ../common/Makefile.common +default: bootc + .PHONY: dtk dtk: "${CONTAINER_TOOL}" build \ diff --git a/training/vllm/Makefile b/training/vllm/Makefile index 2da8d952..3e8b8bc9 100644 --- a/training/vllm/Makefile +++ b/training/vllm/Makefile @@ -4,6 +4,7 @@ default: image .PHONY: image image: + @mkdir -p ../build "${CONTAINER_TOOL}" build \ $(ARCH:%=--platform linux/%) \ --file Containerfile \