Skip to content

Commit

Permalink
Add installation support for deepspeed
Browse files Browse the repository at this point in the history
Signed-off-by: Daniel J Walsh <[email protected]>
  • Loading branch information
rhatdan committed May 2, 2024
1 parent 0dcc095 commit f9a9998
Show file tree
Hide file tree
Showing 8 changed files with 29 additions and 17 deletions.
22 changes: 16 additions & 6 deletions training/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,16 @@ help:
@echo " - make instruct-nvidia"
@echo " - make instruct-vllm"
@echo
@echo "Once instruct images created, create advanced training containers, deepspeed only for nvidia"
@echo
@echo " - make deepspeed"
@echo " - make vllm"
@echo
@echo "Once instruct images are created, creat bootc container images"
@echo
@echo " - make bootc-amd"
@echo " - make bootc-intel"
@echo " - make bootc-nvidia"
@echo " - make bootc-vllm"
@echo " - make amd"
@echo " - make intel"
@echo " - make nvidia"
@echo
@echo "If these images are going to be used on a cloud, you might want to add cloud-init."
@echo
Expand All @@ -36,6 +40,14 @@ instruct-nvidia:
# Aggregate target: builds every instruct image listed as a prerequisite.
# The target name must be given to .PHONY explicitly; an empty ".PHONY:" line
# declares nothing, leaving "instruct" open to being masked by a file or
# directory of the same name.
.PHONY: instruct
instruct: instruct-amd instruct-nvidia

# Build the deepspeed training image by running the "image" goal of
# deepspeed/Makefile. The target shares its name with the deepspeed/
# directory, so the .PHONY declaration is what keeps the recipe running.
# $(MAKE) is used instead of a literal "make" so that flags such as -n and -j
# and command-line variable overrides are propagated to the sub-make.
.PHONY: deepspeed
deepspeed:
	$(MAKE) -C deepspeed/ image

# Build the vllm image by running the "image" goal of vllm/Makefile.
# The target shares its name with the vllm/ directory, so the .PHONY
# declaration is what keeps the recipe running.
# $(MAKE) is used instead of a literal "make" so that flags such as -n and -j
# and command-line variable overrides are propagated to the sub-make.
.PHONY: vllm
vllm:
	$(MAKE) -C vllm/ image

#
# Create bootc container images prepared for AI
#
Expand All @@ -46,8 +58,6 @@ intel:
make -C intel-bootc/ bootc
# Runs the "dtk" and "bootc" goals of nvidia-bootc/Makefile in one sub-make.
# NOTE(review): a literal "make" does not inherit the parent's jobserver or -n;
# before switching to $(MAKE), confirm that bootc declares any ordering it
# needs relative to dtk, since the two goals could then run in parallel under -j.
nvidia:
make -C nvidia-bootc/ dtk bootc
vllm:
make -C vllm/ image

#
# Make Bootc container images preinstalled with cloud-init
Expand Down
2 changes: 2 additions & 0 deletions training/amd-bootc/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ IMAGE_NAME ?= $(VENDOR)-bootc

include ../common/Makefile.common

# Alias target: "make default" builds the bootc image.
# Declared phony because it never produces a file named "default".
.PHONY: default
default: bootc

.PHONY: bootc
bootc: prepare-files growfs
"${CONTAINER_TOOL}" build \
Expand Down
6 changes: 1 addition & 5 deletions training/deepspeed/Containerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,7 @@ FROM nvcr.io/nvidia/cuda:12.1.1-cudnn8-devel-ubi9
RUN dnf install -y python python-devel git
RUN python -m ensurepip --upgrade
RUN pip3 install torch==2.1.2 --index-url https://download.pytorch.org/whl/cu121
RUN pip3 install packaging wheel
RUN pip3 install flash-attn==2.5.7
RUN pip3 install deepspeed==0.14.2
RUN pip3 install transformers==4.40.1
RUN pip3 install ipdb jupyterlab gpustat matplotlib hydra-core datasets rich numba
RUN pip3 install packaging wheel flash-attn==2.5.7 deepspeed==0.14.2 transformers==4.40.1 ipdb jupyterlab gpustat matplotlib hydra-core datasets rich numba
RUN git clone https://github.com/instructlab/training.git
RUN mkdir -p /ilab-data/training_output

Expand Down
9 changes: 4 additions & 5 deletions training/deepspeed/Makefile
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
REGISTRY ?= quay.io
REGISTRY_ORG ?= ai-lab
IMAGE_NAME ?= deepspeed-trainer
IMAGE_TAG ?= latest

CONTAINER_TOOL ?= podman

default: image

.PHONY: image
image:
@mkdir -p ../build
"${CONTAINER_TOOL}" build \
$(ARCH:%=--platform linux/%) \
--file Containerfile \
--tag "${REGISTRY}/${REGISTRY_ORG}/${IMAGE_NAME}:${IMAGE_TAG}" \
--layers=false \
--squash-all \
--tag oci:../build/deepspeed-trainer .
2 changes: 1 addition & 1 deletion training/instructlab/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ IMAGE_TAG ?= latest

.PHONY: instructlab
instructlab:
mkdir -p ../build
@mkdir -p ../build
git clone https://github.com/instructlab/instructlab.git 2> /dev/null || true
(cd instructlab; git pull origin main)

Expand Down
2 changes: 2 additions & 0 deletions training/intel-bootc/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ IMAGE_NAME ?= intel-bootc

include ../common/Makefile.common

# Alias target: "make default" builds the bootc image.
# Declared phony because it never produces a file named "default".
.PHONY: default
default: bootc

.PHONY: bootc
bootc: growfs
"${CONTAINER_TOOL}" build \
Expand Down
2 changes: 2 additions & 0 deletions training/nvidia-bootc/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ ENABLE_RT ?=

include ../common/Makefile.common

# Alias target: "make default" builds the bootc image.
# Declared phony because it never produces a file named "default".
.PHONY: default
default: bootc

.PHONY: dtk
dtk:
"${CONTAINER_TOOL}" build \
Expand Down
1 change: 1 addition & 0 deletions training/vllm/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ default: image

.PHONY: image
image:
@mkdir -p ../build
"${CONTAINER_TOOL}" build \
$(ARCH:%=--platform linux/%) \
--file Containerfile \
Expand Down

0 comments on commit f9a9998

Please sign in to comment.