This repository has been archived by the owner on Jul 18, 2024. It is now read-only.

feat: Zarf deploy-time GPU configuration #28

Merged · 41 commits · Feb 10, 2024
Changes from 24 commits

Commits (41)
53a4cd2 GPU switch, single docker file (justinthelaw, Jan 31, 2024)
18834d8 workflow fix, GPU config (justinthelaw, Jan 31, 2024)
a6a4e26 Zarf package fixed for GPU (justinthelaw, Jan 31, 2024)
064a929 Merge branch 'main' into 23-deploy-time-gpu-configurability (justinthelaw, Feb 1, 2024)
8929e33 organize pyproject toml, py version (justinthelaw, Feb 1, 2024)
b4f378c remove remaining -gpu traces (justinthelaw, Feb 1, 2024)
b434eba added standardized workflows, for now (justinthelaw, Feb 1, 2024)
68cdb93 testing multi-platform workflow (justinthelaw, Feb 1, 2024)
a507753 cleaner workflow, phony makefile (justinthelaw, Feb 1, 2024)
7200293 dispatch added back in (justinthelaw, Feb 1, 2024)
4ad71a4 fix config, workflow; better Dockerfile and README (justinthelaw, Feb 1, 2024)
f44d186 better Dockerfile (justinthelaw, Feb 1, 2024)
7444b95 new simplified workflow file (justinthelaw, Feb 1, 2024)
318e4c9 better docker-publish make (justinthelaw, Feb 1, 2024)
a9c6e5c restore workflows (justinthelaw, Feb 1, 2024)
51b3965 restore docker-push cmds (justinthelaw, Feb 1, 2024)
350da45 docker-run command (justinthelaw, Feb 1, 2024)
5f84196 gpus picks 1st device (justinthelaw, Feb 1, 2024)
b4c0735 missing GPU flag (justinthelaw, Feb 1, 2024)
3a0077e missing cuda path (justinthelaw, Feb 1, 2024)
69db218 corrected cuda11.8 path (justinthelaw, Feb 1, 2024)
a2c424b fixed env variables to link cudnn (gphorvath, Feb 1, 2024)
9001ffb bumped the version of transformers (gphorvath, Feb 1, 2024)
814c2b9 new make cmds (justinthelaw, Feb 1, 2024)
23b69b7 removed .python-version (justinthelaw, Feb 1, 2024)
880e0ee remove docker-publish (justinthelaw, Feb 1, 2024)
853e03d remove REGISTRY from makefile (justinthelaw, Feb 1, 2024)
b3a3e33 remove patch (justinthelaw, Feb 1, 2024)
db857ee remove GPU variable (justinthelaw, Feb 1, 2024)
1afd3ef point to new skeleton (justinthelaw, Feb 2, 2024)
fe5ce9b removed un-used script, zarf configs (justinthelaw, Feb 2, 2024)
451ffe0 corrected import-model url (justinthelaw, Feb 2, 2024)
81ebafb resolved merge conflicts (justinthelaw, Feb 2, 2024)
aacf66d better .gitignore (justinthelaw, Feb 5, 2024)
79e4d29 better README (justinthelaw, Feb 5, 2024)
1b5d1b9 removed unused make cmds (justinthelaw, Feb 6, 2024)
1f1019c VERSION script change (justinthelaw, Feb 6, 2024)
8c65550 remove unused VARs (justinthelaw, Feb 7, 2024)
faef54f python pin, .gitignore clean (justinthelaw, Feb 7, 2024)
94e90cc make dev (justinthelaw, Feb 8, 2024)
39c8c30 new skeleton, tested (justinthelaw, Feb 9, 2024)
1 change: 1 addition & 0 deletions .python-version
@@ -0,0 +1 @@
3.11.6
26 changes: 20 additions & 6 deletions Dockerfile
@@ -1,28 +1,42 @@
ARG ARCH=amd64

FROM ghcr.io/defenseunicorns/leapfrogai/python:3.11-dev-${ARCH} as builder
FROM --platform=$BUILDPLATFORM ghcr.io/defenseunicorns/leapfrogai/python:3.11-dev-${ARCH} as builder

WORKDIR /leapfrogai

COPY requirements.txt .
# create virtual environment for light-weight portability and minimal libraries
RUN python3.11 -m venv .venv
ENV PATH="/leapfrogai/.venv/bin:$PATH"

COPY requirements.txt .
RUN pip install -r requirements.txt

RUN /home/nonroot/.local/bin/ct2-transformers-converter --model openai/whisper-base --output_dir .model --copy_files tokenizer.json --quantization float32
# download and convert OpenAI's Whisper Base
ARG MODEL_NAME=openai/whisper-base
RUN ct2-transformers-converter --model ${MODEL_NAME} --output_dir .model --copy_files tokenizer.json --quantization float32

# Use ffmpeg image to get compiled binaries
# Use hardened ffmpeg image to get compiled binaries
FROM cgr.dev/chainguard/ffmpeg:latest as ffmpeg

FROM ghcr.io/defenseunicorns/leapfrogai/python:3.11-${ARCH}
# hardened and slim python image
FROM --platform=$BUILDPLATFORM ghcr.io/defenseunicorns/leapfrogai/python:3.11-${ARCH}

ENV PATH="/leapfrogai/.venv/bin:$PATH"

WORKDIR /leapfrogai

COPY --from=ffmpeg /usr/bin/ffmpeg /usr/bin
COPY --from=ffmpeg /usr/bin/ffprobe /usr/bin
COPY --from=ffmpeg /usr/lib/lib* /usr/lib
COPY --from=builder /home/nonroot/.local/lib/python3.11/site-packages /home/nonroot/.local/lib/python3.11/site-packages

COPY --from=builder /leapfrogai/.venv/ /leapfrogai/.venv/
COPY --from=builder /leapfrogai/.model/ /leapfrogai/.model/

# set the path to the cuda 11.8 dependencies
ENV LD_LIBRARY_PATH \
/leapfrogai/.venv/lib64/python3.11/site-packages/nvidia/cublas/lib:\
/leapfrogai/.venv/lib64/python3.11/site-packages/nvidia/cudnn/lib

COPY main.py .

EXPOSE 50051:50051
32 changes: 0 additions & 32 deletions Dockerfile.gpu

This file was deleted.

51 changes: 27 additions & 24 deletions Makefile
@@ -1,53 +1,56 @@
VERSION := $(shell git describe --abbrev=0 --tags 2> /dev/null )
ifeq ($(VERSION),)
VERSION := latest
endif
MODEL_NAME ?= openai/whisper-base
REGISTRY ?= ghcr.io/defenseunicorns/leapfrogai/whisper
VERSION ?= $(shell git fetch --tags && git tag -l "*.*.*" | sort -V | tail -n 1 | sed -e 's/^v//')
ARCH ?= $(shell uname -m | sed s/aarch64/arm64/ | sed s/x86_64/amd64/)

ARCH := $(shell uname -m | sed s/aarch64/arm64/ | sed s/x86_64/amd64/)

MODEL ?= openai/whisper-base
.PHONY: all

create-venv:
python -m venv .venv

activate-venv:
source .venv/bin/activate

requirements-dev:
python -m pip install -r requirements-dev.txt

requirements:
pip-sync requirements.txt requirements-dev.txt

requirements-gpu:
pip-sync requirements.txt requirements-gpu.txt

build-requirements:
pip-compile -o requirements.txt pyproject.toml

build-requirements-gpu:
pip-compile --extra gpu -o requirements-gpu.txt pyproject.toml

build-requirements-dev:
pip-compile --extra dev -o requirements-dev.txt pyproject.toml

fetch-model:
ct2-transformers-converter --model ${MODEL} --output_dir .model --copy_files tokenizer.json --quantization float32
ct2-transformers-converter --model ${MODEL_NAME} --output_dir .model --copy_files tokenizer.json --quantization float32

test:
pytest **/*.py

dev:
python main.py

make docker-build:
docker-build:
docker build -t ghcr.io/defenseunicorns/leapfrogai/whisper:${VERSION}-${ARCH} --build-arg ARCH=${ARCH} .

make docker-push:
docker push ghcr.io/defenseunicorns/leapfrogai/whisper:${VERSION}-${ARCH}
docker-run:
docker run -d -p 50051:50051 ghcr.io/defenseunicorns/leapfrogai/whisper:${VERSION}-${ARCH}

docker-run-gpu:
docker run --gpus device=0 -e GPU_ENABLED=true -d -p 50051:50051 ghcr.io/defenseunicorns/leapfrogai/whisper:${VERSION}-${ARCH}

make docker-build-gpu:
docker build -f Dockerfile.gpu -t ghcr.io/defenseunicorns/leapfrogai/whisper-gpu:${VERSION}-${ARCH} --build-arg ARCH=${ARCH} .
docker-push:
docker push ghcr.io/defenseunicorns/leapfrogai/whisper:${VERSION}-${ARCH}

make docker-push-gpu:
docker push ghcr.io/defenseunicorns/leapfrogai/whisper-gpu:${VERSION}-${ARCH}
docker-publish:
docker buildx install && \
if docker buildx ls | grep -q 'whisper'; then \
echo "Instance whisper already exists."; \
else \
docker buildx create --use --name whisper; \
fi && \
docker buildx build --push \
--build-arg REGISTRY=${REGISTRY} \
--build-arg VERSION=${VERSION} \
--platform linux/arm64,linux/amd64 \
-t ${REGISTRY}:${VERSION} . && \
docker buildx rm whisper
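
For reference, a sketch of invoking the new `docker-publish` target with explicit overrides; the registry value mirrors the Makefile default, and the version shown is only an example:

```bash
# multi-arch build and push; REGISTRY and VERSION override the Makefile defaults
REGISTRY=ghcr.io/defenseunicorns/leapfrogai/whisper VERSION=0.5.0 make docker-publish
```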
96 changes: 39 additions & 57 deletions README.md
@@ -2,28 +2,59 @@

## Description

A LeapfrogAI API-compatible Whisper wrapper for audio transcription generation.
A LeapfrogAI API-compatible [faster-whisper](https://github.com/SYSTRAN/faster-whisper) wrapper for audio transcription generation across CPU and GPU infrastructures.

## Usage

See [instructions](#instructions) to get the backend up and running. Then, use the [LeapfrogAI API server](https://github.com/defenseunicorns/leapfrogai-api) to interact with the backend.

## Instructions

The instructions in this section assume the following:

1. Properly installed and configured Python 3.11.x, to include its development tools
2. The LeapfrogAI API server is deployed and running

<details>
<summary><b>GPU Variation</b></summary>
<br/>
The following are additional assumptions for GPU inferencing:

3. You have properly installed one or more NVIDIA GPUs and GPU drivers
4. You have properly installed and configured the [cuda-toolkit](https://developer.nvidia.com/cuda-toolkit) and [nvidia-container-toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/index.html)
</details>
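
Before proceeding with the GPU path, the standard NVIDIA command-line tools can confirm assumptions 3 and 4:

```bash
# confirm the driver sees the GPU(s)
nvidia-smi

# confirm the cuda-toolkit compiler is installed
nvcc --version
```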

### Model Selection

The default model that comes with this backend in this repository's officially released images is a [CTranslate2](https://github.com/OpenNMT/CTranslate2) converted version of [OpenAI's Whisper Base](https://huggingface.co/openai/whisper-base).

Other Whisper model sizes and variants can be loaded into this backend by modifying the `MODEL_NAME` during image creation or Makefile command execution.
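
For example, an alternate variant can be fetched through the Makefile's `MODEL_NAME` variable or baked into the image via the matching build argument (the model name below is illustrative):

```bash
# convert and fetch a different Whisper variant locally
MODEL_NAME=openai/whisper-small make fetch-model

# or override the model at image build time
docker build --build-arg MODEL_NAME=openai/whisper-small \
  -t ghcr.io/defenseunicorns/leapfrogai/whisper:dev .
```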

### Run Locally

<details>
<summary><b>GPU Variation</b></summary>
<br/>
The following additional variables must be exported for local GPU inferencing:

```bash
# enable GPU switch
export GPU_ENABLED=true

# point to VENV's local CUDA 11.8 python lib
export LD_LIBRARY_PATH=${PWD}/.venv/lib64/python3.11/site-packages/nvidia/cublas/lib:${PWD}/.venv/lib64/python3.11/site-packages/nvidia/cudnn/lib
```
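
With both variables exported, a quick optional check that CUDA is actually reachable, assuming the `ctranslate2` dependency from `pyproject.toml` is installed in the venv:

```bash
# expect a device count of 1 or more if the CUDA libs resolve correctly
python -c "import ctranslate2; print(ctranslate2.get_cuda_device_count())"
```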

</details>
<br/>

```bash
# Install FFMPEG locally
sudo apt install ffmpeg

# Setup Virtual Environment
make create-venv
make activate-venv
source .venv/bin/activate
make requirements-dev

# Clone Model
@@ -40,9 +71,11 @@ python main.py
For local image building and running.

```bash
docker build -t ghcr.io/defenseunicorns/leapfrogai/whisper:latest .
# add the "--gpus all" flag for CUDA inferencing
docker run --rm --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 -p 50051:50051 -d --name whisper ghcr.io/defenseunicorns/leapfrogai/whisper:latest
make docker-build
# without GPU, CPU-only
make docker-run
# with GPU
make docker-run-gpu
```

#### Remote Image Build and Run
@@ -56,54 +56,3 @@ docker build -t ghcr.io/defenseunicorns/leapfrogai/whisper:<IMAGE_TAG> .
# add the "--gpus all" flag for CUDA inferencing
docker run -p 50051:50051 -d --name whisper ghcr.io/defenseunicorns/leapfrogai/whisper:<IMAGE_TAG>
```
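
For GPU inferencing with the same remote image, a run mirroring the `docker-run-gpu` Makefile target might look like the following (the tag stays a placeholder):

```bash
# remote image with GPU reservation and the GPU switch enabled
docker run --gpus all -e GPU_ENABLED=true -p 50051:50051 -d --name whisper \
  ghcr.io/defenseunicorns/leapfrogai/whisper:<IMAGE_TAG>
```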

### GPU Inferencing

The instructions in this section assume the following:

1. You have properly installed one or more NVIDIA GPUs and GPU drivers
2. You have properly installed and configured the [cuda-toolkit](https://developer.nvidia.com/cuda-toolkit) and [nvidia-container-toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/index.html)

#### Run Locally

For cloning a model locally and running the development backend.

```bash
# Clone Model
make fetch-model

# Setup Python Virtual Environment
make create-venv
make activate-venv
make requirements-gpu

# enable GPU switch
export GPU_ENABLED=true

# point to VENV's local CUDA 11.8 python lib
export LD_LIBRARY_PATH=${PWD}/.venv/lib64/python3.11/site-packages/nvidia/cublas/lib:${PWD}/.venv/lib64/python3.11/site-packages/nvidia/cudnn/lib

# Start Model Backend
make dev
```

#### Run in Docker

For local image building and running.

```bash
# Build GPU docker image
docker build -f Dockerfile.gpu -t ghcr.io/defenseunicorns/leapfrogai/whisper:latest-gpu .

# Run GPU docker container with GPU resource reservation
docker run --gpus all -p 50051:50051 ghcr.io/defenseunicorns/leapfrogai/whisper:latest-gpu
```

For pulling a tagged image from the main release repository.

Where `<IMAGE_TAG>` is the released packages found [here](https://github.com/orgs/defenseunicorns/packages/container/package/leapfrogai%2Fwhisper).

```bash
# Download and run remote GPU image
docker run -p 50051:50051 ghcr.io/defenseunicorns/leapfrogai/whisper:<IMAGE_TAG>
```
20 changes: 6 additions & 14 deletions pyproject.toml
@@ -1,31 +1,23 @@
[project]
name = "leapfrogai-backend-whisper"
version = "0.4.0"
version = "0.5.0"

description = "Whisper backend for LeapfrogAI"
authors = [{ name = "LeapfrogAI Authors", email = "[email protected]" }]
license = { file = "LICENSE" }
readme = "README.md"
requires-python = ">=3.11.4, <3.12"
dependencies = [
"leapfrogai == 0.4.0",
"openai-whisper == 20230918",
"ffmpeg == 1.4",
"faster-whisper == 0.10.0",
"ctranslate2 == 3.22.0",
"transformers == 4.35.2",
"ctranslate2 == 3.24.0",
"transformers == 4.37.2",
]
requires-python = ">=3.11.4, <3.12"
readme = "README.md"

[project.optional-dependencies]
dev = [
"pip-tools",
"pytest",
"black",
"isort",
"nvidia-cublas-cu11",
"nvidia-cuda-runtime-cu11",
]
gpu = ["nvidia-cublas-cu11", "nvidia-cuda-runtime-cu11"]
dev = ["pip-tools", "pytest", "black", "isort"]

[tool.pip-tools]
generate-hashes = true