Skip to content

Commit

Permalink
abstracting model downloads and file-normalization to models dir
Browse files Browse the repository at this point in the history
Signed-off-by: greg pereira <[email protected]>
  • Loading branch information
Gregory-Pereira committed Apr 9, 2024
1 parent 25899bc commit 227eed4
Show file tree
Hide file tree
Showing 15 changed files with 220 additions and 142 deletions.
13 changes: 3 additions & 10 deletions .github/workflows/model_servers.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,17 +47,12 @@ jobs:
- name: Build Image
id: build_image
uses: redhat-actions/[email protected]
with:
image: ${{ env.REGISTRY }}/${{ matrix.image_name }}
platforms: linux/amd64, linux/arm64
tags: latest
containerfiles: ./model_servers/${{ matrix.image_name }}/base/Containerfile
context: model_servers/${{ matrix.image_name }}/
working-directory: ./model_servers/${{ matrix.image_name }}/
run: make build

- name: Download model
working-directory: ./model_servers/${{ matrix.image_name }}/
run: make ${{ matrix.model }}
run: make trigger-download-model-${{ matrix.model }}

- name: Set up Python
uses: actions/[email protected]
Expand All @@ -71,8 +66,6 @@ jobs:
- name: Run tests
working-directory: ./model_servers/${{ matrix.image_name }}/
run: make test
env:
IMAGE_NAME: ${{ matrix.image_name }}

- name: Login to Container Registry
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
Expand Down
6 changes: 3 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
port_check.lock
*build
models/*
model_servers/llamacpp_python/model.gguf
model_servers/*/model.file
!models/convert_models/*
!models/Containerfile
!models/README.md
!models/README.md

38 changes: 38 additions & 0 deletions model_servers/common/Makefile.common
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Defaults shared by every model server Makefile that includes this file.
# Both use `?=` so the including Makefile, the environment or the command
# line can supply their own values.
REGISTRY ?= quay.io
BASE_IMAGE_NAME ?= ai-lab/model_servers

# Options appended to bind mounts of the model file: always read-only, and on
# Linux additionally `Z` (presumably for SELinux relabelling -- confirm against
# the podman documentation).
OS := $(shell uname -s)
ifeq ($(OS),Linux)
BIND_MOUNT_OPTIONS := Z,ro
else
BIND_MOUNT_OPTIONS := ro
endif

# Build the model server image, tagged $(IMAGE), from base/Containerfile using
# the including Makefile's directory as the build context.
.PHONY: build
build:
	podman build --file base/Containerfile --tag $(IMAGE) .

# Install the Python packages listed in tests/requirements.txt.
.PHONY: install
install:
	pip install --requirement tests/requirements.txt

# Run the pytest suite. tests/conftest.py reads REGISTRY, IMAGE_NAME and
# MODEL_NAME from the process environment, so all three are exported
# explicitly here; this keeps `make run-test` working when it is invoked
# directly rather than through the `test` target.
.PHONY: run-test
run-test:
	REGISTRY=$(REGISTRY) IMAGE_NAME=$(IMAGE_NAME) MODEL_NAME=$(MODEL_NAME) pytest --log-cli-level NOTSET

# Run the test suite against a model that was already downloaded into
# ../../models:
#   1. fail early if the model file is missing from the models directory;
#   2. symlink it into the current directory if it is not already there;
#   3. run `run-test`, then always run `clean` to remove the link again.
# The two sub-makes are invoked one after the other (rather than as two goals
# of a single `$(MAKE) -k run-test clean`) so that a parallel build can never
# remove the symlink while the tests are still running. The exit status of the
# test run is preserved.
.PHONY: test
test:
	@if [ ! -f "../../models/$(MODEL_NAME)" ]; then \
		echo "Model file -- $(MODEL_NAME) -- not present in the models directory."; \
		exit 1; \
	fi; \
	if [ ! -f "./$(MODEL_NAME)" ]; then \
		ln -s "../../models/$(MODEL_NAME)" "./$(MODEL_NAME)"; \
	fi; \
	status=0; \
	$(MAKE) IMAGE_NAME=$(IMAGE_NAME) MODEL_NAME=$(MODEL_NAME) REGISTRY=$(REGISTRY) run-test || status=$$?; \
	$(MAKE) MODEL_NAME=$(MODEL_NAME) clean; \
	exit $$status

# Remove the model file/symlink that `test` placed in the current directory.
# Errors are deliberately ignored (leading `-`) so that cleaning an already
# clean directory is not a failure. The redirection is spelled in POSIX form:
# `&>` is a bash extension, and make runs recipes with /bin/sh by default,
# where it would background the command instead of silencing it.
.PHONY: clean
clean:
	-rm -f ./$(MODEL_NAME) >/dev/null 2>&1


71 changes: 24 additions & 47 deletions model_servers/llamacpp_python/Makefile
Original file line number Diff line number Diff line change
@@ -1,41 +1,21 @@
APP := llamacpp_python
PORT := 8001

IMAGE := quay.io/ai-lab/model_servers/$(APP):latest
CUDA_IMAGE := quay.io/ai-lab/model_servers/$(APP)_cuda:latest
VULKAN_IMAGE := quay.io/ai-lab/model_servers/$(APP)_vulkan:latest

# ----- MODEL OPTIONS -----

LLAMA_MODEL_NAME := llama-2-7b-chat.Q5_K_S.gguf
LLAMA_MODEL_URL := https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf
MODELS_PATH := /locallm/models

TINY_LLAMA_MODEL_NAME := tinyllama-1.1b-chat-v1.0.Q4_K_S.gguf
TINY_LLAMA_MODEL_URL := https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/blob/main/tinyllama-1.1b-chat-v1.0.Q4_K_S.gguf
REGISTRY ?= quay.io
PORT ?= 8001

MISTRAL_MODEL_NAME := mistral-7b-instruct-v0.1.Q4_K_M.gguf
MISTRAL_MODEL_URL := https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf
IMAGE_NAME ?= $(BASE_IMAGE_NAME)/$(APP):latest
IMAGE ?= $(REGISTRY)/$(IMAGE_NAME)

# --- END MODEL OPTIONS ---
MODEL_NAME ?= mistral-7b-instruct-v0.1.Q4_K_M.gguf

SELECTED_MODEL_NAME := $(or $(SELECTED_MODEL),$(MISTRAL_MODEL_NAME))
SELECTED_MODEL_URL := $(or $(SELECTED_MODEL_LINK),$(MISTRAL_MODEL_URL))
include ../common/Makefile.common

RELATIVE_MODELS_PATH := ../../models
MODELS_PATH := /locallm/models

BIND_MOUNT_OPTIONS := ro
OS := $(shell uname -s)
ifeq ($(OS),Linux)
BIND_MOUNT_OPTIONS := ro,Z
endif
CUDA_IMAGE_NAME := $(REGISTRY)/$(BASE_IMAGE_NAME)/$(APP)_cuda:latest
VULKAN_IMAGE := $(REGISTRY)/$(BASE_IMAGE_NAME)/$(APP)_vulkan:latest

# Special target names are case-sensitive: only `.PHONY` marks `all` as phony.
.PHONY: all
all: build mistral run

.PHONY: build
build:
podman build -t $(IMAGE) . -f base/Containerfile
all: build trigger-download-model-mistral run

.PHONY: build-cuda
build-cuda:
Expand All @@ -45,29 +25,26 @@ build-cuda:
build-vulkan:
podman build -t $(VULKAN_IMAGE) . -f cuda/Containerfile

.PHONY: download-model-tiny-llama
download-model-tiny-llama:
curl -H "Cache-Control: no-cache" -s -S -L -f $(TINY_LLAMA_MODEL_URL) -z $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME) -o $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME).tmp && mv -f $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME) 2>/dev/null || rm -f $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME)
########################## TRIGGER DOWNLOAD MAKE TARGETS ##########################
### NOTE: you should not add these trigger make targets for new models.
# Adding new models should be implemented in the [models directory](../../models).
# These are just provided here for bootstrapping and testing different model types.

.PHONY: download-model-llama
download-model-llama:
curl -H "Cache-Control: no-cache" -s -S -L -f $(LLAMA_MODEL_URL) -z $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME) -o $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME).tmp && mv -f $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME) 2>/dev/null || rm -f $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME)
# Delegate to the `download-model-tiny-llama` target of ../../models/Makefile.
# `$(MAKE)` is used instead of a literal `make` so that flags such as -n and -j
# (and the jobserver) are propagated to the sub-make.
.PHONY: trigger-download-model-tiny-llama # small .gguf model for testing
trigger-download-model-tiny-llama:
	cd ../../models && \
	$(MAKE) -f Makefile download-model-tiny-llama

# Delegate to the `download-model-mistral` target of ../../models/Makefile.
# `$(MAKE)` is used instead of a literal `make` so that flags such as -n and -j
# (and the jobserver) are propagated to the sub-make.
.PHONY: trigger-download-model-mistral # default model
trigger-download-model-mistral:
	cd ../../models && \
	$(MAKE) -f Makefile download-model-mistral

.PHONY: mistral
mistral:
curl -H "Cache-Control: no-cache" -s -S -L -f $(MISTRAL_MODEL_URL) -z $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME) -o $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp && mv -f $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME) 2>/dev/null || rm -f $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME)
############################ END DOWNLOAD MAKE TARGETS ############################

.PHONY: install
install:
pip install -r tests/requirements.txt

.PHONY: run
run:
cd ../../models && \
podman run -it -d -p $(PORT):$(PORT) -v ./$(SELECTED_MODEL_NAME):$(MODELS_PATH)/model.gguf:$(BIND_MOUNT_OPTIONS) -e MODEL_PATH=$(MODELS_PATH)/model.gguf -e HOST=0.0.0.0 -e PORT=$(PORT) --net=host $(IMAGE)
podman run --rm -d -it -p $(PORT):$(PORT) -v ./$(MODEL_NAME):$(MODELS_PATH)/$(MODEL_NAME):$(BIND_MOUNT_OPTIONS) -e HOST=0.0.0.0 -e MODEL_PATH=$(MODELS_PATH)/$(MODEL_NAME) -e PORT=$(PORT) --network=host $(IMAGE)

.PHONY: test
test:
curl -H "Cache-Control: no-cache" -s -S -L -f $(SELECTED_MODEL_URL) -z ./model.gguf -o ./model.gguf.tmp && mv -f ./model.gguf.tmp ./model.gguf 2>/dev/null || rm -f ./model.gguf.tmp ./model.gguf
pytest --log-cli-level NOTSET
11 changes: 3 additions & 8 deletions model_servers/llamacpp_python/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,20 +27,15 @@ At the time of this writing, 2 models are known to work with this service
- **Mistral-7b**
- Download URL: [https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf)

It is suggested you place models in the [models](../../models/) directory. As for retrieving them, either use `wget` to download them with the download links above, or call the model names from the Makefile.
It is suggested you place models in the [models](../../models/) directory. It is also recommended that you use the [models dir Makefile](../../models/Makefile) to view which models you can download and to download the models themselves:

```bash
cd ../../models
curl -sLO <Download URL>
make -f Makefile download-model-mistral
cd ../model_servers/llamacpp_python
```

or:

```bash
make -f Makefile download-model-mistral
make -f Makefile download-model-llama
```
However, we have also added Makefile targets [in this directory's Makefile](./Makefile) which will in turn call those targets, e.g. `make -f Makefile trigger-download-model-mistral`.

### Deploy Model Service

Expand Down
22 changes: 18 additions & 4 deletions model_servers/llamacpp_python/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,31 @@
import pytest_container
import os
import logging

REGISTRY=os.environ['REGISTRY']
IMAGE_NAME=os.environ['IMAGE_NAME']
MODEL_NAME=os.environ['MODEL_NAME']

# Log the configuration the test session was started with. This conftest
# belongs to the llamacpp_python model server, so the message names that
# server (it previously said "whispercpp", copied from the sibling conftest).
logging.info("""
Starting pytest with the following ENV vars:
REGISTRY: {REGISTRY}
IMAGE_NAME: {IMAGE_NAME}
MODEL_NAME: {MODEL_NAME}
For:
model_server: llamacpp_python
""".format(REGISTRY=REGISTRY, IMAGE_NAME=IMAGE_NAME, MODEL_NAME=MODEL_NAME))

MS = pytest_container.Container(
url=f"containers-storage:{os.environ['REGISTRY']}/{os.environ['IMAGE_NAME']}",
url=f"containers-storage:{REGISTRY}/{IMAGE_NAME}",
volume_mounts=[
pytest_container.container.BindMount(
container_path="/locallm/models/model.gguf",
host_path=f"./model.gguf",
container_path="/locallm/models/{MODEL_NAME}".format(MODEL_NAME=MODEL_NAME),
host_path=f"./{MODEL_NAME}",
flags=["ro"]
)
],
extra_environment_variables={
"MODEL_PATH": "/locallm/models/model.gguf",
"MODEL_PATH": "/locallm/models/{MODEL_NAME}".format(MODEL_NAME=MODEL_NAME),
"HOST": "0.0.0.0",
"PORT": "8001"
},
Expand Down
1 change: 0 additions & 1 deletion model_servers/llamacpp_python/tests/test_alive.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

CONTAINER_IMAGES = [MS]


def test_etc_os_release_present(auto_container: pytest_container.container.ContainerData):
assert auto_container.connection.file("/etc/os-release").exists

Expand Down
2 changes: 1 addition & 1 deletion model_servers/llamacpp_python/tooling_options.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
"This notebook assumes that the playground image is running locally. Once built, you can use the below to start the model service image. \n",
"\n",
"```bash\n",
"podman run -it -p 8000:8000 -v <YOUR-LOCAL-PATH>/locallm/models:/locallm/models:Z -e MODEL_PATH=models/mistral-7b-instruct-v0.1.Q4_K_M.gguf playground\n",
"podman run -it -p 8000:8000 -v <YOUR-LOCAL-PATH>/locallm/models:/locallm/models:Z -e MODEL_PATH=models/mistral-7b-instruct-v0.1.Q4_K_M.file playground\n",
"```"
]
},
Expand Down
62 changes: 23 additions & 39 deletions model_servers/whispercpp/Makefile
Original file line number Diff line number Diff line change
@@ -1,57 +1,41 @@
PORT := 8001
APP := whispercpp
IMAGE := quay.io/ai-lab/model_servers/$(APP):latest
CUDA_IMAGE := quay.io/ai-lab/model_servers/$(APP)_cuda:latest
VULKAN_IMAGE :=quay.io/ai-lab/model_servers/$(APP)_vulkan:latest

# ----- MODEL OPTIONS -----

WHISPER_SMALL_MODEL_NAME := ggml-small.bin
WHISPER_SMALL_MODEL_URL := https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin
MODELS_PATH := /app/models

WHISPER_BASE_MODEL_NAME := ggml-base.en.bin
WHISPER_BASE_MODEL_URL := https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin
REGISTRY ?= quay.io
PORT ?= 8001

SELECTED_MODEL_NAME := $(or $(SELECTED_MODEL),$(WHISPER_SMALL_MODEL_NAME))
SELECTED_MODEL_URL := $(or $(SELECTED_MODEL_LINK),$(WHISPER_SMALL_MODEL_URL))
IMAGE_NAME ?= $(BASE_IMAGE_NAME)/$(APP):latest
IMAGE ?= $(REGISTRY)/$(IMAGE_NAME)

# --- END MODEL OPTIONS ---
MODEL_NAME ?= ggml-small.bin

MODELS_PATH := /app/models
include ../common/Makefile.common

BIND_MOUNT_OPTIONS := ro
OS := $(shell uname -s)
ifeq ($(OS),Linux)
BIND_MOUNT_OPTIONS := Z,ro
endif
# CUDA_IMAGE_NAME := $(REGISTRY)/$(BASE_IMAGE_NAME)/$(APP)_cuda:latest
# VULKAN_IMAGE := $(REGISTRY)/$(BASE_IMAGE_NAME)/$(APP)_vulkan:latest

.PHONY: all
all: build whisper-small run
all: build trigger-download-model-whisper-small run

.PHONY: build
build:
podman build -t $(IMAGE) . -f Containerfile
########################## TRIGGER DOWNLOAD MAKE TARGETS ##########################
### NOTE: you should not add these trigger make targets for new models.
# Adding new models should be implemented in the [models directory](../../models).
# These are just provided here for bootstrapping and testing different model types.

.PHONY: whisper-small
whisper-small:
.PHONY: trigger-download-model-whisper-small # small .bin model type testing
trigger-download-model-whisper-small:
cd ../../models && \
curl -s -S -L -f $(WHISPER_SMALL_MODEL_URL) -z $(WHISPER_SMALL_MODEL_NAME) -o $(WHISPER_SMALL_MODEL_NAME).tmp && mv -f $(WHISPER_SMALL_MODEL_NAME).tmp $(WHISPER_SMALL_MODEL_NAME) 2>/dev/null || rm -f $(WHISPER_SMALL_MODEL_NAME).tmp $(WHISPER_SMALL_MODEL_NAME)

.PHONY: install
install:
pip install -r tests/requirements.txt
make -f Makefile download-model-whisper-small

.PHONY: download-model-whisper-base
download-model-whisper-base:
.PHONY: trigger-download-model-whisper-base # default model
trigger-download-model-whisper-base:
cd ../../models && \
curl -s -S -L -f $(WHISPER_BASE_MODEL_URL) -z $(WHISPER_BASE_MODEL_NAME) -o $(WHISPER_BASE_MODEL_NAME).tmp && mv -f $(WHISPER_BASE_MODEL_NAME).tmp $(WHISPER_BASE_MODEL_NAME) 2>/dev/null || rm -f $(WHISPER_BASE_MODEL_NAME).tmp $(WHISPER_BASE_MODEL_NAME)
make -f Makefile download-model-whisper-base

############################ END DOWNLOAD MAKE TARGETS ############################

.PHONY: run
run:
cd ../../models && \
podman run -d --rm -it -p $(PORT):$(PORT) -v ./$(SELECTED_MODEL_NAME):$(MODELS_PATH)/$(SELECTED_MODEL_NAME):$(BIND_MOUNT_OPTIONS) -e HOST=0.0.0.0 -e MODEL_PATH=$(MODELS_PATH)/$(SELECTED_MODEL_NAME) -e PORT=$(PORT) $(IMAGE)
podman run --rm -d -it -p $(PORT):$(PORT) -v ./$(MODEL_NAME):$(MODELS_PATH)/$(MODEL_NAME):$(BIND_MOUNT_OPTIONS) -e HOST=0.0.0.0 -e MODEL_PATH=$(MODELS_PATH)/$(MODEL_NAME) -e PORT=$(PORT) --network=host $(IMAGE)

.PHONY: test
test:
curl -H "Cache-Control: no-cache" -s -S -L -f $(SELECTED_MODEL_URL) -z ./model.gguf -o ./model.gguf.tmp && mv -f ./model.gguf.tmp ./model.gguf 2>/dev/null || rm -f ./model.gguf.tmp ./model.gguf
pytest --log-cli-level NOTSET
23 changes: 19 additions & 4 deletions model_servers/whispercpp/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,32 @@
import pytest_container
import os
import logging
import sys

REGISTRY=os.environ['REGISTRY']
IMAGE_NAME=os.environ['IMAGE_NAME']
MODEL_NAME=os.environ['MODEL_NAME']

# Log the configuration (taken from the environment variables read above)
# that this whispercpp test session was started with.
logging.info(f"""
Starting pytest with the following ENV vars:
REGISTRY: {REGISTRY}
IMAGE_NAME: {IMAGE_NAME}
MODEL_NAME: {MODEL_NAME}
For:
model_server: whispercpp
""")

MS = pytest_container.Container(
url=f"containers-storage:{os.environ['REGISTRY']}/{os.environ['IMAGE_NAME']}",
url=f"containers-storage:{REGISTRY}/{IMAGE_NAME}",
volume_mounts=[
pytest_container.container.BindMount(
container_path="/locallm/models/model.gguf",
host_path=f"./model.gguf",
container_path="/app/models/{MODEL_NAME}".format(MODEL_NAME=MODEL_NAME),
host_path=f"./{MODEL_NAME}",
flags=["ro"]
)
],
extra_environment_variables={
"MODEL_PATH": "/locallm/models/model.gguf",
"MODEL_PATH": f"/app/models/{MODEL_NAME}".format(MODEL_NAME=MODEL_NAME),
"HOST": "0.0.0.0",
"PORT": "8001"
},
Expand Down
1 change: 0 additions & 1 deletion model_servers/whispercpp/tests/test_alive.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

CONTAINER_IMAGES = [MS]


def test_etc_os_release_present(auto_container: pytest_container.container.ContainerData):
assert auto_container.connection.file("/etc/os-release").exists

Expand Down
9 changes: 0 additions & 9 deletions models/Containerfile

This file was deleted.

Loading

0 comments on commit 227eed4

Please sign in to comment.