From 227eed46ebb67b515d9b1b0c17f1fee5de9a04c3 Mon Sep 17 00:00:00 2001 From: greg pereira Date: Sun, 7 Apr 2024 12:13:49 -0700 Subject: [PATCH] abstracting model downloads and file-normalization to models dir Signed-off-by: greg pereira --- .github/workflows/model_servers.yaml | 13 +--- .gitignore | 6 +- model_servers/common/Makefile.common | 38 +++++++++ model_servers/llamacpp_python/Makefile | 71 ++++++----------- model_servers/llamacpp_python/README.md | 11 +-- .../llamacpp_python/tests/conftest.py | 22 +++++- .../llamacpp_python/tests/test_alive.py | 1 - .../llamacpp_python/tooling_options.ipynb | 2 +- model_servers/whispercpp/Makefile | 62 ++++++--------- model_servers/whispercpp/tests/conftest.py | 23 +++++- model_servers/whispercpp/tests/test_alive.py | 1 - models/Containerfile | 9 --- models/Makefile | 77 +++++++++++++++++-- models/README.md | 22 ++++-- .../chatbot/tests/functional/conftest.py | 4 +- 15 files changed, 220 insertions(+), 142 deletions(-) create mode 100644 model_servers/common/Makefile.common delete mode 100644 models/Containerfile diff --git a/.github/workflows/model_servers.yaml b/.github/workflows/model_servers.yaml index 1cec70af5..905f8b00d 100644 --- a/.github/workflows/model_servers.yaml +++ b/.github/workflows/model_servers.yaml @@ -47,17 +47,12 @@ jobs: - name: Build Image id: build_image - uses: redhat-actions/buildah-build@v2.13 - with: - image: ${{ env.REGISTRY }}/${{ matrix.image_name }} - platforms: linux/amd64, linux/arm64 - tags: latest - containerfiles: ./model_servers/${{ matrix.image_name }}/base/Containerfile - context: model_servers/${{ matrix.image_name }}/ + working-directory: ./model_servers/${{ matrix.image_name }}/ + run: make build - name: Download model working-directory: ./model_servers/${{ matrix.image_name }}/ - run: make ${{ matrix.model }} + run: make trigger-download-model-${{ matrix.model }} - name: Set up Python uses: actions/setup-python@v5.0.0 @@ -71,8 +66,6 @@ jobs: - name: Run tests working-directory: 
./model_servers/${{ matrix.image_name }}/ run: make test - env: - IMAGE_NAME: ${{ matrix.image_name }} - name: Login to Container Registry if: github.event_name == 'push' && github.ref == 'refs/heads/main' diff --git a/.gitignore b/.gitignore index 1cb0446cf..efea70443 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,7 @@ port_check.lock *build models/* -model_servers/llamacpp_python/model.gguf +model_servers/*/model.file !models/convert_models/* -!models/Containerfile -!models/README.md \ No newline at end of file +!models/README.md + diff --git a/model_servers/common/Makefile.common b/model_servers/common/Makefile.common new file mode 100644 index 000000000..e2a666812 --- /dev/null +++ b/model_servers/common/Makefile.common @@ -0,0 +1,38 @@ +REGISTRY ?= quay.io +BASE_IMAGE_NAME ?= ai-lab/model_servers + +BIND_MOUNT_OPTIONS := ro +OS := $(shell uname -s) +ifeq ($(OS),Linux) + BIND_MOUNT_OPTIONS := Z,ro +endif + +.PHONY: build +build: + podman build -t $(IMAGE) . -f base/Containerfile + +.PHONY: install +install: + pip install -r tests/requirements.txt + +.PHONY: run-test +run-test: + IMAGE_NAME=$(IMAGE_NAME) MODEL_NAME=$(MODEL_NAME) pytest --log-cli-level NOTSET + +.PHONY: test +test: + @if [ ! -f "../../models/$(MODEL_NAME)" ]; then \ + echo "Model file -- $(MODEL_NAME) -- not present in the models directory."; \ + exit 1; \ + else \ + if [ ! 
-f "./$(MODEL_NAME)" ]; then \ + ln -s ../../models/$(MODEL_NAME) ./$(MODEL_NAME); \ + fi; \ + $(MAKE) IMAGE_NAME=$(IMAGE_NAME) MODEL_NAME=$(MODEL_NAME) REGISTRY=$(REGISTRY) -k run-test clean; \ + fi; + +.PHONY: clean +clean: + - rm ./$(MODEL_NAME) &> /dev/null + + \ No newline at end of file diff --git a/model_servers/llamacpp_python/Makefile b/model_servers/llamacpp_python/Makefile index c017559ec..def29dacb 100644 --- a/model_servers/llamacpp_python/Makefile +++ b/model_servers/llamacpp_python/Makefile @@ -1,41 +1,21 @@ APP := llamacpp_python -PORT := 8001 - -IMAGE := quay.io/ai-lab/model_servers/$(APP):latest -CUDA_IMAGE := quay.io/ai-lab/model_servers/$(APP)_cuda:latest -VULKAN_IMAGE := quay.io/ai-lab/model_servers/$(APP)_vulkan:latest - -# ----- MODEL OPTIONS ----- - -LLAMA_MODEL_NAME := llama-2-7b-chat.Q5_K_S.gguf -LLAMA_MODEL_URL := https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf +MODELS_PATH := /locallm/models -TINY_LLAMA_MODEL_NAME := tinyllama-1.1b-chat-v1.0.Q4_K_S.gguf -TINY_LLAMA_MODEL_URL := https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/blob/main/tinyllama-1.1b-chat-v1.0.Q4_K_S.gguf +REGISTRY ?= quay.io +PORT ?= 8001 -MISTRAL_MODEL_NAME := mistral-7b-instruct-v0.1.Q4_K_M.gguf -MISTRAL_MODEL_URL := https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf +IMAGE_NAME ?= $(BASE_IMAGE_NAME)/$(APP):latest +IMAGE ?= $(REGISTRY)/$(IMAGE_NAME) -# --- END MODEL OPTIONS --- +MODEL_NAME ?= mistral-7b-instruct-v0.1.Q4_K_M.gguf -SELECTED_MODEL_NAME := $(or $(SELECTED_MODEL),$(MISTRAL_MODEL_NAME)) -SELECTED_MODEL_URL := $(or $(SELECTED_MODEL_LINK),$(MISTRAL_MODEL_URL)) +include ../common/Makefile.common -RELATIVE_MODELS_PATH := ../../models -MODELS_PATH := /locallm/models - -BIND_MOUNT_OPTIONS := ro -OS := $(shell uname -s) -ifeq ($(OS),Linux) - BIND_MOUNT_OPTIONS := ro,Z -endif +CUDA_IMAGE_NAME := $(REGISTRY)/$(BASE_IMAGE_NAME)/$(APP)_cuda:latest 
+VULKAN_IMAGE := $(REGISTRY)/$(BASE_IMAGE_NAME)/$(APP)_vulkan:latest .Phony: all -all: build mistral run - -.PHONY: build -build: - podman build -t $(IMAGE) . -f base/Containerfile +all: build trigger-download-model-mistral run .PHONY: build-cuda build-cuda: @@ -45,29 +25,26 @@ build-cuda: build-vulkan: podman build -t $(VULKAN_IMAGE) . -f cuda/Containerfile -.PHONY: download-model-tiny-llama -download-model-tiny-llama: - curl -H "Cache-Control: no-cache" -s -S -L -f $(TINY_LLAMA_MODEL_URL) -z $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME) -o $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME).tmp && mv -f $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME) 2>/dev/null || rm -f $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME) +########################## TRIGGER DOWNLOAD MAKE TARGETS ########################## +### NOTE: you should not add these trigger make targets for new models. +# Adding new models should implemented in the [models directory](../../models). +# These are just provided here for bootstrapping and testing different model types. 
-.PHONY: download-model-llama -download-model-llama: - curl -H "Cache-Control: no-cache" -s -S -L -f $(LLAMA_MODEL_URL) -z $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME) -o $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME).tmp && mv -f $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME) 2>/dev/null || rm -f $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME) +.PHONY: trigger-download-model-tiny-llama # small .gguf model for testing +trigger-download-model-tiny-llama: + cd ../../models && \ + make -f Makefile download-model-tiny-llama +.PHONY: trigger-download-model-mistral # default model +trigger-download-model-mistral: + cd ../../models && \ + make -f Makefile download-model-mistral -.PHONY: mistral -mistral: - curl -H "Cache-Control: no-cache" -s -S -L -f $(MISTRAL_MODEL_URL) -z $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME) -o $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp && mv -f $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME) 2>/dev/null || rm -f $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME) +############################ END DOWNLOAD MAKE TARGETS ############################ -.PHONY: install -install: - pip install -r tests/requirements.txt .PHONY: run run: cd ../../models && \ - podman run -it -d -p $(PORT):$(PORT) -v ./$(SELECTED_MODEL_NAME):$(MODELS_PATH)/model.gguf:$(BIND_MOUNT_OPTIONS) -e MODEL_PATH=$(MODELS_PATH)/model.gguf -e HOST=0.0.0.0 -e PORT=$(PORT) --net=host $(IMAGE) + podman run --rm -d -it -p $(PORT):$(PORT) -v ./$(MODEL_NAME):$(MODELS_PATH)/$(MODEL_NAME):$(BIND_MOUNT_OPTIONS) -e HOST=0.0.0.0 -e MODEL_PATH=$(MODELS_PATH)/$(MODEL_NAME) -e PORT=$(PORT) --network=host $(IMAGE) -.PHONY: test -test: - curl -H "Cache-Control: no-cache" -s -S -L -f $(SELECTED_MODEL_URL) -z ./model.gguf -o ./model.gguf.tmp && mv -f ./model.gguf.tmp ./model.gguf 
2>/dev/null || rm -f ./model.gguf.tmp ./model.gguf - pytest --log-cli-level NOTSET diff --git a/model_servers/llamacpp_python/README.md b/model_servers/llamacpp_python/README.md index 943f4440e..7b4b02e8f 100644 --- a/model_servers/llamacpp_python/README.md +++ b/model_servers/llamacpp_python/README.md @@ -27,20 +27,15 @@ At the time of this writing, 2 models are known to work with this service - **Mistral-7b** - Download URL: [https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf) -It is suggested you place models in the [models](../../models/) directory. As for retrieving them, either use `wget` to download them with the download links above, or call the model names from the Makefile. +It is suggested you place models in the [models](../../models/) directory. It is also recommended that you use the [models dir Makefile](../../models/Makefile) to view which models you can download and to download the models themselves: ```bash cd ../../models -curl -sLO +make -f Makefile download-model-mistral cd model_servers/llamacpp_python ``` -or: - -```bash -make -f Makefile download-model-mistral -make -f Makefile download-model-llama -``` +However we have also added makefile targets [in this directory's Makefile](./Makefile) which will in turn call those targets, ex: `make -f Makefile trigger-download-model-mistral` ### Deploy Model Service diff --git a/model_servers/llamacpp_python/tests/conftest.py b/model_servers/llamacpp_python/tests/conftest.py index 4cd3e203d..4c3193b63 100644 --- a/model_servers/llamacpp_python/tests/conftest.py +++ b/model_servers/llamacpp_python/tests/conftest.py @@ -1,17 +1,31 @@ import pytest_container import os +import logging + +REGISTRY=os.environ['REGISTRY'] +IMAGE_NAME=os.environ['IMAGE_NAME'] +MODEL_NAME=os.environ['MODEL_NAME'] + +logging.info(""" +Starting pytest with the 
following ENV vars: + REGISTRY: {REGISTRY} + IMAGE_NAME: {IMAGE_NAME} + MODEL_NAME: {MODEL_NAME} +For: + model_server: llamacpp_python +""".format(REGISTRY=REGISTRY, IMAGE_NAME=IMAGE_NAME, MODEL_NAME=MODEL_NAME)) MS = pytest_container.Container( - url=f"containers-storage:{os.environ['REGISTRY']}/{os.environ['IMAGE_NAME']}", + url=f"containers-storage:{REGISTRY}/{IMAGE_NAME}", volume_mounts=[ pytest_container.container.BindMount( - container_path="/locallm/models/model.gguf", - host_path=f"./model.gguf", + container_path="/locallm/models/{MODEL_NAME}".format(MODEL_NAME=MODEL_NAME), + host_path=f"./{MODEL_NAME}", flags=["ro"] ) ], extra_environment_variables={ - "MODEL_PATH": "/locallm/models/model.gguf", + "MODEL_PATH": "/locallm/models/{MODEL_NAME}".format(MODEL_NAME=MODEL_NAME), "HOST": "0.0.0.0", "PORT": "8001" }, diff --git a/model_servers/llamacpp_python/tests/test_alive.py b/model_servers/llamacpp_python/tests/test_alive.py index fcad510a0..226aac1c0 100644 --- a/model_servers/llamacpp_python/tests/test_alive.py +++ b/model_servers/llamacpp_python/tests/test_alive.py @@ -4,7 +4,6 @@ CONTAINER_IMAGES = [MS] - def test_etc_os_release_present(auto_container: pytest_container.container.ContainerData): assert auto_container.connection.file("/etc/os-release").exists diff --git a/model_servers/llamacpp_python/tooling_options.ipynb b/model_servers/llamacpp_python/tooling_options.ipynb index ebad2174c..18577f239 100644 --- a/model_servers/llamacpp_python/tooling_options.ipynb +++ b/model_servers/llamacpp_python/tooling_options.ipynb @@ -23,7 +23,7 @@ "This notebook assumes that the playground image is running locally. Once built, you can use the below to start the model service image. 
\n", "\n", "```bash\n", - "podman run -it -p 8000:8000 -v /locallm/models:/locallm/models:Z -e MODEL_PATH=models/mistral-7b-instruct-v0.1.Q4_K_M.gguf playground\n", + "podman run -it -p 8000:8000 -v /locallm/models:/locallm/models:Z -e MODEL_PATH=models/mistral-7b-instruct-v0.1.Q4_K_M.file playground\n", "```" ] }, diff --git a/model_servers/whispercpp/Makefile b/model_servers/whispercpp/Makefile index 4657904b1..be0b9ce91 100644 --- a/model_servers/whispercpp/Makefile +++ b/model_servers/whispercpp/Makefile @@ -1,57 +1,41 @@ -PORT := 8001 APP := whispercpp -IMAGE := quay.io/ai-lab/model_servers/$(APP):latest -CUDA_IMAGE := quay.io/ai-lab/model_servers/$(APP)_cuda:latest -VULKAN_IMAGE :=quay.io/ai-lab/model_servers/$(APP)_vulkan:latest - -# ----- MODEL OPTIONS ----- - -WHISPER_SMALL_MODEL_NAME := ggml-small.bin -WHISPER_SMALL_MODEL_URL := https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin +MODELS_PATH := /app/models -WHISPER_BASE_MODEL_NAME := ggml-base.en.bin -WHISPER_BASE_MODEL_URL := https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin +REGISTRY ?= quay.io +PORT ?= 8001 -SELECTED_MODEL_NAME := $(or $(SELECTED_MODEL),$(WHISPER_SMALL_MODEL_NAME)) -SELECTED_MODEL_URL := $(or $(SELECTED_MODEL_LINK),$(WHISPER_SMALL_MODEL_URL)) +IMAGE_NAME ?= $(BASE_IMAGE_NAME)/$(APP):latest +IMAGE ?= $(REGISTRY)/$(IMAGE_NAME) -# --- END MODEL OPTIONS --- +MODEL_NAME ?= ggml-small.bin -MODELS_PATH := /app/models +include ../common/Makefile.common -BIND_MOUNT_OPTIONS := ro -OS := $(shell uname -s) -ifeq ($(OS),Linux) - BIND_MOUNT_OPTIONS := Z,ro -endif +# CUDA_IMAGE_NAME := $(REGISTRY)/$(BASE_IMAGE_NAME)/$(APP)_cuda:latest +# VULKAN_IMAGE := $(REGISTRY)/$(BASE_IMAGE_NAME)/$(APP)_vulkan:latest .PHONY: all -all: build whisper-small run +all: build trigger-download-model-whisper-small run -.PHONY: build -build: - podman build -t $(IMAGE) . 
-f Containerfile +########################## TRIGGER DOWNLOAD MAKE TARGETS ########################## +### NOTE: you should not add these trigger make targets for new models. +# Adding new models should implemented in the [models directory](../../models). +# These are just provided here for bootstrapping and testing different model types. -.PHONY: whisper-small -whisper-small: +.PHONY: trigger-download-model-whisper-small # small .bin model type testing +trigger-download-model-whisper-small: cd ../../models && \ - curl -s -S -L -f $(WHISPER_SMALL_MODEL_URL) -z $(WHISPER_SMALL_MODEL_NAME) -o $(WHISPER_SMALL_MODEL_NAME).tmp && mv -f $(WHISPER_SMALL_MODEL_NAME).tmp $(WHISPER_SMALL_MODEL_NAME) 2>/dev/null || rm -f $(WHISPER_SMALL_MODEL_NAME).tmp $(WHISPER_SMALL_MODEL_NAME) - -.PHONY: install -install: - pip install -r tests/requirements.txt + make -f Makefile download-model-whisper-small -.PHONY: download-model-whisper-base -download-model-whisper-base: +.PHONY: trigger-download-model-whisper-base # default model +trigger-download-model-whisper-base: cd ../../models && \ - curl -s -S -L -f $(WHISPER_BASE_MODEL_URL) -z $(WHISPER_BASE_MODEL_NAME) -o $(WHISPER_BASE_MODEL_NAME).tmp && mv -f $(WHISPER_BASE_MODEL_NAME).tmp $(WHISPER_BASE_MODEL_NAME) 2>/dev/null || rm -f $(WHISPER_BASE_MODEL_NAME).tmp $(WHISPER_BASE_MODEL_NAME) + make -f Makefile download-model-whisper-base + +############################ END DOWNLOAD MAKE TARGETS ############################ .PHONY: run run: cd ../../models && \ - podman run -d --rm -it -p $(PORT):$(PORT) -v ./$(SELECTED_MODEL_NAME):$(MODELS_PATH)/$(SELECTED_MODEL_NAME):$(BIND_MOUNT_OPTIONS) -e HOST=0.0.0.0 -e MODEL_PATH=$(MODELS_PATH)/$(SELECTED_MODEL_NAME) -e PORT=$(PORT) $(IMAGE) + podman run --rm -d -it -p $(PORT):$(PORT) -v ./$(MODEL_NAME):$(MODELS_PATH)/$(MODEL_NAME):$(BIND_MOUNT_OPTIONS) -e HOST=0.0.0.0 -e MODEL_PATH=$(MODELS_PATH)/$(MODEL_NAME) -e PORT=$(PORT) --network=host $(IMAGE) -.PHONY: test -test: - curl -H "Cache-Control: 
no-cache" -s -S -L -f $(SELECTED_MODEL_URL) -z ./model.gguf -o ./model.gguf.tmp && mv -f ./model.gguf.tmp ./model.gguf 2>/dev/null || rm -f ./model.gguf.tmp ./model.gguf - pytest --log-cli-level NOTSET diff --git a/model_servers/whispercpp/tests/conftest.py b/model_servers/whispercpp/tests/conftest.py index 4cd3e203d..d79205cab 100644 --- a/model_servers/whispercpp/tests/conftest.py +++ b/model_servers/whispercpp/tests/conftest.py @@ -1,17 +1,32 @@ import pytest_container import os +import logging +import sys + +REGISTRY=os.environ['REGISTRY'] +IMAGE_NAME=os.environ['IMAGE_NAME'] +MODEL_NAME=os.environ['MODEL_NAME'] + +logging.info(""" +Starting pytest with the following ENV vars: + REGISTRY: {REGISTRY} + IMAGE_NAME: {IMAGE_NAME} + MODEL_NAME: {MODEL_NAME} +For: + model_server: whispercpp +""".format(REGISTRY=REGISTRY, IMAGE_NAME=IMAGE_NAME, MODEL_NAME=MODEL_NAME)) MS = pytest_container.Container( - url=f"containers-storage:{os.environ['REGISTRY']}/{os.environ['IMAGE_NAME']}", + url=f"containers-storage:{REGISTRY}/{IMAGE_NAME}", volume_mounts=[ pytest_container.container.BindMount( - container_path="/locallm/models/model.gguf", - host_path=f"./model.gguf", + container_path="/app/models/{MODEL_NAME}".format(MODEL_NAME=MODEL_NAME), + host_path=f"./{MODEL_NAME}", flags=["ro"] ) ], extra_environment_variables={ - "MODEL_PATH": "/locallm/models/model.gguf", + "MODEL_PATH": f"/app/models/{MODEL_NAME}".format(MODEL_NAME=MODEL_NAME), "HOST": "0.0.0.0", "PORT": "8001" }, diff --git a/model_servers/whispercpp/tests/test_alive.py b/model_servers/whispercpp/tests/test_alive.py index fcad510a0..226aac1c0 100644 --- a/model_servers/whispercpp/tests/test_alive.py +++ b/model_servers/whispercpp/tests/test_alive.py @@ -4,7 +4,6 @@ CONTAINER_IMAGES = [MS] - def test_etc_os_release_present(auto_container: pytest_container.container.ContainerData): assert auto_container.connection.file("/etc/os-release").exists diff --git a/models/Containerfile b/models/Containerfile deleted file mode 
100644 index fefecb5be..000000000 --- a/models/Containerfile +++ /dev/null @@ -1,9 +0,0 @@ -#https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf -#https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf -#https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q4_K_M.gguf -#https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin -# podman build --build-arg MODEL_URL=https://... -t quay.io/yourimage . -FROM registry.access.redhat.com/ubi9/ubi-micro:9.3-13 -ARG MODEL_URL=https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf -WORKDIR /model -ADD $MODEL_URL /model/model.file diff --git a/models/Makefile b/models/Makefile index 0953b54a9..6d9b4bdf1 100644 --- a/models/Makefile +++ b/models/Makefile @@ -1,6 +1,73 @@ -MODEL ?= MODEL=https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf -IMAGE ?= quay.io/ai-lab/llama:latest +# Add entries below as follows: +# 1. find the model name and download link. +# 2. Create a make target passing that info the `download-model` target +# 3. Add a normalizer make target, this will prime a model by chaging its file name to `model.file` which is what gets mounted in our server images -.PHONY: build -build: - podman build --build-arg ${MODEL} -f Containerfile -t ${IMAGE} . 
+MODEL_NAME ?= $(MISTRAL_MODEL_NAME) +MODEL_URL ?= $(MISTRAL_MODEL_URL) + +### Factory targets + +.PHONY: download-model +download-model: + curl -H "Cache-Control: no-cache" -s -S -L -f $(MODEL_URL) -z $(MODEL_NAME) -o $(MODEL_NAME).tmp && \ + mv -f $(MODEL_NAME).tmp $(MODEL_NAME) 2>/dev/null || \ + rm -f $(MODEL_NAME).tmp $(MODEL_NAME) + +##################################################### MODEL ENTRIES ##################################################### + +### abbreviated model name: llama + +LLAMA_MODEL_NAME := llama-2-7b-chat.Q5_K_S.gguf +LLAMA_MODEL_URL := https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf + +.PHONY: download-model-llama +download-model-llama: + $(MAKE) MODEL_URL=$(LLAMA_MODEL_URL) MODEL_NAME=$(LLAMA_MODEL_NAME) download-model + +######################################################################################################################### + +### abbreviated model name: tiny-llama + +TINY_LLAMA_MODEL_NAME := tinyllama-1.1b-chat-v1.0.Q4_K_S.gguf +TINY_LLAMA_MODEL_URL := https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_S.gguf + +.PHONY: download-model-tiny-llama +download-model-tiny-llama: + $(MAKE) MODEL_URL=$(TINY_LLAMA_MODEL_URL) MODEL_NAME=$(TINY_LLAMA_MODEL_NAME) download-model + +######################################################################################################################### + +### abbreviated model name: mistral + +MISTRAL_MODEL_NAME := mistral-7b-instruct-v0.1.Q4_K_M.gguf +MISTRAL_MODEL_URL := https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf + +.PHONY: download-model-mistral +download-model-mistral: + $(MAKE) MODEL_URL=$(MISTRAL_MODEL_URL) MODEL_NAME=$(MISTRAL_MODEL_NAME) download-model + + +######################################################################################################################### + +### abbreviated model name: 
whisper-small + +WHISPER_SMALL_MODEL_NAME := ggml-small.bin +WHISPER_SMALL_MODEL_URL := https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin + +.PHONY: download-model-whisper-small +download-model-whisper-small: + $(MAKE) MODEL_URL=$(WHISPER_SMALL_MODEL_URL) MODEL_NAME=$(WHISPER_SMALL_MODEL_NAME) download-model + +######################################################################################################################### + +### abreviated model name: whisper-base + +WHISPER_BASE_MODEL_NAME := ggml-base.en.bin +WHISPER_BASE_MODEL_URL := https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin + +.PHONY: download-model-whisper-base +download-model-whisper-base: + $(MAKE) MODEL_URL=$(WHISPER_BASE_MODEL_URL) MODEL_NAME=$(WHISPER_BASE_MODEL_NAME) download-model + +################################################### END MODEL ENTRIES ################################################### diff --git a/models/README.md b/models/README.md index f5cdd2544..27e7d0887 100644 --- a/models/README.md +++ b/models/README.md @@ -1,13 +1,19 @@ # Directory to store model files -The following suggested list of open models is available on huggingface.co. +The models directory stores models and provides automation around downloading models. -* https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf -* https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q4_K_M.gguf -* https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin +Want to try one of our tested models? 
Try one or all of the following: -You can easily build one of these models into a container image by executing - -``` -make MODEL=https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf IMAGE=your.registry.com/llama:latest +```bash +make -f Makefile download-model-llama +make -f Makefile download-model-tiny-llama +make -f Makefile download-model-mistral +make -f Makefile download-model-whisper-small +make -f Makefile download-model-whisper-base ``` + +Want to download and run a model you don't see listed? This is supported with the `MODEL_NAME` and `MODEL_URL` params: + +```bash +make -f Makefile download-model MODEL_URL=https://huggingface.co/andrewcanis/c4ai-command-r-v01-GGUF/resolve/main/c4ai-command-r-v01-Q4_K_S.gguf MODEL_NAME=c4ai-command-r-v01-Q4_K_S.gguf +``` \ No newline at end of file diff --git a/recipes/natural_language_processing/chatbot/tests/functional/conftest.py b/recipes/natural_language_processing/chatbot/tests/functional/conftest.py index 81d1bd979..9be811034 100644 --- a/recipes/natural_language_processing/chatbot/tests/functional/conftest.py +++ b/recipes/natural_language_processing/chatbot/tests/functional/conftest.py @@ -22,7 +22,7 @@ host_port=8001 ) ], - extra_launch_args=["--net=host"] + extra_launch_args=["--network=host"] ) CB = pytest_container.Container( @@ -36,7 +36,7 @@ host_port=8501 ) ], - extra_launch_args=["--net=host"] + extra_launch_args=["--network=host"] ) def pytest_generate_tests(metafunc):