diff --git a/.github/workflows/chatbot.yaml b/.github/workflows/chatbot.yaml index 25e2a710..14b726de 100644 --- a/.github/workflows/chatbot.yaml +++ b/.github/workflows/chatbot.yaml @@ -59,8 +59,8 @@ jobs: run: make install - name: Download model - working-directory: ./model_servers/llamacpp_python - run: make mistral + working-directory: ./recipes/natural_language_processing/${{ env.IMAGE_NAME }} + run: make download-model-mistral - name: Run Functional Tests shell: bash diff --git a/.github/workflows/model_servers.yaml b/.github/workflows/model_servers.yaml index 84b1ae8f..4bbc893c 100644 --- a/.github/workflows/model_servers.yaml +++ b/.github/workflows/model_servers.yaml @@ -18,6 +18,7 @@ on: env: REGISTRY: ghcr.io + REGISTRY_ORG: containers jobs: build-and-push-image: @@ -82,7 +83,7 @@ jobs: - name: Download model working-directory: ./model_servers/${{ matrix.directory }}/ - run: make ${{ matrix.model }} + run: make download-model-${{ matrix.model }} - name: Set up Python uses: actions/setup-python@v5.0.0 @@ -96,16 +97,14 @@ jobs: - name: Run non-gpu tests working-directory: ./model_servers/${{ matrix.directory }}/ if: ${{ matrix.no_gpu }} - run: make test - env: - IMAGE_NAME: ${{ matrix.image_name }} + run: make test REGISTRY=${{ env.REGISTRY }} IMAGE_NAME=${{ env.REGISTRY_ORG }}/${{ matrix.image_name}}:latest - - name: Run cuda test - working-directory: ./model_servers/${{ matrix.directory }}/ - if: ${{ matrix.cuda }} - run: make test-cuda - env: - IMAGE_NAME: ${{ matrix.image_name }} + # - name: Run cuda test # we dont have cuda tests + # working-directory: ./model_servers/${{ matrix.directory }}/ + # if: ${{ matrix.cuda }} + # run: make test-cuda + # env: + # IMAGE_NAME: ${{ matrix.image_name }} - name: Login to Container Registry if: github.event_name == 'push' && github.ref == 'refs/heads/main' diff --git a/.gitignore b/.gitignore index 9bef8cac..b88c4f2a 100644 --- a/.gitignore +++ b/.gitignore @@ -4,8 +4,8 @@ port_check.lock *build models/* 
-model_servers/llamacpp_python/model.gguf -!models/convert_models/* -!models/Containerfile +!models/Makefile !models/README.md +convert_models/converted_models recipes/chromedriver +recipes/Google\ Chrome.app \ No newline at end of file diff --git a/model_servers/common/Makefile.common b/model_servers/common/Makefile.common new file mode 100644 index 00000000..ee249451 --- /dev/null +++ b/model_servers/common/Makefile.common @@ -0,0 +1,47 @@ +REGISTRY ?= quay.io +REGISTRY_ORG ?= ai-lab +COMPONENT ?= model_servers + +BIND_MOUNT_OPTIONS := ro +OS := $(shell uname -s) +ifeq ($(OS),Linux) + BIND_MOUNT_OPTIONS := Z,ro +endif + +.PHONY: build +build: + podman build --squash-all --build-arg $(PORT) -t $(IMAGE) . -f base/Containerfile + +.PHONY: install +install: + pip install -r tests/requirements.txt + +.PHONY: test +test: + @if [ ! -f "../../models/$(MODEL_NAME)" ]; then \ + echo "Model file -- $(MODEL_NAME) -- not present in the models directory."; \ + exit 1; \ + else \ + if [ ! -f "./$(MODEL_NAME)" ]; then \ + ln -s ../../models/$(MODEL_NAME) ./$(MODEL_NAME); \ + fi; \ + REGISTRY=$(REGISTRY) IMAGE_NAME=$(IMAGE_NAME) MODEL_NAME=$(MODEL_NAME) MODEL_PATH=$(MODEL_PATH) PORT=$(PORT) pytest -vvv -s ; \ + fi; + +.PHONY: clean +clean: + - rm ./$(MODEL_NAME) &> /dev/null + +.PHONY: run +run: + cd ../../models && \ + podman run -it -d -p $(PORT):$(PORT) -v ./$(MODEL_NAME):$(MODELS_PATH)/$(MODEL_NAME):$(BIND_MOUNT_OPTIONS) -e MODEL_PATH=$(MODELS_PATH)/$(MODEL_NAME) -e HOST=0.0.0.0 -e PORT=$(PORT) $(IMAGE) + +.PHONY: podman-clean +podman-clean: + @container_ids=$$(podman ps --format "{{.ID}} {{.Image}}" | awk '$$2 == "$(IMAGE)" {print $$1}'); \ + echo "removing all containers with IMAGE=$(IMAGE)"; \ + for id in $$container_ids; do \ + echo "Removing container: $$id,"; \ + podman rm -f $$id; \ + done diff --git a/model_servers/llamacpp_python/Makefile b/model_servers/llamacpp_python/Makefile index 05fc58db..883d9f15 100644 --- a/model_servers/llamacpp_python/Makefile +++ 
b/model_servers/llamacpp_python/Makefile @@ -1,42 +1,18 @@ APP := llamacpp_python -IMAGE_BASE := llamacpp-python -PORT := 8001 +PORT ?= 8001 -IMAGE := quay.io/ai-lab/$(IMAGE_BASE):latest -CUDA_IMAGE := quay.io/ai-lab/$(IMAGE_BASE)-cuda:latest -VULKAN_IMAGE := quay.io/ai-lab/$(IMAGE_BASE)-vulkan:latest +include ../common/Makefile.common -# ----- MODEL OPTIONS ----- +IMAGE_NAME ?= $(REGISTRY_ORG)/$(COMPONENT)/$(APP):latest +IMAGE := $(REGISTRY)/$(IMAGE_NAME) +CUDA_IMAGE := $(REGISTRY)/$(REGISTRY_ORG)/$(COMPONENT)/$(APP)_cuda:latest +VULKAN_IMAGE := $(REGISTRY)/$(REGISTRY_ORG)/$(COMPONENT)/$(APP)_vulkan:latest -LLAMA_MODEL_NAME := llama-2-7b-chat.Q5_K_S.gguf -LLAMA_MODEL_URL := https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf - -TINY_LLAMA_MODEL_NAME := tinyllama-1.1b-chat-v1.0.Q4_K_S.gguf -TINY_LLAMA_MODEL_URL := https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/blob/main/tinyllama-1.1b-chat-v1.0.Q4_K_S.gguf - -MISTRAL_MODEL_NAME := mistral-7b-instruct-v0.1.Q4_K_M.gguf -MISTRAL_MODEL_URL := https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf - -# --- END MODEL OPTIONS --- - -SELECTED_MODEL_NAME := $(or $(SELECTED_MODEL),$(MISTRAL_MODEL_NAME)) -SELECTED_MODEL_URL := $(or $(SELECTED_MODEL_LINK),$(MISTRAL_MODEL_URL)) - -RELATIVE_MODELS_PATH := ../../models MODELS_PATH := /locallm/models - -BIND_MOUNT_OPTIONS := ro -OS := $(shell uname -s) -ifeq ($(OS),Linux) - BIND_MOUNT_OPTIONS := ro,Z -endif +MODEL_NAME ?= mistral-7b-instruct-v0.1.Q4_K_M.gguf .Phony: all -all: build mistral run - -.PHONY: build -build: - podman build --squash-all -t $(IMAGE) . -f base/Containerfile +all: build download-model-mistral run .PHONY: build-cuda build-cuda: @@ -46,40 +22,7 @@ build-cuda: build-vulkan: podman build --squash-all -t $(VULKAN_IMAGE) . 
-f vulkan/Containerfile -.PHONY: download-model-tiny-llama -download-model-tiny-llama: - curl -H "Cache-Control: no-cache" -s -S -L -f $(TINY_LLAMA_MODEL_URL) -z $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME) -o $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME).tmp && mv -f $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME) 2>/dev/null || rm -f $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(LLAMA_MODEL_NAME) - -.PHONY: download-model-llama -download-model-llama: - curl -H "Cache-Control: no-cache" -s -S -L -f $(LLAMA_MODEL_URL) -z $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME) -o $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME).tmp && mv -f $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME) 2>/dev/null || rm -f $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(TINY_LLAMA_MODEL_NAME) - - -.PHONY: mistral -mistral: - curl -H "Cache-Control: no-cache" -s -S -L -f $(MISTRAL_MODEL_URL) -z $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME) -o $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp && mv -f $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME) 2>/dev/null || rm -f $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME) - -.PHONY: install -install: - pip install -r tests/requirements.txt - -.PHONY: run -run: +.PHONY: download-model-mistral # default model +download-model-mistral: cd ../../models && \ - podman run -it -d -p $(PORT):$(PORT) -v ./$(SELECTED_MODEL_NAME):$(MODELS_PATH)/model.gguf:$(BIND_MOUNT_OPTIONS) -e MODEL_PATH=$(MODELS_PATH)/model.gguf -e HOST=0.0.0.0 -e PORT=$(PORT) $(IMAGE) - -# TODO: Add tests for llamacpp-cuda -# This never fails, placeholder for future test -.PHONY: run-cuda -run-cuda: - cd ../../models && \ - podman run -it -d -p $(PORT):$(PORT) -v ./$(SELECTED_MODEL_NAME):$(MODELS_PATH)/model.gguf:$(BIND_MOUNT_OPTIONS) -e 
MODEL_PATH=$(MODELS_PATH)/model.gguf -e HOST=0.0.0.0 -e PORT=$(PORT) --net=host --device nvidia.com/gpu=all $(IMAGE) || true - -# TODO: Add tests for llamacpp-cuda -.PHONY: test-cuda -test-cuda: run-cuda - -.PHONY: test -test: - curl -H "Cache-Control: no-cache" -s -S -L -f $(SELECTED_MODEL_URL) -z ./model.gguf -o ./model.gguf.tmp && mv -f ./model.gguf.tmp ./model.gguf 2>/dev/null || rm -f ./model.gguf.tmp ./model.gguf - pytest --log-cli-level NOTSET + make MODEL_NAME=mistral-7b-instruct-v0.1.Q4_K_M.gguf MODEL_URL=https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf -f Makefile download-model diff --git a/model_servers/llamacpp_python/README.md b/model_servers/llamacpp_python/README.md index fccdd88e..dc463831 100644 --- a/model_servers/llamacpp_python/README.md +++ b/model_servers/llamacpp_python/README.md @@ -71,6 +71,7 @@ podman pull quay.io/ai-lab/llamacpp-python-vulkan ``` + ## Download Model(s) There are many models to choose from these days, most of which can be found on [huggingface.co](https://huggingface.co). In order to use a model with the llamacpp_python model server, it must be in GGUF format. You can either download pre-converted GGUF models directly or convert them yourself with the [model converter utility](../../convert_models/) available in this repo. @@ -81,26 +82,26 @@ Download URL: [https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/res Place all models in the [models](../../models/) directory. -You can use this snippet below to download models. 
+You can use this snippet below to download the default model: ```bash -cd ../../models -curl -sLO -cd model_servers/llamacpp_python +make -f Makefile download-model-mistral ``` -or: +Or you can use the generic `download-model` target from the `/models` directory to download any model file from huggingface: ```bash -make -f Makefile download-model-mistral -make -f Makefile download-model-llama +cd ../../models +make MODEL_NAME= MODEL_URL= -f Makefile download-model +# EX: make MODEL_NAME=mistral-7b-instruct-v0.1.Q4_K_M.gguf MODEL_URL=https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf -f Makefile download-model ``` + ## Deploy Model Service ### Single Model Service: -To deploy the LLM server you must specify a volume mount `-v` where your models are stored on the host machine and the `MODEL_PATH` for your model of choice. The model_server is most easily deploy from calling the make command: `make -f Makefile run` +To deploy the LLM server you must specify a volume mount `-v` where your models are stored on the host machine and the `MODEL_PATH` for your model of choice. The model_server is most easily deployed by calling the make command: `make -f Makefile run`. As with all our make calls, you can pass any number of the following variables: `REGISTRY`, `IMAGE_NAME`, `MODEL_NAME`, `MODEL_PATH`, and `PORT`. 
```bash podman run --rm -it \ diff --git a/model_servers/llamacpp_python/tests/conftest.py b/model_servers/llamacpp_python/tests/conftest.py index 55ee11a8..6cafe06f 100644 --- a/model_servers/llamacpp_python/tests/conftest.py +++ b/model_servers/llamacpp_python/tests/conftest.py @@ -2,24 +2,53 @@ import os # For cuda, will add this to below Container: extra_launch_args=["--device", "nvidia.com/gpu=all"], +if not 'REGISTRY' in os.environ: + REGISTRY = 'ghcr.io' +else: + REGISTRY = os.environ['REGISTRY'] + +if not 'IMAGE_NAME' in os.environ: + IMAGE_NAME = 'containers/llamacpp_python:latest' +else: + IMAGE_NAME = os.environ['IMAGE_NAME'] + +if not 'MODEL_NAME' in os.environ: + MODEL_NAME = 'mistral-7b-instruct-v0.1.Q4_K_M.gguf' +else: + MODEL_NAME = os.environ['MODEL_NAME'] + +if not 'MODEL_PATH' in os.environ: + MODEL_PATH = "/locallm/models" +else: + MODEL_PATH = os.environ['MODEL_PATH'] + +if not 'PORT' in os.environ: + PORT = 8001 +else: + PORT = os.environ['PORT'] + try: + PORT = int(PORT) + except: + PORT = 8001 + MS = pytest_container.Container( - url=f"containers-storage:{os.environ['REGISTRY']}/containers/{os.environ['IMAGE_NAME']}", + url=f"containers-storage:{REGISTRY}/{IMAGE_NAME}", volume_mounts=[ pytest_container.container.BindMount( - container_path="/locallm/models/model.gguf", - host_path=f"./model.gguf", + container_path="{MODEL_PATH}/{MODEL_NAME}".format(MODEL_PATH=MODEL_PATH, MODEL_NAME=MODEL_NAME), + host_path=f"./{MODEL_NAME}", flags=["ro"] ) ], extra_environment_variables={ - "MODEL_PATH": "/locallm/models/model.gguf", + "MODEL_PATH": "{MODEL_PATH}/{MODEL_NAME}".format(MODEL_PATH=MODEL_PATH, MODEL_NAME=MODEL_NAME), "HOST": "0.0.0.0", - "PORT": "8001" + "PORT": f"{PORT}" }, forwarded_ports=[ pytest_container.PortForwarding( - container_port=8001, - host_port=8001 + container_port=PORT, + host_port=PORT ) ], ) diff --git a/model_servers/llamacpp_python/tests/test_alive.py b/model_servers/llamacpp_python/tests/test_alive.py index 
226aac1c..b44e5467 100644 --- a/model_servers/llamacpp_python/tests/test_alive.py +++ b/model_servers/llamacpp_python/tests/test_alive.py @@ -1,6 +1,7 @@ import pytest_container from .conftest import MS import tenacity +import os CONTAINER_IMAGES = [MS] diff --git a/model_servers/whispercpp/Makefile b/model_servers/whispercpp/Makefile index 4657904b..37434dd2 100644 --- a/model_servers/whispercpp/Makefile +++ b/model_servers/whispercpp/Makefile @@ -1,57 +1,20 @@ -PORT := 8001 APP := whispercpp -IMAGE := quay.io/ai-lab/model_servers/$(APP):latest -CUDA_IMAGE := quay.io/ai-lab/model_servers/$(APP)_cuda:latest -VULKAN_IMAGE :=quay.io/ai-lab/model_servers/$(APP)_vulkan:latest +PORT ?= 8001 -# ----- MODEL OPTIONS ----- +include ../common/Makefile.common -WHISPER_SMALL_MODEL_NAME := ggml-small.bin -WHISPER_SMALL_MODEL_URL := https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin - -WHISPER_BASE_MODEL_NAME := ggml-base.en.bin -WHISPER_BASE_MODEL_URL := https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin - -SELECTED_MODEL_NAME := $(or $(SELECTED_MODEL),$(WHISPER_SMALL_MODEL_NAME)) -SELECTED_MODEL_URL := $(or $(SELECTED_MODEL_LINK),$(WHISPER_SMALL_MODEL_URL)) - -# --- END MODEL OPTIONS --- +IMAGE_NAME ?= $(REGISTRY_ORG)/$(COMPONENT)/$(APP):latest +IMAGE ?= $(REGISTRY)/$(IMAGE_NAME) +# CUDA_IMAGE_NAME := $(REGISTRY)/$(BASE_IMAGE_NAME)/$(APP)_cuda:latest +# VULKAN_IMAGE := $(REGISTRY)/$(BASE_IMAGE_NAME)/$(APP)_vulkan:latest MODELS_PATH := /app/models - -BIND_MOUNT_OPTIONS := ro -OS := $(shell uname -s) -ifeq ($(OS),Linux) - BIND_MOUNT_OPTIONS := Z,ro -endif +MODEL_NAME ?= ggml-small.bin .PHONY: all -all: build whisper-small run - -.PHONY: build -build: - podman build -t $(IMAGE) . 
-f Containerfile +all: build download-model-whisper-small run -.PHONY: whisper-small -whisper-small: +.PHONY: download-model-whisper-small # small .bin model type testing +download-model-whisper-small: cd ../../models && \ - curl -s -S -L -f $(WHISPER_SMALL_MODEL_URL) -z $(WHISPER_SMALL_MODEL_NAME) -o $(WHISPER_SMALL_MODEL_NAME).tmp && mv -f $(WHISPER_SMALL_MODEL_NAME).tmp $(WHISPER_SMALL_MODEL_NAME) 2>/dev/null || rm -f $(WHISPER_SMALL_MODEL_NAME).tmp $(WHISPER_SMALL_MODEL_NAME) - -.PHONY: install -install: - pip install -r tests/requirements.txt - -.PHONY: download-model-whisper-base -download-model-whisper-base: - cd ../../models && \ - curl -s -S -L -f $(WHISPER_BASE_MODEL_URL) -z $(WHISPER_BASE_MODEL_NAME) -o $(WHISPER_BASE_MODEL_NAME).tmp && mv -f $(WHISPER_BASE_MODEL_NAME).tmp $(WHISPER_BASE_MODEL_NAME) 2>/dev/null || rm -f $(WHISPER_BASE_MODEL_NAME).tmp $(WHISPER_BASE_MODEL_NAME) - -.PHONY: run -run: - cd ../../models && \ - podman run -d --rm -it -p $(PORT):$(PORT) -v ./$(SELECTED_MODEL_NAME):$(MODELS_PATH)/$(SELECTED_MODEL_NAME):$(BIND_MOUNT_OPTIONS) -e HOST=0.0.0.0 -e MODEL_PATH=$(MODELS_PATH)/$(SELECTED_MODEL_NAME) -e PORT=$(PORT) $(IMAGE) - -.PHONY: test -test: - curl -H "Cache-Control: no-cache" -s -S -L -f $(SELECTED_MODEL_URL) -z ./model.gguf -o ./model.gguf.tmp && mv -f ./model.gguf.tmp ./model.gguf 2>/dev/null || rm -f ./model.gguf.tmp ./model.gguf - pytest --log-cli-level NOTSET + make MODEL_NAME=ggml-small.bin MODEL_URL=https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin -f Makefile download-model diff --git a/model_servers/whispercpp/tests/conftest.py b/model_servers/whispercpp/tests/conftest.py index 380262b1..3d8159ac 100644 --- a/model_servers/whispercpp/tests/conftest.py +++ b/model_servers/whispercpp/tests/conftest.py @@ -1,24 +1,53 @@ import pytest_container import os +if not 'REGISTRY' in os.environ: + REGISTRY = 'ghcr.io' +else: + REGISTRY = os.environ['REGISTRY'] + +if not 'IMAGE_NAME' in os.environ: + IMAGE_NAME = 
'containers/whispercpp:latest' +else: + IMAGE_NAME = os.environ['IMAGE_NAME'] + +if not 'MODEL_NAME' in os.environ: + MODEL_NAME = 'ggml-small.bin' +else: + MODEL_NAME = os.environ['MODEL_NAME'] + +if not 'MODEL_PATH' in os.environ: + MODEL_PATH = "/app/models" +else: + MODEL_PATH = os.environ['MODEL_PATH'] + +if not 'PORT' in os.environ: + PORT = 8001 +else: + PORT = os.environ['PORT'] + try: + PORT = int(PORT) + except: + PORT = 8001 + MS = pytest_container.Container( - url=f"containers-storage:{os.environ['REGISTRY']}/containers/{os.environ['IMAGE_NAME']}", + url=f"containers-storage:{REGISTRY}/{IMAGE_NAME}", volume_mounts=[ pytest_container.container.BindMount( - container_path="/locallm/models/model.gguf", - host_path=f"./model.gguf", + container_path="{MODEL_PATH}/{MODEL_NAME}".format(MODEL_PATH=MODEL_PATH, MODEL_NAME=MODEL_NAME), + host_path=f"./{MODEL_NAME}", flags=["ro"] ) ], extra_environment_variables={ - "MODEL_PATH": "/locallm/models/model.gguf", + "MODEL_PATH": "{MODEL_PATH}/{MODEL_NAME}".format(MODEL_PATH=MODEL_PATH, MODEL_NAME=MODEL_NAME), "HOST": "0.0.0.0", - "PORT": "8001" + "PORT": f"{PORT}" }, forwarded_ports=[ pytest_container.PortForwarding( - container_port=8001, - host_port=8001 + container_port=PORT, + host_port=PORT ) ], ) diff --git a/model_servers/whispercpp/tests/test_alive.py b/model_servers/whispercpp/tests/test_alive.py index fcad510a..226aac1c 100644 --- a/model_servers/whispercpp/tests/test_alive.py +++ b/model_servers/whispercpp/tests/test_alive.py @@ -4,7 +4,6 @@ CONTAINER_IMAGES = [MS] - def test_etc_os_release_present(auto_container: pytest_container.container.ContainerData): assert auto_container.connection.file("/etc/os-release").exists diff --git a/models/Containerfile b/models/Containerfile index a49d39c0..ba2eccfd 100644 --- a/models/Containerfile +++ b/models/Containerfile @@ -8,8 +8,8 @@ FROM registry.access.redhat.com/ubi9/ubi-micro:9.3-13 # Can be substituted using the --build-arg defined above -ARG 
MODEL_URL=https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf +ARG MODEL=https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf # By default the Model Server container image uses the AI Model stored in the model/model.file file. WORKDIR /model -ADD $MODEL_URL /model/model.file +ADD $MODEL /model/model.file diff --git a/models/Makefile b/models/Makefile index d3f46a72..7ac4d312 100644 --- a/models/Makefile +++ b/models/Makefile @@ -1,6 +1,18 @@ -MODEL ?= https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf -IMAGE ?= quay.io/ai-lab/llama:latest +MODEL_URL ?= https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf +MODEL_NAME ?= mistral-7b-instruct-v0.1.Q4_K_M.gguf + +REGISTRY ?= quay.io +REGISTRY_ORG ?= ai-lab +COMPONENT = models + +IMAGE ?= $(REGISTRY)/$(REGISTRY_ORG)/$(COMPONENT)/llama:latest .PHONY: build build: - podman build $${MODEL:+--build-arg MODEL=$${MODEL}} -f Containerfile -t ${IMAGE} . + podman build $${MODEL_URL:+--build-arg MODEL=$${MODEL_URL}} -f Containerfile -t ${IMAGE} . + +.PHONY: download-model +download-model: + curl -H "Cache-Control: no-cache" -s -S -L -f $(MODEL_URL) -z $(MODEL_NAME) -o $(MODEL_NAME).tmp && \ + mv -f $(MODEL_NAME).tmp $(MODEL_NAME) 2>/dev/null || \ + rm -f $(MODEL_NAME).tmp $(MODEL_NAME) diff --git a/models/README.md b/models/README.md index f5cdd254..27e7d088 100644 --- a/models/README.md +++ b/models/README.md @@ -1,13 +1,19 @@ # Directory to store model files -The following suggested list of open models is available on huggingface.co. +The models directory stores models and provides automation around downloading models. 
-* https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf -* https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q4_K_M.gguf -* https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin +Want to try one of our tested models? Try one or all of the following: -You can easily build one of these models into a container image by executing - -``` -make MODEL=https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf IMAGE=your.registry.com/llama:latest +```bash +make -f Makefile download-model-llama +make -f Makefile download-model-tiny-llama +make -f Makefile download-model-mistral +make -f Makefile download-model-whisper-small +make -f Makefile download-model-whisper-base ``` + +Want to download and run a model you don't see listed? This is supported with the `MODEL_NAME` and `MODEL_URL` params: + +```bash +make -f Makefile download-model MODEL_URL=https://huggingface.co/andrewcanis/c4ai-command-r-v01-GGUF/resolve/main/c4ai-command-r-v01-Q4_K_S.gguf MODEL_NAME=c4ai-command-r-v01-Q4_K_S.gguf +``` \ No newline at end of file diff --git a/recipes/common/Makefile.common b/recipes/common/Makefile.common index 4a826ca8..712dde49 100644 --- a/recipes/common/Makefile.common +++ b/recipes/common/Makefile.common @@ -64,7 +64,8 @@ MODEL_NAME ?= $(MISTRAL_MODEL_NAME) .PHONY: download-model-mistral download-model-mistral: - curl -H "Cache-Control: no-cache" -s -S -L -f $(MISTRAL_MODEL_URL) -z $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME) -o $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp && mv -f $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME) 2>/dev/null || rm -f $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME).tmp $(RELATIVE_MODELS_PATH)/$(MISTRAL_MODEL_NAME) + cd ../../../models && \ + make MODEL_NAME=mistral-7b-instruct-v0.1.Q4_K_M.gguf 
MODEL_URL=https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf -f Makefile download-model .PHONY: install install: @@ -146,7 +147,7 @@ install-chrome: elif [[ "$(OS)" == "Darwin" ]]; then \ open $(CHROME_DOWNLOAD_PATH); \ rm $(CHROME_DOWNLOAD_PATH); \ - mv /Volumes/Google\ Chrome/Google\ Chrome.app $(CHROMEDRIVER_INSTALLATION_PATH); \ + cp -r /Volumes/Google\ Chrome/Google\ Chrome.app $(CHROMEDRIVER_INSTALLATION_PATH); \ diskutil unmount /Volumes/Google\ Chrome; \ fi; diff --git a/recipes/natural_language_processing/tests/functional/conftest.py b/recipes/natural_language_processing/tests/functional/conftest.py index d024235f..fbc4be69 100644 --- a/recipes/natural_language_processing/tests/functional/conftest.py +++ b/recipes/natural_language_processing/tests/functional/conftest.py @@ -50,7 +50,7 @@ host_port=8501 ) ], - extra_launch_args=["--net=host"] + extra_launch_args=["--network=host"] ) def pytest_generate_tests(metafunc):