Merge branch 'master' into transformers_use_filesystem_model_if_exists
joshbtn authored Oct 10, 2024
2 parents aeeeacd + ea8675d commit 0b28007
Showing 10 changed files with 218 additions and 23 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/deploy-explorer.yaml
@@ -33,7 +33,7 @@ jobs:
run: |
CGO_ENABLED=0 make build-api
- name: rm
-uses: appleboy/ssh-action@v1.0.3
+uses: appleboy/ssh-action@v1.1.0
with:
host: ${{ secrets.EXPLORER_SSH_HOST }}
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
@@ -53,7 +53,7 @@ jobs:
rm: true
target: ./local-ai
- name: restarting
-uses: appleboy/ssh-action@v1.0.3
+uses: appleboy/ssh-action@v1.1.0
with:
host: ${{ secrets.EXPLORER_SSH_HOST }}
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
18 changes: 9 additions & 9 deletions Makefile
@@ -8,15 +8,15 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=d5cb86844f26f600c48bf3643738ea68138f961d
+CPPLLAMA_VERSION?=c81f3bbb051f8b736e117dfc78c99d7c4e0450f6

# go-rwkv version
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6

# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
-WHISPER_CPP_VERSION?=9f346d00840bcd7af62794871109841af40cecfb
+WHISPER_CPP_VERSION?=fdbfb460ed546452a5d53611bba66d10d842e719

# bert.cpp version
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
@@ -470,38 +470,38 @@ run-e2e-image:

run-e2e-aio: protogen-go
@echo 'Running e2e AIO tests'
-$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e-aio
+$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e-aio

test-e2e:
@echo 'Running e2e tests'
BUILD_TYPE=$(BUILD_TYPE) \
LOCALAI_API=http://$(E2E_BRIDGE_IP):5390/v1 \
-$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e
+$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e

teardown-e2e:
rm -rf $(TEST_DIR) || true
docker stop $$(docker ps -q --filter ancestor=localai-tests)

test-llama: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
-$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts 5 -v -r $(TEST_PATHS)
+$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)

test-llama-gguf: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
-$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts 5 -v -r $(TEST_PATHS)
+$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)

test-tts: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
-$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="tts" --flake-attempts 1 -v -r $(TEST_PATHS)
+$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="tts" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)

test-stablediffusion: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
-$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stablediffusion" --flake-attempts 1 -v -r $(TEST_PATHS)
+$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stablediffusion" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)

test-stores: backend-assets/grpc/local-store
mkdir -p tests/integration/backend-assets/grpc
cp -f backend-assets/grpc/local-store tests/integration/backend-assets/grpc/
-$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stores" --flake-attempts 1 -v -r tests/integration
+$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stores" --flake-attempts $(TEST_FLAKES) -v -r tests/integration

test-container:
docker build --target requirements -t local-ai-test-container .
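Note: the hardcoded Ginkgo --flake-attempts counts above are all replaced with $(TEST_FLAKES), so a single variable now controls flake retries across the test targets. Presumably the variable is given a default elsewhere in the Makefile (e.g. TEST_FLAKES?=5 — an assumption, since that hunk is not shown here), letting a caller override it per run, e.g. "make test-llama TEST_FLAKES=1".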
5 changes: 3 additions & 2 deletions core/http/app_test.go
@@ -12,6 +12,7 @@ import (
"os"
"path/filepath"
"runtime"
"strings"

"github.com/mudler/LocalAI/core/config"
. "github.com/mudler/LocalAI/core/http"
@@ -950,7 +951,7 @@ var _ = Describe("API test", func() {
openai.ChatCompletionRequest{Model: "rwkv_test", Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices) > 0).To(BeTrue())
-Expect(resp.Choices[0].Message.Content).To(Or(ContainSubstring("Sure"), ContainSubstring("five")))
+Expect(strings.ToLower(resp.Choices[0].Message.Content)).To(Or(ContainSubstring("sure"), ContainSubstring("five")))

stream, err := client.CreateChatCompletionStream(context.TODO(), openai.ChatCompletionRequest{Model: "rwkv_test", Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}})
Expect(err).ToNot(HaveOccurred())
@@ -969,7 +970,7 @@ var _ = Describe("API test", func() {
tokens++
}
Expect(text).ToNot(BeEmpty())
-Expect(text).To(Or(ContainSubstring("Sure"), ContainSubstring("five")))
+Expect(strings.ToLower(text)).To(Or(ContainSubstring("sure"), ContainSubstring("five")))

Expect(tokens).ToNot(Or(Equal(1), Equal(0)))
})
2 changes: 1 addition & 1 deletion examples/chainlit/requirements.txt
@@ -1,4 +1,4 @@
-llama_index==0.11.14
+llama_index==0.11.16
requests==2.32.3
weaviate_client==4.8.1
transformers
2 changes: 1 addition & 1 deletion examples/k8sgpt/broken-pod.yaml
@@ -5,7 +5,7 @@ metadata:
spec:
containers:
- name: broken-pod
-image: nginx:1.27.0
+image: nginx:1.27.2
livenessProbe:
httpGet:
path: /
4 changes: 2 additions & 2 deletions examples/langchain-chroma/requirements.txt
@@ -1,4 +1,4 @@
langchain==0.3.1
-openai==1.50.2
+openai==1.51.1
chromadb==0.5.11
-llama-index==0.11.14
+llama-index==0.11.16
10 changes: 5 additions & 5 deletions examples/langchain/langchainpy-localai-example/requirements.txt
@@ -1,24 +1,24 @@
-aiohttp==3.10.8
+aiohttp==3.10.9
aiosignal==1.3.1
async-timeout==4.0.3
attrs==24.2.0
certifi==2024.8.30
charset-normalizer==3.3.2
colorama==0.4.6
dataclasses-json==0.6.7
-debugpy==1.8.2
+debugpy==1.8.6
frozenlist==1.4.1
greenlet==3.1.1
idna==3.10
-langchain==0.3.1
+langchain==0.3.2
langchain-community==0.3.1
marshmallow==3.22.0
marshmallow-enum==1.5.1
-multidict==6.0.5
+multidict==6.1.0
mypy-extensions==1.0.0
numexpr==2.10.1
numpy==2.1.1
-openai==1.45.1
+openai==1.51.1
openapi-schema-pydantic==1.2.4
packaging>=23.2
pydantic==2.9.2
2 changes: 1 addition & 1 deletion examples/streamlit-bot/requirements.txt
@@ -1,2 +1,2 @@
-streamlit==1.38.0
+streamlit==1.39.0
requests
66 changes: 66 additions & 0 deletions gallery/arch-function.yaml
@@ -0,0 +1,66 @@
---
name: "chatml"

config_file: |
  mmap: true
  function:
    disable_no_action: true
    grammar:
      mixed_mode: false
      disable: true
      parallel_calls: true
      expect_strings_after_json: true
    json_regex_match:
      - "(?s)<tool_call>(.*?)</tool_call>"
      - "(?s)<tool_call>(.*)"
    capture_llm_results:
      - (?s)<scratchpad>(.*?)</scratchpad>
    replace_llm_results:
      - key: (?s)<scratchpad>(.*?)</scratchpad>
        value: ""
  template:
    chat_message: |
      <|im_start|>{{ .RoleName }}
      {{ if .FunctionCall -}}
      Function call:
      {{ else if eq .RoleName "tool" -}}
      Function response:
      {{ end -}}
      {{ if .Content -}}
      {{.Content }}
      {{ end -}}
      {{ if .FunctionCall -}}
      {{toJson .FunctionCall}}
      {{ end -}}<|im_end|>
    function: |
      <|im_start|>system
      # Tools
      You may call one or more functions to assist with the user query.
      You are provided with function signatures within <tools></tools> XML tags:
      <tools>
      {{range .Functions}}
      {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
      {{end}}
      </tools>
      For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
      <tool_call>
      {"name": <function-name>, "arguments": <args-json-object>}
      </tool_call>
      <|im_end|>
      {{.Input -}}
      <|im_start|>assistant
    chat: |
      {{.Input -}}
      <|im_start|>assistant
    completion: |
      {{.Input}}
  context_size: 4096
  f16: true
  stopwords:
    - '<|im_end|>'
    - '<dummy32000>'
    - '</s>'
    - "<|eot_id|>"
    - "<|end_of_text|>"
128 changes: 128 additions & 0 deletions gallery/index.yaml
@@ -1,4 +1,21 @@
---
- name: "moe-girl-1ba-7bt-i1"
  icon: https://cdn-uploads.huggingface.co/production/uploads/634262af8d8089ebaefd410e/kTXXSSSqpb21rfyOX7FUa.jpeg
  # chatml
  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
  urls:
    - https://huggingface.co/allura-org/MoE-Girl-1BA-7BT
    - https://huggingface.co/mradermacher/MoE-Girl-1BA-7BT-i1-GGUF
  description: |
    A finetune of OLMoE by AllenAI designed for roleplaying (and maybe general use cases if you try hard enough).
    PLEASE do not expect godliness out of this, it's a model with 1 billion active parameters. Expect something more akin to Gemma 2 2B, not Llama 3 8B.
  overrides:
    parameters:
      model: MoE-Girl-1BA-7BT.i1-Q4_K_M.gguf
  files:
    - filename: MoE-Girl-1BA-7BT.i1-Q4_K_M.gguf
      sha256: e6ef9c311c73573b243de6ff7538b386f430af30b2be0a96a5745c17137ad432
      uri: huggingface://mradermacher/MoE-Girl-1BA-7BT-i1-GGUF/MoE-Girl-1BA-7BT.i1-Q4_K_M.gguf
- name: "salamandra-7b-instruct"
icon: https://huggingface.co/BSC-LT/salamandra-7b-instruct/resolve/main/images/salamandra_header.png
# Uses chatml
@@ -83,6 +100,49 @@
    - filename: llama-3.2-1b-instruct-q8_0.gguf
      sha256: ba345c83bf5cc679c653b853c46517eea5a34f03ed2205449db77184d9ae62a9
      uri: huggingface://hugging-quants/Llama-3.2-1B-Instruct-Q8_0-GGUF/llama-3.2-1b-instruct-q8_0.gguf
## Uncensored
- !!merge <<: *llama32
  icon: https://cdn-uploads.huggingface.co/production/uploads/66c9d7a26f2335ba288810a4/4YDg-rcEXCK0fdTS1fBzE.webp
  name: "versatillama-llama-3.2-3b-instruct-abliterated"
  urls:
    - https://huggingface.co/QuantFactory/VersatiLlama-Llama-3.2-3B-Instruct-Abliterated-GGUF
  description: |
    Small but smart, fine-tuned on a vast dataset of conversations. It generates human-like text with high performance for its size, is very versatile relative to its size and parameter count, and offers capability almost as good as Llama 3.1 8B Instruct.
  overrides:
    parameters:
      model: VersatiLlama-Llama-3.2-3B-Instruct-Abliterated.Q4_K_M.gguf
  files:
    - filename: VersatiLlama-Llama-3.2-3B-Instruct-Abliterated.Q4_K_M.gguf
      sha256: 15b9e4a987f50d7594d030815c7166a996e20db46fe1e20da03e96955020312c
      uri: huggingface://QuantFactory/VersatiLlama-Llama-3.2-3B-Instruct-Abliterated-GGUF/VersatiLlama-Llama-3.2-3B-Instruct-Abliterated.Q4_K_M.gguf
- !!merge <<: *llama32
  name: "llama3.2-3b-enigma"
  icon: https://cdn-uploads.huggingface.co/production/uploads/64f267a8a4f79a118e0fcc89/it7MY5MyLCLpFQev5dUis.jpeg
  urls:
    - https://huggingface.co/QuantFactory/Llama3.2-3B-Enigma-GGUF
  description: |
    Enigma is a high-quality code-instruct model built on Llama 3.2 3b, using the Llama 3.2 Instruct chat format. It is finetuned on synthetic code-instruct data generated with Llama 3.1 405b and supplemented with generalist synthetic data.
  overrides:
    parameters:
      model: Llama3.2-3B-Enigma.Q4_K_M.gguf
  files:
    - filename: Llama3.2-3B-Enigma.Q4_K_M.gguf
      sha256: 4304e6ee1e348b228470700ec1e9423f5972333d376295195ce6cd5c70cae5e4
      uri: huggingface://QuantFactory/Llama3.2-3B-Enigma-GGUF/Llama3.2-3B-Enigma.Q4_K_M.gguf
- !!merge <<: *llama32
  name: "llama3.2-3b-esper2"
  icon: https://cdn-uploads.huggingface.co/production/uploads/64f267a8a4f79a118e0fcc89/4I6oK8DG0so4VD8GroFsd.jpeg
  urls:
    - https://huggingface.co/QuantFactory/Llama3.2-3B-Esper2-GGUF
  description: |
    Esper 2 is a DevOps and cloud architecture code specialist built on Llama 3.2 3b. It is an AI assistant focused on AWS, Azure, GCP, Terraform, Dockerfiles, pipelines, shell scripts, and more, with real-world problem solving and high-quality code-instruct performance within the Llama 3.2 Instruct chat format. Finetuned on synthetic DevOps-instruct and code-instruct data generated with Llama 3.1 405b and supplemented with generalist chat data.
  overrides:
    parameters:
      model: Llama3.2-3B-Esper2.Q4_K_M.gguf
  files:
    - filename: Llama3.2-3B-Esper2.Q4_K_M.gguf
      sha256: 11d2bd674aa22a71a59ec49ad29b695000d14bc275b0195b8d7089bfc7582fc7
      uri: huggingface://QuantFactory/Llama3.2-3B-Esper2-GGUF/Llama3.2-3B-Esper2.Q4_K_M.gguf
- &qwen25
  ## Qwen2.5
  name: "qwen2.5-14b-instruct"
@@ -336,6 +396,59 @@
    - filename: T.E-8.1-Q4_K_M-imat.gguf
      sha256: 1b7892b82c01ea4cbebe34cd00f9836cbbc369fc3247c1f44a92842201e7ec0b
      uri: huggingface://Lewdiculous/T.E-8.1-GGUF-IQ-Imatrix-Request/T.E-8.1-Q4_K_M-imat.gguf
- &archfunct
  license: apache-2.0
  tags:
    - llm
    - gguf
    - gpu
    - qwen
    - qwen2.5
    - cpu
    - function-calling
  name: "arch-function-1.5b"
  uri: "github:mudler/LocalAI/gallery/arch-function.yaml@master"
  urls:
    - https://huggingface.co/katanemolabs/Arch-Function-1.5B
    - https://huggingface.co/mradermacher/Arch-Function-1.5B-GGUF
  description: |
    The Katanemo Arch-Function collection of large language models (LLMs) is a collection of state-of-the-art (SOTA) LLMs specifically designed for function calling tasks. The models are designed to understand complex function signatures, identify required parameters, and produce accurate function call outputs based on natural language prompts. Achieving performance on par with GPT-4, these models set a new benchmark in the domain of function-oriented tasks, making them suitable for scenarios where automated API interaction and function execution is crucial.
    In summary, the Katanemo Arch-Function collection demonstrates:
    - State-of-the-art performance in function calling
    - Accurate parameter identification and suggestion, even in ambiguous or incomplete inputs
    - High generalization across multiple function calling use cases, from API interactions to automated backend tasks
    - Optimized low-latency, high-throughput performance, making it suitable for real-time, production environments
  overrides:
    parameters:
      model: Arch-Function-1.5B.Q4_K_M.gguf
  files:
    - filename: Arch-Function-1.5B.Q4_K_M.gguf
      sha256: 5ac54d2d50cca0ee0335ca2c9b688204c0829cd3a73de3ee3fda108281ad9691
      uri: huggingface://mradermacher/Arch-Function-1.5B-GGUF/Arch-Function-1.5B.Q4_K_M.gguf
- !!merge <<: *archfunct
  name: "arch-function-7b"
  urls:
    - https://huggingface.co/katanemolabs/Arch-Function-7B
    - https://huggingface.co/mradermacher/Arch-Function-7B-GGUF
  overrides:
    parameters:
      model: Arch-Function-7B.Q4_K_M.gguf
  files:
    - filename: Arch-Function-7B.Q4_K_M.gguf
      sha256: 6e38661321d79d02b8cf57c79d97c6c0e19adb9ffa66083cc440c24e257234b6
      uri: huggingface://mradermacher/Arch-Function-7B-GGUF/Arch-Function-7B.Q4_K_M.gguf
- !!merge <<: *archfunct
  name: "arch-function-3b"
  urls:
    - https://huggingface.co/katanemolabs/Arch-Function-3B
    - https://huggingface.co/mradermacher/Arch-Function-3B-GGUF
  overrides:
    parameters:
      model: Arch-Function-3B.Q4_K_M.gguf
  files:
    - filename: Arch-Function-3B.Q4_K_M.gguf
      sha256: 9945cb8d070498d163e5df90c1987f591d35e4fd2222a6c51bcfff848c4b573b
      uri: huggingface://mradermacher/Arch-Function-3B-GGUF/Arch-Function-3B.Q4_K_M.gguf
- &smollm
  ## SmolLM
  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
@@ -845,6 +958,21 @@
    - filename: NightyGurps-14b-v1.1-Q4_K_M.gguf
      sha256: d09d53259ad2c0298150fa8c2db98fe42f11731af89fdc80ad0e255a19adc4b0
      uri: huggingface://bartowski/NightyGurps-14b-v1.1-GGUF/NightyGurps-14b-v1.1-Q4_K_M.gguf
- !!merge <<: *llama31
  name: "llama-3.1-swallow-70b-v0.1-i1"
  icon: https://huggingface.co/tokyotech-llm/Llama-3.1-Swallow-70B-v0.1/resolve/main/logo.png
  urls:
    - https://huggingface.co/tokyotech-llm/Llama-3.1-Swallow-70B-v0.1
    - https://huggingface.co/mradermacher/Llama-3.1-Swallow-70B-v0.1-i1-GGUF
  description: |
    Llama 3.1 Swallow is a series of large language models (8B, 70B) built by continual pre-training on the Meta Llama 3.1 models. Llama 3.1 Swallow enhances the Japanese language capabilities of the original Llama 3.1 while retaining its English capabilities. Continual pre-training used approximately 200 billion tokens sampled from a large Japanese web corpus (Swallow Corpus Version 2), Japanese and English Wikipedia articles, and mathematical and coding content (see the Training Datasets section). The instruction-tuned models (Instruct) were built by supervised fine-tuning (SFT) on synthetic data specially built for Japanese. See the Swallow Model Index section to find other model variants.
  overrides:
    parameters:
      model: Llama-3.1-Swallow-70B-v0.1.i1-Q4_K_M.gguf
  files:
    - filename: Llama-3.1-Swallow-70B-v0.1.i1-Q4_K_M.gguf
      sha256: 9eaa08a4872a26f56fe34b27a99f7bd0d22ee2b2d1c84cfcde2091b5f61af5fa
      uri: huggingface://mradermacher/Llama-3.1-Swallow-70B-v0.1-i1-GGUF/Llama-3.1-Swallow-70B-v0.1.i1-Q4_K_M.gguf
## Uncensored models
- !!merge <<: *llama31
  name: "humanish-roleplay-llama-3.1-8b-i1"
