Merge branch 'master' into transformers_use_filesystem_model_if_exists
joshbtn authored Oct 10, 2024
2 parents aeeeacd + ea8675d commit 0b28007
Showing 10 changed files with 218 additions and 23 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/deploy-explorer.yaml
@@ -33,7 +33,7 @@ jobs:
run: |
CGO_ENABLED=0 make build-api
- name: rm
-uses: appleboy/ssh-action@v1.0.3
+uses: appleboy/ssh-action@v1.1.0
with:
host: ${{ secrets.EXPLORER_SSH_HOST }}
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
@@ -53,7 +53,7 @@ jobs:
rm: true
target: ./local-ai
- name: restarting
-uses: appleboy/ssh-action@v1.0.3
+uses: appleboy/ssh-action@v1.1.0
with:
host: ${{ secrets.EXPLORER_SSH_HOST }}
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
18 changes: 9 additions & 9 deletions Makefile
@@ -8,15 +8,15 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=d5cb86844f26f600c48bf3643738ea68138f961d
+CPPLLAMA_VERSION?=c81f3bbb051f8b736e117dfc78c99d7c4e0450f6

# go-rwkv version
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6

# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
-WHISPER_CPP_VERSION?=9f346d00840bcd7af62794871109841af40cecfb
+WHISPER_CPP_VERSION?=fdbfb460ed546452a5d53611bba66d10d842e719

# bert.cpp version
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
@@ -470,38 +470,38 @@ run-e2e-image:

run-e2e-aio: protogen-go
@echo 'Running e2e AIO tests'
-$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e-aio
+$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e-aio

test-e2e:
@echo 'Running e2e tests'
BUILD_TYPE=$(BUILD_TYPE) \
LOCALAI_API=http://$(E2E_BRIDGE_IP):5390/v1 \
-$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e
+$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e

teardown-e2e:
rm -rf $(TEST_DIR) || true
docker stop $$(docker ps -q --filter ancestor=localai-tests)

test-llama: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
-$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts 5 -v -r $(TEST_PATHS)
+$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)

test-llama-gguf: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
-$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts 5 -v -r $(TEST_PATHS)
+$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)

test-tts: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
-$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="tts" --flake-attempts 1 -v -r $(TEST_PATHS)
+$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="tts" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)

test-stablediffusion: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
-$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stablediffusion" --flake-attempts 1 -v -r $(TEST_PATHS)
+$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stablediffusion" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)

test-stores: backend-assets/grpc/local-store
mkdir -p tests/integration/backend-assets/grpc
cp -f backend-assets/grpc/local-store tests/integration/backend-assets/grpc/
-$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stores" --flake-attempts 1 -v -r tests/integration
+$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stores" --flake-attempts $(TEST_FLAKES) -v -r tests/integration

test-container:
docker build --target requirements -t local-ai-test-container .
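Note: the hardcoded Ginkgo --flake-attempts counts above are all replaced with $(TEST_FLAKES), so a single variable now controls flake retries across the test targets. Presumably the variable is given a default elsewhere in the Makefile (e.g. TEST_FLAKES?=5 — an assumption, since that hunk is not shown here), letting a caller override it per run, e.g. "make test-llama TEST_FLAKES=1".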
5 changes: 3 additions & 2 deletions core/http/app_test.go
@@ -12,6 +12,7 @@ import (
"os"
"path/filepath"
"runtime"
"strings"

"github.com/mudler/LocalAI/core/config"
. "github.com/mudler/LocalAI/core/http"
@@ -950,7 +951,7 @@ var _ = Describe("API test", func() {
openai.ChatCompletionRequest{Model: "rwkv_test", Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices) > 0).To(BeTrue())
-Expect(resp.Choices[0].Message.Content).To(Or(ContainSubstring("Sure"), ContainSubstring("five")))
+Expect(strings.ToLower(resp.Choices[0].Message.Content)).To(Or(ContainSubstring("sure"), ContainSubstring("five")))

stream, err := client.CreateChatCompletionStream(context.TODO(), openai.ChatCompletionRequest{Model: "rwkv_test", Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}})
Expect(err).ToNot(HaveOccurred())
@@ -969,7 +970,7 @@ var _ = Describe("API test", func() {
tokens++
}
Expect(text).ToNot(BeEmpty())
-Expect(text).To(Or(ContainSubstring("Sure"), ContainSubstring("five")))
+Expect(strings.ToLower(text)).To(Or(ContainSubstring("sure"), ContainSubstring("five")))

Expect(tokens).ToNot(Or(Equal(1), Equal(0)))
})
2 changes: 1 addition & 1 deletion examples/chainlit/requirements.txt
@@ -1,4 +1,4 @@
-llama_index==0.11.14
+llama_index==0.11.16
requests==2.32.3
weaviate_client==4.8.1
transformers
2 changes: 1 addition & 1 deletion examples/k8sgpt/broken-pod.yaml
@@ -5,7 +5,7 @@ metadata:
spec:
containers:
- name: broken-pod
-image: nginx:1.27.0
+image: nginx:1.27.2
livenessProbe:
httpGet:
path: /
4 changes: 2 additions & 2 deletions examples/langchain-chroma/requirements.txt
@@ -1,4 +1,4 @@
langchain==0.3.1
-openai==1.50.2
+openai==1.51.1
chromadb==0.5.11
-llama-index==0.11.14
+llama-index==0.11.16
10 changes: 5 additions & 5 deletions examples/langchain/langchainpy-localai-example/requirements.txt
@@ -1,24 +1,24 @@
-aiohttp==3.10.8
+aiohttp==3.10.9
aiosignal==1.3.1
async-timeout==4.0.3
attrs==24.2.0
certifi==2024.8.30
charset-normalizer==3.3.2
colorama==0.4.6
dataclasses-json==0.6.7
-debugpy==1.8.2
+debugpy==1.8.6
frozenlist==1.4.1
greenlet==3.1.1
idna==3.10
-langchain==0.3.1
+langchain==0.3.2
langchain-community==0.3.1
marshmallow==3.22.0
marshmallow-enum==1.5.1
-multidict==6.0.5
+multidict==6.1.0
mypy-extensions==1.0.0
numexpr==2.10.1
numpy==2.1.1
-openai==1.45.1
+openai==1.51.1
openapi-schema-pydantic==1.2.4
packaging>=23.2
pydantic==2.9.2
2 changes: 1 addition & 1 deletion examples/streamlit-bot/requirements.txt
@@ -1,2 +1,2 @@
-streamlit==1.38.0
+streamlit==1.39.0
requests
66 changes: 66 additions & 0 deletions gallery/arch-function.yaml
@@ -0,0 +1,66 @@
---
name: "chatml"

config_file: |
  mmap: true
  function:
    disable_no_action: true
    grammar:
      mixed_mode: false
      disable: true
      parallel_calls: true
      expect_strings_after_json: true
    json_regex_match:
      - "(?s)<tool_call>(.*?)</tool_call>"
      - "(?s)<tool_call>(.*)"
    capture_llm_results:
      - (?s)<scratchpad>(.*?)</scratchpad>
    replace_llm_results:
      - key: (?s)<scratchpad>(.*?)</scratchpad>
        value: ""
  template:
    chat_message: |
      <|im_start|>{{ .RoleName }}
      {{ if .FunctionCall -}}
      Function call:
      {{ else if eq .RoleName "tool" -}}
      Function response:
      {{ end -}}
      {{ if .Content -}}
      {{.Content }}
      {{ end -}}
      {{ if .FunctionCall -}}
      {{toJson .FunctionCall}}
      {{ end -}}<|im_end|>
    function: |
      <|im_start|>system
      # Tools
      You may call one or more functions to assist with the user query.
      You are provided with function signatures within <tools></tools> XML tags:
      <tools>
      {{range .Functions}}
      {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
      {{end}}
      </tools>
      For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
      <tool_call>
      {"name": <function-name>, "arguments": <args-json-object>}
      </tool_call>
      <|im_end|>
      {{.Input -}}
      <|im_start|>assistant
    chat: |
      {{.Input -}}
      <|im_start|>assistant
    completion: |
      {{.Input}}
  context_size: 4096
  f16: true
  stopwords:
    - '<|im_end|>'
    - '<dummy32000>'
    - '</s>'
    - "<|eot_id|>"
    - "<|end_of_text|>"
128 changes: 128 additions & 0 deletions gallery/index.yaml
@@ -1,4 +1,21 @@
---
- name: "moe-girl-1ba-7bt-i1"
  icon: https://cdn-uploads.huggingface.co/production/uploads/634262af8d8089ebaefd410e/kTXXSSSqpb21rfyOX7FUa.jpeg
  # chatml
  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
  urls:
    - https://huggingface.co/allura-org/MoE-Girl-1BA-7BT
    - https://huggingface.co/mradermacher/MoE-Girl-1BA-7BT-i1-GGUF
  description: |
    A finetune of OLMoE by AllenAI designed for roleplaying (and maybe general use cases if you try hard enough).
    PLEASE do not expect godliness out of this, it's a model with 1 billion active parameters. Expect something more akin to Gemma 2 2B, not Llama 3 8B.
  overrides:
    parameters:
      model: MoE-Girl-1BA-7BT.i1-Q4_K_M.gguf
  files:
    - filename: MoE-Girl-1BA-7BT.i1-Q4_K_M.gguf
      sha256: e6ef9c311c73573b243de6ff7538b386f430af30b2be0a96a5745c17137ad432
      uri: huggingface://mradermacher/MoE-Girl-1BA-7BT-i1-GGUF/MoE-Girl-1BA-7BT.i1-Q4_K_M.gguf
- name: "salamandra-7b-instruct"
icon: https://huggingface.co/BSC-LT/salamandra-7b-instruct/resolve/main/images/salamandra_header.png
# Uses chatml
@@ -83,6 +100,49 @@
    - filename: llama-3.2-1b-instruct-q8_0.gguf
      sha256: ba345c83bf5cc679c653b853c46517eea5a34f03ed2205449db77184d9ae62a9
      uri: huggingface://hugging-quants/Llama-3.2-1B-Instruct-Q8_0-GGUF/llama-3.2-1b-instruct-q8_0.gguf
## Uncensored
- !!merge <<: *llama32
  icon: https://cdn-uploads.huggingface.co/production/uploads/66c9d7a26f2335ba288810a4/4YDg-rcEXCK0fdTS1fBzE.webp
  name: "versatillama-llama-3.2-3b-instruct-abliterated"
  urls:
    - https://huggingface.co/QuantFactory/VersatiLlama-Llama-3.2-3B-Instruct-Abliterated-GGUF
  description: |
    Small but smart, fine-tuned on a vast dataset of conversations. It generates human-like text with high performance for its size, is very versatile relative to its size and parameter count, and offers capability almost as good as Llama 3.1 8B Instruct.
  overrides:
    parameters:
      model: VersatiLlama-Llama-3.2-3B-Instruct-Abliterated.Q4_K_M.gguf
  files:
    - filename: VersatiLlama-Llama-3.2-3B-Instruct-Abliterated.Q4_K_M.gguf
      sha256: 15b9e4a987f50d7594d030815c7166a996e20db46fe1e20da03e96955020312c
      uri: huggingface://QuantFactory/VersatiLlama-Llama-3.2-3B-Instruct-Abliterated-GGUF/VersatiLlama-Llama-3.2-3B-Instruct-Abliterated.Q4_K_M.gguf
- !!merge <<: *llama32
  name: "llama3.2-3b-enigma"
  icon: https://cdn-uploads.huggingface.co/production/uploads/64f267a8a4f79a118e0fcc89/it7MY5MyLCLpFQev5dUis.jpeg
  urls:
    - https://huggingface.co/QuantFactory/Llama3.2-3B-Enigma-GGUF
  description: |
    Enigma is a high-quality code-instruct model built on Llama 3.2 3b, using the Llama 3.2 Instruct chat format. It is finetuned on synthetic code-instruct data generated with Llama 3.1 405b and supplemented with generalist synthetic data.
  overrides:
    parameters:
      model: Llama3.2-3B-Enigma.Q4_K_M.gguf
  files:
    - filename: Llama3.2-3B-Enigma.Q4_K_M.gguf
      sha256: 4304e6ee1e348b228470700ec1e9423f5972333d376295195ce6cd5c70cae5e4
      uri: huggingface://QuantFactory/Llama3.2-3B-Enigma-GGUF/Llama3.2-3B-Enigma.Q4_K_M.gguf
- !!merge <<: *llama32
  name: "llama3.2-3b-esper2"
  icon: https://cdn-uploads.huggingface.co/production/uploads/64f267a8a4f79a118e0fcc89/4I6oK8DG0so4VD8GroFsd.jpeg
  urls:
    - https://huggingface.co/QuantFactory/Llama3.2-3B-Esper2-GGUF
  description: |
    Esper 2 is a DevOps and cloud architecture code specialist built on Llama 3.2 3b. It is an AI assistant focused on AWS, Azure, GCP, Terraform, Dockerfiles, pipelines, shell scripts, and more, with real-world problem solving and high-quality code-instruct performance within the Llama 3.2 Instruct chat format. Finetuned on synthetic DevOps-instruct and code-instruct data generated with Llama 3.1 405b and supplemented with generalist chat data.
  overrides:
    parameters:
      model: Llama3.2-3B-Esper2.Q4_K_M.gguf
  files:
    - filename: Llama3.2-3B-Esper2.Q4_K_M.gguf
      sha256: 11d2bd674aa22a71a59ec49ad29b695000d14bc275b0195b8d7089bfc7582fc7
      uri: huggingface://QuantFactory/Llama3.2-3B-Esper2-GGUF/Llama3.2-3B-Esper2.Q4_K_M.gguf
- &qwen25
  ## Qwen2.5
  name: "qwen2.5-14b-instruct"
@@ -336,6 +396,59 @@
    - filename: T.E-8.1-Q4_K_M-imat.gguf
      sha256: 1b7892b82c01ea4cbebe34cd00f9836cbbc369fc3247c1f44a92842201e7ec0b
      uri: huggingface://Lewdiculous/T.E-8.1-GGUF-IQ-Imatrix-Request/T.E-8.1-Q4_K_M-imat.gguf
- &archfunct
  license: apache-2.0
  tags:
    - llm
    - gguf
    - gpu
    - qwen
    - qwen2.5
    - cpu
    - function-calling
  name: "arch-function-1.5b"
  uri: "github:mudler/LocalAI/gallery/arch-function.yaml@master"
  urls:
    - https://huggingface.co/katanemolabs/Arch-Function-1.5B
    - https://huggingface.co/mradermacher/Arch-Function-1.5B-GGUF
  description: |
    The Katanemo Arch-Function collection of large language models (LLMs) is a collection of state-of-the-art (SOTA) LLMs specifically designed for function calling tasks. The models are designed to understand complex function signatures, identify required parameters, and produce accurate function call outputs based on natural language prompts. Achieving performance on par with GPT-4, these models set a new benchmark in the domain of function-oriented tasks, making them suitable for scenarios where automated API interaction and function execution is crucial.
    In summary, the Katanemo Arch-Function collection demonstrates:
    - State-of-the-art performance in function calling
    - Accurate parameter identification and suggestion, even in ambiguous or incomplete inputs
    - High generalization across multiple function calling use cases, from API interactions to automated backend tasks
    - Optimized low-latency, high-throughput performance, making it suitable for real-time, production environments
  overrides:
    parameters:
      model: Arch-Function-1.5B.Q4_K_M.gguf
  files:
    - filename: Arch-Function-1.5B.Q4_K_M.gguf
      sha256: 5ac54d2d50cca0ee0335ca2c9b688204c0829cd3a73de3ee3fda108281ad9691
      uri: huggingface://mradermacher/Arch-Function-1.5B-GGUF/Arch-Function-1.5B.Q4_K_M.gguf
- !!merge <<: *archfunct
  name: "arch-function-7b"
  urls:
    - https://huggingface.co/katanemolabs/Arch-Function-7B
    - https://huggingface.co/mradermacher/Arch-Function-7B-GGUF
  overrides:
    parameters:
      model: Arch-Function-7B.Q4_K_M.gguf
  files:
    - filename: Arch-Function-7B.Q4_K_M.gguf
      sha256: 6e38661321d79d02b8cf57c79d97c6c0e19adb9ffa66083cc440c24e257234b6
      uri: huggingface://mradermacher/Arch-Function-7B-GGUF/Arch-Function-7B.Q4_K_M.gguf
- !!merge <<: *archfunct
  name: "arch-function-3b"
  urls:
    - https://huggingface.co/katanemolabs/Arch-Function-3B
    - https://huggingface.co/mradermacher/Arch-Function-3B-GGUF
  overrides:
    parameters:
      model: Arch-Function-3B.Q4_K_M.gguf
  files:
    - filename: Arch-Function-3B.Q4_K_M.gguf
      sha256: 9945cb8d070498d163e5df90c1987f591d35e4fd2222a6c51bcfff848c4b573b
      uri: huggingface://mradermacher/Arch-Function-3B-GGUF/Arch-Function-3B.Q4_K_M.gguf
- &smollm
  ## SmolLM
  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
@@ -845,6 +958,21 @@
    - filename: NightyGurps-14b-v1.1-Q4_K_M.gguf
      sha256: d09d53259ad2c0298150fa8c2db98fe42f11731af89fdc80ad0e255a19adc4b0
      uri: huggingface://bartowski/NightyGurps-14b-v1.1-GGUF/NightyGurps-14b-v1.1-Q4_K_M.gguf
- !!merge <<: *llama31
  name: "llama-3.1-swallow-70b-v0.1-i1"
  icon: https://huggingface.co/tokyotech-llm/Llama-3.1-Swallow-70B-v0.1/resolve/main/logo.png
  urls:
    - https://huggingface.co/tokyotech-llm/Llama-3.1-Swallow-70B-v0.1
    - https://huggingface.co/mradermacher/Llama-3.1-Swallow-70B-v0.1-i1-GGUF
  description: |
    Llama 3.1 Swallow is a series of large language models (8B, 70B) built by continual pre-training on the Meta Llama 3.1 models. Llama 3.1 Swallow enhances the Japanese language capabilities of the original Llama 3.1 while retaining its English capabilities. Continual pre-training used approximately 200 billion tokens sampled from a large Japanese web corpus (Swallow Corpus Version 2), Japanese and English Wikipedia articles, and mathematical and coding content (see the Training Datasets section). The instruction-tuned models (Instruct) were built by supervised fine-tuning (SFT) on synthetic data specially built for Japanese. See the Swallow Model Index section to find other model variants.
  overrides:
    parameters:
      model: Llama-3.1-Swallow-70B-v0.1.i1-Q4_K_M.gguf
  files:
    - filename: Llama-3.1-Swallow-70B-v0.1.i1-Q4_K_M.gguf
      sha256: 9eaa08a4872a26f56fe34b27a99f7bd0d22ee2b2d1c84cfcde2091b5f61af5fa
      uri: huggingface://mradermacher/Llama-3.1-Swallow-70B-v0.1-i1-GGUF/Llama-3.1-Swallow-70B-v0.1.i1-Q4_K_M.gguf
## Uncensored models
- !!merge <<: *llama31
  name: "humanish-roleplay-llama-3.1-8b-i1"
