Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[pull] master from mudler:master #96

Merged
merged 9 commits into from
Sep 8, 2024
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=581c305186a0ff93f360346c57e21fe16e967bb7
CPPLLAMA_VERSION?=815b1fb20a53e439882171757825bacb1350de04

# go-rwkv version
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6

# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
WHISPER_CPP_VERSION?=5236f0278420ab776d1787c4330678d80219b4b6
WHISPER_CPP_VERSION?=5caa19240d55bfd6ee316d50fbad32c6e9c39528

# bert.cpp version
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
Expand Down
11 changes: 11 additions & 0 deletions core/http/app_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -772,6 +772,17 @@ var _ = Describe("API test", func() {
Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error:"))
})

It("shows the external backend", func() {
	// Query the /system endpoint and verify that both the
	// externally-configured backend (huggingface) and a built-in
	// backend (llama-cpp) are reported as available.
	resp, err := http.Get("http://127.0.0.1:9090/system")
	Expect(err).ToNot(HaveOccurred())
	// Close the body so the HTTP transport can reuse the connection
	// across the remaining specs in this suite.
	defer resp.Body.Close()
	Expect(resp.StatusCode).To(Equal(200))
	dat, err := io.ReadAll(resp.Body)
	Expect(err).ToNot(HaveOccurred())
	Expect(string(dat)).To(ContainSubstring("huggingface"))
	Expect(string(dat)).To(ContainSubstring("llama-cpp"))
})

It("transcribes audio", func() {
if runtime.GOOS != "linux" {
Skip("test supported only on linux")
Expand Down
29 changes: 29 additions & 0 deletions core/http/endpoints/localai/system.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package localai

import (
	"sort"

	"github.com/gofiber/fiber/v2"
	"github.com/mudler/LocalAI/core/config"
	"github.com/mudler/LocalAI/core/schema"
	"github.com/mudler/LocalAI/pkg/model"
)

// SystemInformations returns the system information: the list of backends
// (bundled assets plus externally-configured gRPC backends) this instance
// can serve.
// @Summary Show the LocalAI instance information
// @Success 200 {object} schema.SystemInformationResponse "Response"
// @Router /system [get]
func SystemInformations(ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(*fiber.Ctx) error {
	return func(c *fiber.Ctx) error {
		// Backends shipped in the asset directory (e.g. llama-cpp, whisper).
		availableBackends, err := ml.ListAvailableBackends(appConfig.AssetsDestination)
		if err != nil {
			return err
		}
		// Append externally-configured gRPC backends. Map iteration order is
		// random in Go, so sort afterwards for a deterministic response.
		for b := range appConfig.ExternalGRPCBackends {
			availableBackends = append(availableBackends, b)
		}
		sort.Strings(availableBackends)
		return c.JSON(
			schema.SystemInformationResponse{
				Backends: availableBackends,
			},
		)
	}
}
2 changes: 2 additions & 0 deletions core/http/routes/localai.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,4 +70,6 @@ func RegisterLocalAIRoutes(app *fiber.App,
}{Version: internal.PrintableVersion()})
})

app.Get("/system", auth, localai.SystemInformations(ml, appConfig))

}
4 changes: 4 additions & 0 deletions core/schema/localai.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,7 @@ type P2PNodesResponse struct {
Nodes []p2p.NodeData `json:"nodes" yaml:"nodes"`
FederatedNodes []p2p.NodeData `json:"federated_nodes" yaml:"federated_nodes"`
}

type SystemInformationResponse struct {
Backends []string `json:"backends"`
}
132 changes: 130 additions & 2 deletions gallery/index.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,22 @@
- filename: Hubble-4B-v1-Q4_K_M.gguf
uri: huggingface://TheDrummer/Hubble-4B-v1-GGUF/Hubble-4B-v1-Q4_K_M.gguf
sha256: 0721294d0e861c6e6162a112fc7242e0c4b260c156137f4bcbb08667f1748080
- !!merge <<: *llama31
name: "reflection-llama-3.1-70b"
urls:
- https://huggingface.co/leafspark/Reflection-Llama-3.1-70B-bf16
- https://huggingface.co/senseable/Reflection-Llama-3.1-70B-gguf
description: |
Reflection Llama-3.1 70B is (currently) the world's top open-source LLM, trained with a new technique called Reflection-Tuning that teaches a LLM to detect mistakes in its reasoning and correct course.

The model was trained on synthetic data generated by Glaive. If you're training a model, Glaive is incredible — use them.
overrides:
parameters:
model: Reflection-Llama-3.1-70B-q4_k_m.gguf
files:
- filename: Reflection-Llama-3.1-70B-q4_k_m.gguf
sha256: 16064e07037883a750cfeae9a7be41143aa857dbac81c2e93c68e2f941dee7b2
uri: huggingface://senseable/Reflection-Llama-3.1-70B-gguf/Reflection-Llama-3.1-70B-q4_k_m.gguf
## Uncensored models
- !!merge <<: *llama31
name: "humanish-roleplay-llama-3.1-8b-i1"
Expand Down Expand Up @@ -1692,6 +1708,20 @@
- filename: shieldgemma-9b.i1-Q4_K_M.gguf
sha256: ffa7eaadcc0c7d0544fda5b0d86bba3ffa3431b673e5b2135f421cfe65bd8732
uri: huggingface://mradermacher/shieldgemma-9b-i1-GGUF/shieldgemma-9b.i1-Q4_K_M.gguf
- !!merge <<: *gemma
name: "athena-codegemma-2-2b-it"
urls:
- https://huggingface.co/EpistemeAI/Athena-codegemma-2-2b-it
- https://huggingface.co/mradermacher/Athena-codegemma-2-2b-it-GGUF
description: |
Supervised fine tuned (sft unsloth) for coding with EpistemeAI coding dataset.
overrides:
parameters:
model: Athena-codegemma-2-2b-it.Q4_K_M.gguf
files:
- filename: Athena-codegemma-2-2b-it.Q4_K_M.gguf
sha256: 59ce17023438b0da603dd211c7d39f78e7acac4108258ac0818a97a4ca7d64e3
uri: huggingface://mradermacher/Athena-codegemma-2-2b-it-GGUF/Athena-codegemma-2-2b-it.Q4_K_M.gguf
- &llama3
url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
Expand Down Expand Up @@ -1724,7 +1754,7 @@
files:
- filename: Meta-Llama-3-8B-Instruct.Q4_0.gguf
uri: huggingface://QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct.Q4_0.gguf
sha256: 18c8eb909db870d456a823700b4c82f6259e6052899f0ebf2bddc9b2417cd355
sha256: 2b4675c2208f09ad8762d8cf1b6a4a26bf65e6f0641aba324ec65143c0b4ad9f
- !!merge <<: *llama3
name: "llama3-8b-instruct:Q6_K"
overrides:
Expand All @@ -1733,7 +1763,7 @@
files:
- filename: Meta-Llama-3-8B-Instruct.Q6_K.gguf
uri: huggingface://QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct.Q6_K.gguf
sha256: 67f8eb2218938a5fd711605d526d2287e9a4ad26849efdf3bf7c0c17dcbde018
sha256: bd7efd73f9fb67e4b9ecc43f861f37c7e594e78a8a5ff9c29da021692bd243ef
- !!merge <<: *llama3
name: "llama-3-8b-instruct-abliterated"
urls:
Expand Down Expand Up @@ -3193,6 +3223,104 @@
- filename: magnum-v3-34b-Q4_K_M.gguf
sha256: f902956c0731581f1ff189e547e6e5aad86b77af5f4dc7e4fc26bcda5c1f7cc3
uri: huggingface://bartowski/magnum-v3-34b-GGUF/magnum-v3-34b-Q4_K_M.gguf
- !!merge <<: *yi-chat
name: "yi-coder-9b-chat"
urls:
- https://huggingface.co/01-ai/Yi-Coder-9B-Chat
- https://huggingface.co/bartowski/Yi-Coder-9B-Chat-GGUF
- https://01-ai.github.io/
- https://github.com/01-ai/Yi-Coder
description: |
Yi-Coder is a series of open-source code language models that delivers state-of-the-art coding performance with fewer than 10 billion parameters.
Key features:

Excelling in long-context understanding with a maximum context length of 128K tokens.
Supporting 52 major programming languages:

'java', 'markdown', 'python', 'php', 'javascript', 'c++', 'c#', 'c', 'typescript', 'html', 'go', 'java_server_pages', 'dart', 'objective-c', 'kotlin', 'tex', 'swift', 'ruby', 'sql', 'rust', 'css', 'yaml', 'matlab', 'lua', 'json', 'shell', 'visual_basic', 'scala', 'rmarkdown', 'pascal', 'fortran', 'haskell', 'assembly', 'perl', 'julia', 'cmake', 'groovy', 'ocaml', 'powershell', 'elixir', 'clojure', 'makefile', 'coffeescript', 'erlang', 'lisp', 'toml', 'batchfile', 'cobol', 'dockerfile', 'r', 'prolog', 'verilog'

For model details and benchmarks, see Yi-Coder blog and Yi-Coder README.
overrides:
parameters:
model: Yi-Coder-9B-Chat-Q4_K_M.gguf
files:
- filename: Yi-Coder-9B-Chat-Q4_K_M.gguf
sha256: 251cc196e3813d149694f362bb0f8f154f3320abe44724eebe58c23dc54f201d
uri: huggingface://bartowski/Yi-Coder-9B-Chat-GGUF/Yi-Coder-9B-Chat-Q4_K_M.gguf
- !!merge <<: *yi-chat
name: "yi-coder-1.5b-chat"
urls:
- https://huggingface.co/01-ai/Yi-Coder-1.5B-Chat
- https://huggingface.co/MaziyarPanahi/Yi-Coder-1.5B-Chat-GGUF
- https://01-ai.github.io/
- https://github.com/01-ai/Yi-Coder
description: |
Yi-Coder is a series of open-source code language models that delivers state-of-the-art coding performance with fewer than 10 billion parameters.
Key features:

Excelling in long-context understanding with a maximum context length of 128K tokens.
Supporting 52 major programming languages:

'java', 'markdown', 'python', 'php', 'javascript', 'c++', 'c#', 'c', 'typescript', 'html', 'go', 'java_server_pages', 'dart', 'objective-c', 'kotlin', 'tex', 'swift', 'ruby', 'sql', 'rust', 'css', 'yaml', 'matlab', 'lua', 'json', 'shell', 'visual_basic', 'scala', 'rmarkdown', 'pascal', 'fortran', 'haskell', 'assembly', 'perl', 'julia', 'cmake', 'groovy', 'ocaml', 'powershell', 'elixir', 'clojure', 'makefile', 'coffeescript', 'erlang', 'lisp', 'toml', 'batchfile', 'cobol', 'dockerfile', 'r', 'prolog', 'verilog'

For model details and benchmarks, see Yi-Coder blog and Yi-Coder README.
overrides:
parameters:
model: Yi-Coder-1.5B-Chat.Q4_K_M.gguf
files:
- filename: Yi-Coder-1.5B-Chat.Q4_K_M.gguf
sha256: e2e8fa659cd75c828d7783b5c2fb60d220e08836065901fad8edb48e537c1cec
uri: huggingface://MaziyarPanahi/Yi-Coder-1.5B-Chat-GGUF/Yi-Coder-1.5B-Chat.Q4_K_M.gguf
- !!merge <<: *yi-chat
url: "github:mudler/LocalAI/gallery/codellama.yaml@master"
name: "yi-coder-1.5b"
urls:
- https://huggingface.co/01-ai/Yi-Coder-1.5B
- https://huggingface.co/QuantFactory/Yi-Coder-1.5B-GGUF
- https://01-ai.github.io/
- https://github.com/01-ai/Yi-Coder
description: |
Yi-Coder is a series of open-source code language models that delivers state-of-the-art coding performance with fewer than 10 billion parameters.
Key features:

Excelling in long-context understanding with a maximum context length of 128K tokens.
Supporting 52 major programming languages:

'java', 'markdown', 'python', 'php', 'javascript', 'c++', 'c#', 'c', 'typescript', 'html', 'go', 'java_server_pages', 'dart', 'objective-c', 'kotlin', 'tex', 'swift', 'ruby', 'sql', 'rust', 'css', 'yaml', 'matlab', 'lua', 'json', 'shell', 'visual_basic', 'scala', 'rmarkdown', 'pascal', 'fortran', 'haskell', 'assembly', 'perl', 'julia', 'cmake', 'groovy', 'ocaml', 'powershell', 'elixir', 'clojure', 'makefile', 'coffeescript', 'erlang', 'lisp', 'toml', 'batchfile', 'cobol', 'dockerfile', 'r', 'prolog', 'verilog'

For model details and benchmarks, see Yi-Coder blog and Yi-Coder README.
overrides:
parameters:
model: Yi-Coder-1.5B.Q4_K_M.gguf
files:
- filename: Yi-Coder-1.5B.Q4_K_M.gguf
sha256: 86a280dd36c9b2342b7023532f9c2c287e251f5cd10bc81ca262db8c1668f272
uri: huggingface://QuantFactory/Yi-Coder-1.5B-GGUF/Yi-Coder-1.5B.Q4_K_M.gguf
- !!merge <<: *yi-chat
url: "github:mudler/LocalAI/gallery/codellama.yaml@master"
name: "yi-coder-9b"
urls:
- https://huggingface.co/01-ai/Yi-Coder-9B
- https://huggingface.co/QuantFactory/Yi-Coder-9B-GGUF
- https://01-ai.github.io/
- https://github.com/01-ai/Yi-Coder
description: |
Yi-Coder is a series of open-source code language models that delivers state-of-the-art coding performance with fewer than 10 billion parameters.
Key features:

Excelling in long-context understanding with a maximum context length of 128K tokens.
Supporting 52 major programming languages:

'java', 'markdown', 'python', 'php', 'javascript', 'c++', 'c#', 'c', 'typescript', 'html', 'go', 'java_server_pages', 'dart', 'objective-c', 'kotlin', 'tex', 'swift', 'ruby', 'sql', 'rust', 'css', 'yaml', 'matlab', 'lua', 'json', 'shell', 'visual_basic', 'scala', 'rmarkdown', 'pascal', 'fortran', 'haskell', 'assembly', 'perl', 'julia', 'cmake', 'groovy', 'ocaml', 'powershell', 'elixir', 'clojure', 'makefile', 'coffeescript', 'erlang', 'lisp', 'toml', 'batchfile', 'cobol', 'dockerfile', 'r', 'prolog', 'verilog'

For model details and benchmarks, see Yi-Coder blog and Yi-Coder README.
overrides:
parameters:
model: Yi-Coder-9B.Q4_K_M.gguf
files:
- filename: Yi-Coder-9B.Q4_K_M.gguf
sha256: cff3db8a69c43654e3c2d2984e86ad2791d1d446ec56b24a636ba1ce78363308
uri: huggingface://QuantFactory/Yi-Coder-9B-GGUF/Yi-Coder-9B.Q4_K_M.gguf
- &vicuna-chat
## LLama2 and derivatives
### Start Fimbulvetr
Expand Down
4 changes: 4 additions & 0 deletions pkg/model/initializers.go
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,10 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
}
}

// ListAvailableBackends returns the names of the backends bundled in the
// given asset directory.
func (ml *ModelLoader) ListAvailableBackends(assetdir string) ([]string, error) {
	backends, err := backendsInAssetDir(assetdir)
	if err != nil {
		return nil, err
	}
	return backends, nil
}

func (ml *ModelLoader) BackendLoader(opts ...Option) (client grpc.Backend, err error) {
o := NewOptions(opts...)

Expand Down
24 changes: 24 additions & 0 deletions swagger/docs.go
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,19 @@ const docTemplate = `{
}
}
},
"/system": {
"get": {
"summary": "Show the LocalAI instance information",
"responses": {
"200": {
"description": "Response",
"schema": {
"$ref": "#/definitions/schema.SystemInformationResponse"
}
}
}
}
},
"/tts": {
"post": {
"consumes": [
Expand Down Expand Up @@ -1649,6 +1662,17 @@ const docTemplate = `{
}
}
},
"schema.SystemInformationResponse": {
"type": "object",
"properties": {
"backends": {
"type": "array",
"items": {
"type": "string"
}
}
}
},
"schema.TTSRequest": {
"description": "TTS request body",
"type": "object",
Expand Down
24 changes: 24 additions & 0 deletions swagger/swagger.json
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,19 @@
}
}
},
"/system": {
"get": {
"summary": "Show the LocalAI instance information",
"responses": {
"200": {
"description": "Response",
"schema": {
"$ref": "#/definitions/schema.SystemInformationResponse"
}
}
}
}
},
"/tts": {
"post": {
"consumes": [
Expand Down Expand Up @@ -1642,6 +1655,17 @@
}
}
},
"schema.SystemInformationResponse": {
"type": "object",
"properties": {
"backends": {
"type": "array",
"items": {
"type": "string"
}
}
}
},
"schema.TTSRequest": {
"description": "TTS request body",
"type": "object",
Expand Down
15 changes: 15 additions & 0 deletions swagger/swagger.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -638,6 +638,13 @@ definitions:
$ref: '#/definitions/p2p.NodeData'
type: array
type: object
schema.SystemInformationResponse:
properties:
backends:
items:
type: string
type: array
type: object
schema.TTSRequest:
description: TTS request body
properties:
Expand Down Expand Up @@ -832,6 +839,14 @@ paths:
schema:
$ref: '#/definitions/gallery.GalleryOpStatus'
summary: Returns the job status
/system:
get:
responses:
"200":
description: Response
schema:
$ref: '#/definitions/schema.SystemInformationResponse'
summary: Show the LocalAI instance information
/tts:
post:
consumes:
Expand Down
Loading