Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[pull] master from mudler:master #96

Merged
merged 9 commits into from
Sep 8, 2024
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=581c305186a0ff93f360346c57e21fe16e967bb7
CPPLLAMA_VERSION?=815b1fb20a53e439882171757825bacb1350de04

# go-rwkv version
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6

# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
WHISPER_CPP_VERSION?=5236f0278420ab776d1787c4330678d80219b4b6
WHISPER_CPP_VERSION?=5caa19240d55bfd6ee316d50fbad32c6e9c39528

# bert.cpp version
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
Expand Down
11 changes: 11 additions & 0 deletions core/http/app_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -772,6 +772,17 @@ var _ = Describe("API test", func() {
Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error:"))
})

It("shows the external backend", func() {
	// Query the /system endpoint and verify that both the
	// externally-configured backend (huggingface) and a built-in
	// backend (llama-cpp) are reported as available.
	resp, err := http.Get("http://127.0.0.1:9090/system")
	Expect(err).ToNot(HaveOccurred())
	// Close the body so the HTTP transport can reuse the connection
	// across the remaining specs in this suite.
	defer resp.Body.Close()
	Expect(resp.StatusCode).To(Equal(200))
	dat, err := io.ReadAll(resp.Body)
	Expect(err).ToNot(HaveOccurred())
	Expect(string(dat)).To(ContainSubstring("huggingface"))
	Expect(string(dat)).To(ContainSubstring("llama-cpp"))
})

It("transcribes audio", func() {
if runtime.GOOS != "linux" {
Skip("test supported only on linux")
Expand Down
29 changes: 29 additions & 0 deletions core/http/endpoints/localai/system.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package localai

import (
	"sort"

	"github.com/gofiber/fiber/v2"
	"github.com/mudler/LocalAI/core/config"
	"github.com/mudler/LocalAI/core/schema"
	"github.com/mudler/LocalAI/pkg/model"
)

// SystemInformations returns the system information: the list of backends
// (bundled assets plus externally-configured gRPC backends) this instance
// can serve.
// @Summary Show the LocalAI instance information
// @Success 200 {object} schema.SystemInformationResponse "Response"
// @Router /system [get]
func SystemInformations(ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(*fiber.Ctx) error {
	return func(c *fiber.Ctx) error {
		// Backends shipped in the asset directory (e.g. llama-cpp, whisper).
		availableBackends, err := ml.ListAvailableBackends(appConfig.AssetsDestination)
		if err != nil {
			return err
		}
		// Append externally-configured gRPC backends. Map iteration order is
		// random in Go, so sort afterwards for a deterministic response.
		for b := range appConfig.ExternalGRPCBackends {
			availableBackends = append(availableBackends, b)
		}
		sort.Strings(availableBackends)
		return c.JSON(
			schema.SystemInformationResponse{
				Backends: availableBackends,
			},
		)
	}
}
2 changes: 2 additions & 0 deletions core/http/routes/localai.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,4 +70,6 @@ func RegisterLocalAIRoutes(app *fiber.App,
}{Version: internal.PrintableVersion()})
})

app.Get("/system", auth, localai.SystemInformations(ml, appConfig))

}
4 changes: 4 additions & 0 deletions core/schema/localai.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,7 @@ type P2PNodesResponse struct {
Nodes []p2p.NodeData `json:"nodes" yaml:"nodes"`
FederatedNodes []p2p.NodeData `json:"federated_nodes" yaml:"federated_nodes"`
}

type SystemInformationResponse struct {
Backends []string `json:"backends"`
}
132 changes: 130 additions & 2 deletions gallery/index.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,22 @@
- filename: Hubble-4B-v1-Q4_K_M.gguf
uri: huggingface://TheDrummer/Hubble-4B-v1-GGUF/Hubble-4B-v1-Q4_K_M.gguf
sha256: 0721294d0e861c6e6162a112fc7242e0c4b260c156137f4bcbb08667f1748080
- !!merge <<: *llama31
name: "reflection-llama-3.1-70b"
urls:
- https://huggingface.co/leafspark/Reflection-Llama-3.1-70B-bf16
- https://huggingface.co/senseable/Reflection-Llama-3.1-70B-gguf
description: |
Reflection Llama-3.1 70B is (currently) the world's top open-source LLM, trained with a new technique called Reflection-Tuning that teaches a LLM to detect mistakes in its reasoning and correct course.

The model was trained on synthetic data generated by Glaive. If you're training a model, Glaive is incredible — use them.
overrides:
parameters:
model: Reflection-Llama-3.1-70B-q4_k_m.gguf
files:
- filename: Reflection-Llama-3.1-70B-q4_k_m.gguf
sha256: 16064e07037883a750cfeae9a7be41143aa857dbac81c2e93c68e2f941dee7b2
uri: huggingface://senseable/Reflection-Llama-3.1-70B-gguf/Reflection-Llama-3.1-70B-q4_k_m.gguf
## Uncensored models
- !!merge <<: *llama31
name: "humanish-roleplay-llama-3.1-8b-i1"
Expand Down Expand Up @@ -1692,6 +1708,20 @@
- filename: shieldgemma-9b.i1-Q4_K_M.gguf
sha256: ffa7eaadcc0c7d0544fda5b0d86bba3ffa3431b673e5b2135f421cfe65bd8732
uri: huggingface://mradermacher/shieldgemma-9b-i1-GGUF/shieldgemma-9b.i1-Q4_K_M.gguf
- !!merge <<: *gemma
name: "athena-codegemma-2-2b-it"
urls:
- https://huggingface.co/EpistemeAI/Athena-codegemma-2-2b-it
- https://huggingface.co/mradermacher/Athena-codegemma-2-2b-it-GGUF
description: |
Supervised fine tuned (sft unsloth) for coding with EpistemeAI coding dataset.
overrides:
parameters:
model: Athena-codegemma-2-2b-it.Q4_K_M.gguf
files:
- filename: Athena-codegemma-2-2b-it.Q4_K_M.gguf
sha256: 59ce17023438b0da603dd211c7d39f78e7acac4108258ac0818a97a4ca7d64e3
uri: huggingface://mradermacher/Athena-codegemma-2-2b-it-GGUF/Athena-codegemma-2-2b-it.Q4_K_M.gguf
- &llama3
url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
Expand Down Expand Up @@ -1724,7 +1754,7 @@
files:
- filename: Meta-Llama-3-8B-Instruct.Q4_0.gguf
uri: huggingface://QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct.Q4_0.gguf
sha256: 18c8eb909db870d456a823700b4c82f6259e6052899f0ebf2bddc9b2417cd355
sha256: 2b4675c2208f09ad8762d8cf1b6a4a26bf65e6f0641aba324ec65143c0b4ad9f
- !!merge <<: *llama3
name: "llama3-8b-instruct:Q6_K"
overrides:
Expand All @@ -1733,7 +1763,7 @@
files:
- filename: Meta-Llama-3-8B-Instruct.Q6_K.gguf
uri: huggingface://QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct.Q6_K.gguf
sha256: 67f8eb2218938a5fd711605d526d2287e9a4ad26849efdf3bf7c0c17dcbde018
sha256: bd7efd73f9fb67e4b9ecc43f861f37c7e594e78a8a5ff9c29da021692bd243ef
- !!merge <<: *llama3
name: "llama-3-8b-instruct-abliterated"
urls:
Expand Down Expand Up @@ -3193,6 +3223,104 @@
- filename: magnum-v3-34b-Q4_K_M.gguf
sha256: f902956c0731581f1ff189e547e6e5aad86b77af5f4dc7e4fc26bcda5c1f7cc3
uri: huggingface://bartowski/magnum-v3-34b-GGUF/magnum-v3-34b-Q4_K_M.gguf
- !!merge <<: *yi-chat
name: "yi-coder-9b-chat"
urls:
- https://huggingface.co/01-ai/Yi-Coder-9B-Chat
- https://huggingface.co/bartowski/Yi-Coder-9B-Chat-GGUF
- https://01-ai.github.io/
- https://github.com/01-ai/Yi-Coder
description: |
Yi-Coder is a series of open-source code language models that delivers state-of-the-art coding performance with fewer than 10 billion parameters.
Key features:

Excelling in long-context understanding with a maximum context length of 128K tokens.
Supporting 52 major programming languages:

'java', 'markdown', 'python', 'php', 'javascript', 'c++', 'c#', 'c', 'typescript', 'html', 'go', 'java_server_pages', 'dart', 'objective-c', 'kotlin', 'tex', 'swift', 'ruby', 'sql', 'rust', 'css', 'yaml', 'matlab', 'lua', 'json', 'shell', 'visual_basic', 'scala', 'rmarkdown', 'pascal', 'fortran', 'haskell', 'assembly', 'perl', 'julia', 'cmake', 'groovy', 'ocaml', 'powershell', 'elixir', 'clojure', 'makefile', 'coffeescript', 'erlang', 'lisp', 'toml', 'batchfile', 'cobol', 'dockerfile', 'r', 'prolog', 'verilog'

For model details and benchmarks, see Yi-Coder blog and Yi-Coder README.
overrides:
parameters:
model: Yi-Coder-9B-Chat-Q4_K_M.gguf
files:
- filename: Yi-Coder-9B-Chat-Q4_K_M.gguf
sha256: 251cc196e3813d149694f362bb0f8f154f3320abe44724eebe58c23dc54f201d
uri: huggingface://bartowski/Yi-Coder-9B-Chat-GGUF/Yi-Coder-9B-Chat-Q4_K_M.gguf
- !!merge <<: *yi-chat
name: "yi-coder-1.5b-chat"
urls:
- https://huggingface.co/01-ai/Yi-Coder-1.5B-Chat
- https://huggingface.co/MaziyarPanahi/Yi-Coder-1.5B-Chat-GGUF
- https://01-ai.github.io/
- https://github.com/01-ai/Yi-Coder
description: |
Yi-Coder is a series of open-source code language models that delivers state-of-the-art coding performance with fewer than 10 billion parameters.
Key features:

Excelling in long-context understanding with a maximum context length of 128K tokens.
Supporting 52 major programming languages:

'java', 'markdown', 'python', 'php', 'javascript', 'c++', 'c#', 'c', 'typescript', 'html', 'go', 'java_server_pages', 'dart', 'objective-c', 'kotlin', 'tex', 'swift', 'ruby', 'sql', 'rust', 'css', 'yaml', 'matlab', 'lua', 'json', 'shell', 'visual_basic', 'scala', 'rmarkdown', 'pascal', 'fortran', 'haskell', 'assembly', 'perl', 'julia', 'cmake', 'groovy', 'ocaml', 'powershell', 'elixir', 'clojure', 'makefile', 'coffeescript', 'erlang', 'lisp', 'toml', 'batchfile', 'cobol', 'dockerfile', 'r', 'prolog', 'verilog'

For model details and benchmarks, see Yi-Coder blog and Yi-Coder README.
overrides:
parameters:
model: Yi-Coder-1.5B-Chat.Q4_K_M.gguf
files:
- filename: Yi-Coder-1.5B-Chat.Q4_K_M.gguf
sha256: e2e8fa659cd75c828d7783b5c2fb60d220e08836065901fad8edb48e537c1cec
uri: huggingface://MaziyarPanahi/Yi-Coder-1.5B-Chat-GGUF/Yi-Coder-1.5B-Chat.Q4_K_M.gguf
- !!merge <<: *yi-chat
url: "github:mudler/LocalAI/gallery/codellama.yaml@master"
name: "yi-coder-1.5b"
urls:
- https://huggingface.co/01-ai/Yi-Coder-1.5B
- https://huggingface.co/QuantFactory/Yi-Coder-1.5B-GGUF
- https://01-ai.github.io/
- https://github.com/01-ai/Yi-Coder
description: |
Yi-Coder is a series of open-source code language models that delivers state-of-the-art coding performance with fewer than 10 billion parameters.
Key features:

Excelling in long-context understanding with a maximum context length of 128K tokens.
Supporting 52 major programming languages:

'java', 'markdown', 'python', 'php', 'javascript', 'c++', 'c#', 'c', 'typescript', 'html', 'go', 'java_server_pages', 'dart', 'objective-c', 'kotlin', 'tex', 'swift', 'ruby', 'sql', 'rust', 'css', 'yaml', 'matlab', 'lua', 'json', 'shell', 'visual_basic', 'scala', 'rmarkdown', 'pascal', 'fortran', 'haskell', 'assembly', 'perl', 'julia', 'cmake', 'groovy', 'ocaml', 'powershell', 'elixir', 'clojure', 'makefile', 'coffeescript', 'erlang', 'lisp', 'toml', 'batchfile', 'cobol', 'dockerfile', 'r', 'prolog', 'verilog'

For model details and benchmarks, see Yi-Coder blog and Yi-Coder README.
overrides:
parameters:
model: Yi-Coder-1.5B.Q4_K_M.gguf
files:
- filename: Yi-Coder-1.5B.Q4_K_M.gguf
sha256: 86a280dd36c9b2342b7023532f9c2c287e251f5cd10bc81ca262db8c1668f272
uri: huggingface://QuantFactory/Yi-Coder-1.5B-GGUF/Yi-Coder-1.5B.Q4_K_M.gguf
- !!merge <<: *yi-chat
url: "github:mudler/LocalAI/gallery/codellama.yaml@master"
name: "yi-coder-9b"
urls:
- https://huggingface.co/01-ai/Yi-Coder-9B
- https://huggingface.co/QuantFactory/Yi-Coder-9B-GGUF
- https://01-ai.github.io/
- https://github.com/01-ai/Yi-Coder
description: |
Yi-Coder is a series of open-source code language models that delivers state-of-the-art coding performance with fewer than 10 billion parameters.
Key features:

Excelling in long-context understanding with a maximum context length of 128K tokens.
Supporting 52 major programming languages:

'java', 'markdown', 'python', 'php', 'javascript', 'c++', 'c#', 'c', 'typescript', 'html', 'go', 'java_server_pages', 'dart', 'objective-c', 'kotlin', 'tex', 'swift', 'ruby', 'sql', 'rust', 'css', 'yaml', 'matlab', 'lua', 'json', 'shell', 'visual_basic', 'scala', 'rmarkdown', 'pascal', 'fortran', 'haskell', 'assembly', 'perl', 'julia', 'cmake', 'groovy', 'ocaml', 'powershell', 'elixir', 'clojure', 'makefile', 'coffeescript', 'erlang', 'lisp', 'toml', 'batchfile', 'cobol', 'dockerfile', 'r', 'prolog', 'verilog'

For model details and benchmarks, see Yi-Coder blog and Yi-Coder README.
overrides:
parameters:
model: Yi-Coder-9B.Q4_K_M.gguf
files:
- filename: Yi-Coder-9B.Q4_K_M.gguf
sha256: cff3db8a69c43654e3c2d2984e86ad2791d1d446ec56b24a636ba1ce78363308
uri: huggingface://QuantFactory/Yi-Coder-9B-GGUF/Yi-Coder-9B.Q4_K_M.gguf
- &vicuna-chat
## LLama2 and derivatives
### Start Fimbulvetr
Expand Down
4 changes: 4 additions & 0 deletions pkg/model/initializers.go
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,10 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
}
}

// ListAvailableBackends returns the names of the backends bundled in the
// given asset directory.
func (ml *ModelLoader) ListAvailableBackends(assetdir string) ([]string, error) {
	backends, err := backendsInAssetDir(assetdir)
	if err != nil {
		return nil, err
	}
	return backends, nil
}

func (ml *ModelLoader) BackendLoader(opts ...Option) (client grpc.Backend, err error) {
o := NewOptions(opts...)

Expand Down
24 changes: 24 additions & 0 deletions swagger/docs.go
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,19 @@ const docTemplate = `{
}
}
},
"/system": {
"get": {
"summary": "Show the LocalAI instance information",
"responses": {
"200": {
"description": "Response",
"schema": {
"$ref": "#/definitions/schema.SystemInformationResponse"
}
}
}
}
},
"/tts": {
"post": {
"consumes": [
Expand Down Expand Up @@ -1649,6 +1662,17 @@ const docTemplate = `{
}
}
},
"schema.SystemInformationResponse": {
"type": "object",
"properties": {
"backends": {
"type": "array",
"items": {
"type": "string"
}
}
}
},
"schema.TTSRequest": {
"description": "TTS request body",
"type": "object",
Expand Down
24 changes: 24 additions & 0 deletions swagger/swagger.json
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,19 @@
}
}
},
"/system": {
"get": {
"summary": "Show the LocalAI instance information",
"responses": {
"200": {
"description": "Response",
"schema": {
"$ref": "#/definitions/schema.SystemInformationResponse"
}
}
}
}
},
"/tts": {
"post": {
"consumes": [
Expand Down Expand Up @@ -1642,6 +1655,17 @@
}
}
},
"schema.SystemInformationResponse": {
"type": "object",
"properties": {
"backends": {
"type": "array",
"items": {
"type": "string"
}
}
}
},
"schema.TTSRequest": {
"description": "TTS request body",
"type": "object",
Expand Down
15 changes: 15 additions & 0 deletions swagger/swagger.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -638,6 +638,13 @@ definitions:
$ref: '#/definitions/p2p.NodeData'
type: array
type: object
schema.SystemInformationResponse:
properties:
backends:
items:
type: string
type: array
type: object
schema.TTSRequest:
description: TTS request body
properties:
Expand Down Expand Up @@ -832,6 +839,14 @@ paths:
schema:
$ref: '#/definitions/gallery.GalleryOpStatus'
summary: Returns the job status
/system:
get:
responses:
"200":
description: Response
schema:
$ref: '#/definitions/schema.SystemInformationResponse'
summary: Show the LocalAI instance information
/tts:
post:
consumes:
Expand Down
Loading