Skip to content

Commit

Permalink
refactor: backend/service split, channel-based llm flow (#1963)
Browse files Browse the repository at this point in the history
Refactor: channel-based LLM flow and services split

---------

Signed-off-by: Dave Lee <[email protected]>
  • Loading branch information
dave-gray101 authored Apr 13, 2024
1 parent 1981154 commit eed5706
Show file tree
Hide file tree
Showing 52 changed files with 3,054 additions and 2,269 deletions.
15 changes: 9 additions & 6 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,9 @@ jobs:
PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3
timeout-minutes: 5
uses: dave-gray101/action-tmate@master
with:
connect-timeout-seconds: 180

tests-aio-container:
runs-on: ubuntu-latest
Expand Down Expand Up @@ -173,8 +174,9 @@ jobs:
make run-e2e-aio
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3
timeout-minutes: 5
uses: dave-gray101/action-tmate@master
with:
connect-timeout-seconds: 180

tests-apple:
runs-on: macOS-14
Expand Down Expand Up @@ -207,5 +209,6 @@ jobs:
BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make --jobs 4 --output-sync=target test
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3
timeout-minutes: 5
uses: dave-gray101/action-tmate@master
with:
connect-timeout-seconds: 180
18 changes: 12 additions & 6 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,9 @@ clean-tests:
rm -rf test-dir
rm -rf core/http/backend-assets

# Force-kill (SIGKILL) every process whose command line mentions
# backend-assets/grpc/.
#  * `ps -A -o pid= -o args=` lists all processes with their full command line
#    and no header; a bare `ps` only covers the invoking terminal and, on some
#    platforms, prints just the executable name, so the path would never match.
#  * The `[b]` bracket keeps grep (and the shell running this recipe) from
#    matching their own command lines.
#  * `$$1` is awk's first field (the PID); `$$` escapes the dollar for make.
.PHONY: halt-backends
halt-backends: ## Used to clean up stray backends sometimes left running when debugging manually
	ps -A -o pid= -o args= | grep '[b]ackend-assets/grpc/' | awk '{print $$1}' | xargs -I {} kill -9 {}

## Build:
build: prepare backend-assets grpcs ## Build the project
$(info ${GREEN}I local-ai build info:${RESET})
Expand Down Expand Up @@ -365,29 +368,29 @@ run-e2e-image:

# Runs the all-in-one end-to-end suite under ./tests/e2e-aio with ginkgo
# (-v verbose, -r recursive). Flaky specs are retried up to $(TEST_FLAKES)
# times instead of a hard-coded count, and the suite is invoked only once.
# TEST_FLAKES is expected to be defined elsewhere in this Makefile; override
# it per run with e.g. `make TEST_FLAKES=1 run-e2e-aio`.
.PHONY: run-e2e-aio
run-e2e-aio:
	@echo 'Running e2e AIO tests'
	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e-aio

# Runs the end-to-end suite under ./tests/e2e with ginkgo.
# BUILD_TYPE and LOCALAI_API are placed in the environment of the single
# ginkgo invocation via the backslash-continued line, so the suite always sees
# them. Flaky specs are retried up to $(TEST_FLAKES) times (expected to be
# defined elsewhere in this Makefile).
.PHONY: test-e2e
test-e2e:
	@echo 'Running e2e tests'
	BUILD_TYPE=$(BUILD_TYPE) \
	LOCALAI_API=http://$(E2E_BRIDGE_IP):5390/v1 \
	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e

# Cleans up after the e2e run: removes $(TEST_DIR) and stops every running
# container created from the localai-tests image.
# `docker stop` exits with a usage error when given no container IDs, so the
# stop is skipped when nothing is running; teardown is then a successful no-op.
.PHONY: teardown-e2e
teardown-e2e:
	rm -rf $(TEST_DIR) || true
	containers=$$(docker ps -q --filter ancestor=localai-tests); \
	if [ -n "$$containers" ]; then docker stop $$containers; fi

# Runs only the specs labelled "gpt4all" from $(TEST_PATHS), after prepare-test.
# The TEST_DIR/FIXTURES/CONFIG_FILE/MODELS_PATH assignments are continued onto
# the single ginkgo invocation so they are part of its environment. Flaky specs
# are retried up to $(TEST_FLAKES) times (expected to be defined elsewhere in
# this Makefile).
.PHONY: test-gpt4all
test-gpt4all: prepare-test
	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)

# Runs only the specs labelled "llama" from $(TEST_PATHS), after prepare-test.
# The TEST_DIR/FIXTURES/CONFIG_FILE/MODELS_PATH assignments are continued onto
# the single ginkgo invocation so they are part of its environment. Flaky specs
# are retried up to $(TEST_FLAKES) times (expected to be defined elsewhere in
# this Makefile).
.PHONY: test-llama
test-llama: prepare-test
	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)

# Runs only the specs labelled "llama-gguf" from $(TEST_PATHS), after
# prepare-test. The TEST_DIR/FIXTURES/CONFIG_FILE/MODELS_PATH assignments are
# continued onto the single ginkgo invocation so they are part of its
# environment. Flaky specs are retried up to $(TEST_FLAKES) times (expected to
# be defined elsewhere in this Makefile).
.PHONY: test-llama-gguf
test-llama-gguf: prepare-test
	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)

test-tts: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
Expand Down Expand Up @@ -636,7 +639,10 @@ backend-assets/grpc/llama-ggml: sources/go-llama-ggml sources/go-llama-ggml/libb
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama-ggml
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama-ggml LIBRARY_PATH=$(CURDIR)/sources/go-llama-ggml \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/

# EXPERIMENTAL:
ifeq ($(BUILD_TYPE),metal)
cp $(CURDIR)/sources/go-llama-ggml/llama.cpp/ggml-metal.metal backend-assets/grpc/
endif
backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
Expand Down
6 changes: 3 additions & 3 deletions backend/go/transcribe/transcript.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,16 @@ func runCommand(command []string) (string, error) {
// AudioToWav converts audio to wav for transcribe.
// TODO: use https://github.com/mccoyst/ogg?
func audioToWav(src, dst string) error {
command := []string{"ffmpeg", "-i", src, "-format", "s16le", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst}
command := []string{"ffmpeg", "-i", src, "-format", "s16le", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst}
out, err := runCommand(command)
if err != nil {
return fmt.Errorf("error: %w out: %s", err, out)
}
return nil
}

func Transcript(model whisper.Model, audiopath, language string, threads uint) (schema.Result, error) {
res := schema.Result{}
func Transcript(model whisper.Model, audiopath, language string, threads uint) (schema.TranscriptionResult, error) {
res := schema.TranscriptionResult{}

dir, err := os.MkdirTemp("", "whisper")
if err != nil {
Expand Down
2 changes: 1 addition & 1 deletion backend/go/transcribe/whisper.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,6 @@ func (sd *Whisper) Load(opts *pb.ModelOptions) error {
return err
}

func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.Result, error) {
func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.TranscriptionResult, error) {
return Transcript(sd.whisper, opts.Dst, opts.Language, uint(opts.Threads))
}
90 changes: 88 additions & 2 deletions core/backend/embeddings.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,100 @@ package backend

import (
"fmt"
"time"

"github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/core/schema"
"github.com/google/uuid"

"github.com/go-skynet/LocalAI/pkg/concurrency"
"github.com/go-skynet/LocalAI/pkg/grpc"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/go-skynet/LocalAI/pkg/model"
)

func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) {
// EmbeddingsBackendService bundles the dependencies needed to serve
// embedding requests.
type EmbeddingsBackendService struct {
	// ml is the model loader handed to modelEmbedding for each input.
	ml *model.ModelLoader
	// bcl resolves the backend configuration for an incoming request.
	bcl *config.BackendConfigLoader
	// appConfig is the application configuration passed along to both the
	// configuration loader and modelEmbedding.
	appConfig *config.ApplicationConfig
}

// NewEmbeddingsBackendService returns an EmbeddingsBackendService that uses
// the given model loader, backend-config loader and application config.
// The arguments are stored as-is; nothing is validated or copied.
func NewEmbeddingsBackendService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *EmbeddingsBackendService {
	svc := new(EmbeddingsBackendService)
	svc.ml = ml
	svc.bcl = bcl
	svc.appConfig = appConfig
	return svc
}

// Embeddings computes an embedding for every input of request and delivers
// the outcome asynchronously.
//
// The work runs in its own goroutine. The returned channel yields exactly one
// concurrency.ErrorOr value, either the assembled response or the first error
// encountered, and is then closed.
//
// Notes grounded in the code below:
//   - request is modified in place: an empty Model is replaced by a default
//     before the backend configuration is loaded.
//   - Token inputs (InputToken) are embedded first, then string inputs
//     (InputStrings); Item.Index is the position within its own input list,
//     so numbering restarts at zero for the string inputs.
func (ebs *EmbeddingsBackendService) Embeddings(request *schema.OpenAIRequest) <-chan concurrency.ErrorOr[*schema.OpenAIResponse] {
	// Capacity 1: the single result can always be stored, so the worker
	// goroutine terminates even if the caller never receives from the channel.
	resultChannel := make(chan concurrency.ErrorOr[*schema.OpenAIResponse], 1)

	go func(request *schema.OpenAIRequest) {
		// One close on every exit path, instead of repeating it per branch.
		defer close(resultChannel)

		// fail publishes err as the sole result of this call.
		fail := func(err error) {
			resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Error: err}
		}

		if request.Model == "" {
			// NOTE(review): defaulting an embeddings request to the
			// StableDiffusion backend looks unintended — confirm the default.
			request.Model = model.StableDiffusionBackend
		}

		bc, request, err := ebs.bcl.LoadBackendConfigForModelAndOpenAIRequest(request.Model, request, ebs.appConfig)
		if err != nil {
			fail(err)
			return
		}

		items := []schema.Item{}

		// embed obtains the model function for one input (string or tokens),
		// runs it and appends the resulting vector under the given index.
		embed := func(index int, s string, tokens []int) error {
			embedFn, err := modelEmbedding(s, tokens, ebs.ml, bc, ebs.appConfig)
			if err != nil {
				return err
			}
			embeddings, err := embedFn()
			if err != nil {
				return err
			}
			items = append(items, schema.Item{Embedding: embeddings, Index: index, Object: "embedding"})
			return nil
		}

		for i, tokens := range bc.InputToken {
			if err := embed(i, "", tokens); err != nil {
				fail(err)
				return
			}
		}

		for i, s := range bc.InputStrings {
			if err := embed(i, s, []int{}); err != nil {
				fail(err)
				return
			}
		}

		resp := &schema.OpenAIResponse{
			ID:      uuid.New().String(),
			Created: int(time.Now().Unix()),
			Model:   request.Model, // we have to return what the user sent here, due to OpenAI spec.
			Data:    items,
			Object:  "list",
		}
		resultChannel <- concurrency.ErrorOr[*schema.OpenAIResponse]{Value: resp}
	}(request)

	return resultChannel
}

func modelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig *config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) {
modelFile := backendConfig.Model

grpcOpts := gRPCModelOpts(backendConfig)
Expand Down
Loading

0 comments on commit eed5706

Please sign in to comment.