From d11aebb028313c12fe4f25d9f617b061c0dda57f Mon Sep 17 00:00:00 2001
From: Steve Zhang
Date: Mon, 22 Jul 2024 23:15:17 +0800
Subject: [PATCH] support switch mode in GMC for MI6 team. (#206)

Signed-off-by: zhlsunshine
---
 microservices-connector/cmd/router/main.go    |   4 +-
 .../config/samples/chatQnA_switch_gaudi.yaml  | 124 ++++++++++++++++++
 .../config/samples/chatQnA_switch_xeon.yaml   | 124 ++++++++++++++++++
 3 files changed, 251 insertions(+), 1 deletion(-)
 create mode 100644 microservices-connector/config/samples/chatQnA_switch_gaudi.yaml
 create mode 100644 microservices-connector/config/samples/chatQnA_switch_xeon.yaml

diff --git a/microservices-connector/cmd/router/main.go b/microservices-connector/cmd/router/main.go
index a24fe4b5..c49e8ca6 100644
--- a/microservices-connector/cmd/router/main.go
+++ b/microservices-connector/cmd/router/main.go
@@ -210,7 +210,7 @@ func handleSwitchNode(
 	if route.NodeName != "" {
 		stepType = ServiceNode
 	}
-	log.Info("Starting execution of step", "type", stepType, "stepName", route.StepName)
+	log.Info("Starting execution of step", "Node Name", route.NodeName, "type", stepType, "stepName", route.StepName)
 	if responseBytes, statusCode, err = executeStep(route, graph, initInput, request, headers); err != nil {
 		return nil, 500, err
 	}
@@ -248,6 +248,7 @@ func handleSwitchPipeline(nodeName string,
 			)
 			continue
 		}
+		log.Info("Current Step Information", "Node Name", nodeName, "Step Index", index)
 		request := input
 		if route.Data == "$response" && index > 0 {
 			request = responseBytes
@@ -406,6 +407,7 @@ func routeStep(nodeName string,
 ) ([]byte, int, error) {
 	defer timeTrack(time.Now(), "node", nodeName)
 	currentNode := graph.Spec.Nodes[nodeName]
+	log.Info("Current Node", "Node Name", nodeName)
 
 	if currentNode.RouterType == mcv1alpha3.Switch {
 		return handleSwitchPipeline(nodeName, graph, initInput, input, headers)
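
Reviewer note: the hunks above only add logging around the switch handling; the condition evaluation itself lives elsewhere in main.go and is unchanged by this patch. For orientation, here is a minimal, hypothetical Go sketch of how a route condition such as "embedding-model-id==large" can be matched against the incoming JSON payload. The helper name and payload shape are assumptions for illustration, not the router's actual code.

package main

import (
	"encoding/json"
	"fmt"
	"strings"
)

// matchCondition reports whether a "key==value" condition holds for a
// request body, assuming the body is a flat JSON object that carries the
// condition keys (e.g. {"embedding-model-id": "large", "text": "..."}).
func matchCondition(condition string, body []byte) bool {
	parts := strings.SplitN(condition, "==", 2)
	if len(parts) != 2 {
		return false // malformed condition
	}
	key, want := strings.TrimSpace(parts[0]), strings.TrimSpace(parts[1])

	var payload map[string]interface{}
	if err := json.Unmarshal(body, &payload); err != nil {
		return false
	}
	got, ok := payload[key]
	return ok && fmt.Sprintf("%v", got) == want
}

func main() {
	body := []byte(`{"embedding-model-id": "large", "text": "What is OPEA?"}`)
	fmt.Println(matchCondition("embedding-model-id==large", body)) // true
	fmt.Println(matchCondition("embedding-model-id==small", body)) // false
}

The two sample pipelines below rely on exactly this kind of key==value condition, with the keys supplied by the client alongside the query text.
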
diff --git a/microservices-connector/config/samples/chatQnA_switch_gaudi.yaml b/microservices-connector/config/samples/chatQnA_switch_gaudi.yaml
new file mode 100644
index 00000000..0af8cebd
--- /dev/null
+++ b/microservices-connector/config/samples/chatQnA_switch_gaudi.yaml
@@ -0,0 +1,124 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: gmc.opea.io/v1alpha3
+kind: GMConnector
+metadata:
+  labels:
+    app.kubernetes.io/name: gmconnector
+    app.kubernetes.io/managed-by: kustomize
+    gmc/platform: gaudi
+  name: switch
+  namespace: switch
+spec:
+  routerConfig:
+    name: router
+    serviceName: router-service
+  nodes:
+    root:
+      routerType: Sequence
+      steps:
+        - name: Embedding
+          nodeName: node1
+        - name: Reranking
+          data: $response
+          internalService:
+            serviceName: reranking-svc
+            config:
+              endpoint: /v1/reranking
+              TEI_RERANKING_ENDPOINT: tei-reranking-svc
+        - name: TeiReranking
+          internalService:
+            serviceName: tei-reranking-svc
+            config:
+              endpoint: /rerank
+            isDownstreamService: true
+        - name: Llm
+          data: $response
+          nodeName: node2
+    node1:
+      routerType: Switch
+      steps:
+        - name: Embedding
+          condition: embedding-model-id==large
+          internalService:
+            serviceName: embedding-svc-large
+            config:
+              endpoint: /v1/embeddings
+              TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc-bge15
+        - name: Embedding
+          condition: embedding-model-id==small
+          internalService:
+            serviceName: embedding-svc-small
+            config:
+              endpoint: /v1/embeddings
+              TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc-bge-small
+        - name: TeiEmbeddingGaudi
+          internalService:
+            serviceName: tei-embedding-gaudi-svc-bge15
+            config:
+              MODEL_ID: BAAI/bge-base-en-v1.5
+            isDownstreamService: true
+        - name: TeiEmbeddingGaudi
+          internalService:
+            serviceName: tei-embedding-gaudi-svc-bge-small
+            config:
+              MODEL_ID: BAAI/bge-base-en-v1.5
+            isDownstreamService: true
+        - name: Retriever
+          condition: embedding-model-id==large
+          data: $response
+          internalService:
+            serviceName: retriever-svc-large
+            config:
+              endpoint: /v1/retrieval
+              REDIS_URL: redis-vector-db-large
+              TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc-bge15
+        - name: Retriever
+          condition: embedding-model-id==small
+          data: $response
+          internalService:
+            serviceName: retriever-svc-small
+            config:
+              endpoint: /v1/retrieval
+              REDIS_URL: redis-vector-db-small
+              TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc-bge-small
+        - name: VectorDB
+          internalService:
+            serviceName: redis-vector-db-large
+            isDownstreamService: true
+        - name: VectorDB
+          internalService:
+            serviceName: redis-vector-db-small
+            isDownstreamService: true
+    node2:
+      routerType: Switch
+      steps:
+        - name: Llm
+          condition: model-id==intel
+          internalService:
+            serviceName: llm-svc-intel
+            config:
+              endpoint: /v1/chat/completions
+              TGI_LLM_ENDPOINT: tgi-gaudi-service-intel
+        - name: Llm
+          condition: model-id==llama
+          internalService:
+            serviceName: llm-svc-llama
+            config:
+              endpoint: /v1/chat/completions
+              TGI_LLM_ENDPOINT: tgi-gaudi-service-llama
+        - name: TgiGaudi
+          internalService:
+            serviceName: tgi-gaudi-service-intel
+            config:
+              endpoint: /generate
+              MODEL_ID: Intel/neural-chat-7b-v3-3
+            isDownstreamService: true
+        - name: TgiGaudi
+          internalService:
+            serviceName: tgi-gaudi-service-llama
+            config:
+              endpoint: /generate
+              MODEL_ID: openlm-research/open_llama_3b
+            isDownstreamService: true
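
Reviewer note: in the gaudi sample above, node1 switches the embedding/retrieval branch on embedding-model-id and node2 switches the TGI backend on model-id. A hypothetical Go client exercising one branch combination could look like the sketch below; the router URL, port, and payload shape are assumptions that depend on how router-service is exposed in the switch namespace.

package main

import (
	"bytes"
	"fmt"
	"io"
	"log"
	"net/http"
)

// Assumed router endpoint; adjust to your cluster (scheme, host, and port
// depend on how router-service is exposed).
const routerURL = "http://router-service.switch.svc.cluster.local:8080"

func main() {
	// The condition keys travel with the query: "embedding-model-id"
	// selects the node1 branch, "model-id" selects the node2 branch.
	payload := []byte(`{"embedding-model-id": "small", "model-id": "intel", "text": "What is OPEA?"}`)

	resp, err := http.Post(routerURL, "application/json", bytes.NewReader(payload))
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(resp.Status, string(body))
}

Swapping "small" for "large" (or "llama" for "intel") should route the same request through the other branch of each switch node.
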
diff --git a/microservices-connector/config/samples/chatQnA_switch_xeon.yaml b/microservices-connector/config/samples/chatQnA_switch_xeon.yaml
new file mode 100644
index 00000000..4f06a210
--- /dev/null
+++ b/microservices-connector/config/samples/chatQnA_switch_xeon.yaml
@@ -0,0 +1,124 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: gmc.opea.io/v1alpha3
+kind: GMConnector
+metadata:
+  labels:
+    app.kubernetes.io/name: gmconnector
+    app.kubernetes.io/managed-by: kustomize
+    gmc/platform: xeon
+  name: switch
+  namespace: switch
+spec:
+  routerConfig:
+    name: router
+    serviceName: router-service
+  nodes:
+    root:
+      routerType: Sequence
+      steps:
+        - name: Embedding
+          nodeName: node1
+        - name: Reranking
+          data: $response
+          internalService:
+            serviceName: reranking-svc
+            config:
+              endpoint: /v1/reranking
+              TEI_RERANKING_ENDPOINT: tei-reranking-svc
+        - name: TeiReranking
+          internalService:
+            serviceName: tei-reranking-svc
+            config:
+              endpoint: /rerank
+            isDownstreamService: true
+        - name: Llm
+          data: $response
+          nodeName: node2
+    node1:
+      routerType: Switch
+      steps:
+        - name: Embedding
+          condition: embedding-model-id==large
+          internalService:
+            serviceName: embedding-svc-large
+            config:
+              endpoint: /v1/embeddings
+              TEI_EMBEDDING_ENDPOINT: tei-embedding-svc-bge15
+        - name: Embedding
+          condition: embedding-model-id==small
+          internalService:
+            serviceName: embedding-svc-small
+            config:
+              endpoint: /v1/embeddings
+              TEI_EMBEDDING_ENDPOINT: tei-embedding-svc-bge-small
+        - name: TeiEmbedding
+          internalService:
+            serviceName: tei-embedding-svc-bge15
+            config:
+              MODEL_ID: BAAI/bge-base-en-v1.5
+            isDownstreamService: true
+        - name: TeiEmbedding
+          internalService:
+            serviceName: tei-embedding-svc-bge-small
+            config:
+              MODEL_ID: BAAI/bge-base-en-v1.5
+            isDownstreamService: true
+        - name: Retriever
+          condition: embedding-model-id==large
+          data: $response
+          internalService:
+            serviceName: retriever-svc-large
+            config:
+              endpoint: /v1/retrieval
+              REDIS_URL: redis-vector-db-large
+              TEI_EMBEDDING_ENDPOINT: tei-embedding-svc-bge15
+        - name: Retriever
+          condition: embedding-model-id==small
+          data: $response
+          internalService:
+            serviceName: retriever-svc-small
+            config:
+              endpoint: /v1/retrieval
+              REDIS_URL: redis-vector-db-small
+              TEI_EMBEDDING_ENDPOINT: tei-embedding-svc-bge-small
+        - name: VectorDB
+          internalService:
+            serviceName: redis-vector-db-large
+            isDownstreamService: true
+        - name: VectorDB
+          internalService:
+            serviceName: redis-vector-db-small
+            isDownstreamService: true
+    node2:
+      routerType: Switch
+      steps:
+        - name: Llm
+          condition: model-id==intel
+          internalService:
+            serviceName: llm-svc-intel
+            config:
+              endpoint: /v1/chat/completions
+              TGI_LLM_ENDPOINT: tgi-service-intel
+        - name: Llm
+          condition: model-id==llama
+          internalService:
+            serviceName: llm-svc-llama
+            config:
+              endpoint: /v1/chat/completions
+              TGI_LLM_ENDPOINT: tgi-service-llama
+        - name: Tgi
+          internalService:
+            serviceName: tgi-service-intel
+            config:
+              endpoint: /generate
+              MODEL_ID: Intel/neural-chat-7b-v3-3
+            isDownstreamService: true
+        - name: Tgi
+          internalService:
+            serviceName: tgi-service-llama
+            config:
+              endpoint: /generate
+              MODEL_ID: bigscience/bloom-560m
+            isDownstreamService: true
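
Reviewer note: the xeon sample mirrors the gaudi one, substituting CPU TEI/TGI services and bigscience/bloom-560m on the llama branch. As a summary of the routing semantics both samples rely on, the reduced, self-contained Go sketch below runs a Sequence node's steps in order (feeding $response forward) and skips Switch steps whose condition does not match the request. The types and helper are illustrative, not the GMC API.

package main

import (
	"fmt"
	"strings"
)

// Step and Node loosely mirror the sample YAML; they are illustrative,
// not the actual GMC types.
type Step struct {
	Name      string
	Condition string // e.g. "model-id==intel"; empty means "always runs"
	Data      string // "$response" chains the previous step's output
}

type Node struct {
	RouterType string // "Sequence" or "Switch"
	Steps      []Step
}

// matches is a naive stand-in for condition evaluation: it checks whether
// the raw JSON request contains the condition's key/value pair verbatim.
func matches(condition, request string) bool {
	if condition == "" {
		return true
	}
	kv := strings.SplitN(condition, "==", 2)
	if len(kv) != 2 {
		return false
	}
	return strings.Contains(request, fmt.Sprintf("%q: %q", kv[0], kv[1]))
}

// runNode runs a Sequence node's steps in order, feeding the previous
// response to steps that declare data: $response; for a Switch node it
// skips every step whose condition does not hold for the request.
func runNode(node Node, request string, call func(Step, string) string) string {
	response := request
	for _, step := range node.Steps {
		if node.RouterType == "Switch" && !matches(step.Condition, request) {
			continue
		}
		input := request
		if step.Data == "$response" {
			input = response
		}
		response = call(step, input)
	}
	return response
}

func main() {
	// node2 from the samples, reduced to its two conditional LLM steps.
	node2 := Node{RouterType: "Switch", Steps: []Step{
		{Name: "llm-svc-intel", Condition: "model-id==intel"},
		{Name: "llm-svc-llama", Condition: "model-id==llama"},
	}}
	request := `{"model-id": "intel", "text": "hi"}`
	out := runNode(node2, request, func(s Step, in string) string {
		return "handled by " + s.Name // stand-in for executeStep
	})
	fmt.Println(out) // handled by llm-svc-intel
}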