From d11aebb028313c12fe4f25d9f617b061c0dda57f Mon Sep 17 00:00:00 2001
From: Steve Zhang
Date: Mon, 22 Jul 2024 23:15:17 +0800
Subject: [PATCH] support switch mode in GMC for MI6 team. (#206)

Signed-off-by: zhlsunshine
---
 microservices-connector/cmd/router/main.go    |   4 +-
 .../config/samples/chatQnA_switch_gaudi.yaml  | 124 ++++++++++++++++++
 .../config/samples/chatQnA_switch_xeon.yaml   | 124 ++++++++++++++++++
 3 files changed, 251 insertions(+), 1 deletion(-)
 create mode 100644 microservices-connector/config/samples/chatQnA_switch_gaudi.yaml
 create mode 100644 microservices-connector/config/samples/chatQnA_switch_xeon.yaml

diff --git a/microservices-connector/cmd/router/main.go b/microservices-connector/cmd/router/main.go
index a24fe4b5..c49e8ca6 100644
--- a/microservices-connector/cmd/router/main.go
+++ b/microservices-connector/cmd/router/main.go
@@ -210,7 +210,7 @@ func handleSwitchNode(
 	if route.NodeName != "" {
 		stepType = ServiceNode
 	}
-	log.Info("Starting execution of step", "type", stepType, "stepName", route.StepName)
+	log.Info("Starting execution of step", "Node Name", route.NodeName, "type", stepType, "stepName", route.StepName)
 	if responseBytes, statusCode, err = executeStep(route, graph, initInput, request, headers); err != nil {
 		return nil, 500, err
 	}
@@ -248,6 +248,7 @@ func handleSwitchPipeline(nodeName string,
 			)
 			continue
 		}
+		log.Info("Current Step Information", "Node Name", nodeName, "Step Index", index)
 		request := input
 		if route.Data == "$response" && index > 0 {
 			request = responseBytes
@@ -406,6 +407,7 @@ func routeStep(nodeName string,
 ) ([]byte, int, error) {
 	defer timeTrack(time.Now(), "node", nodeName)
 	currentNode := graph.Spec.Nodes[nodeName]
+	log.Info("Current Node", "Node Name", nodeName)
 
 	if currentNode.RouterType == mcv1alpha3.Switch {
 		return handleSwitchPipeline(nodeName, graph, initInput, input, headers)
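
Reviewer note: the hunks above only add logging around the switch handling; the condition evaluation itself lives elsewhere in main.go and is unchanged by this patch. For orientation, here is a minimal, hypothetical Go sketch of how a route condition such as "embedding-model-id==large" can be matched against the incoming JSON payload. The helper name and payload shape are assumptions for illustration, not the router's actual code.

package main

import (
	"encoding/json"
	"fmt"
	"strings"
)

// matchCondition reports whether a "key==value" condition holds for a
// request body, assuming the body is a flat JSON object that carries the
// condition keys (e.g. {"embedding-model-id": "large", "text": "..."}).
func matchCondition(condition string, body []byte) bool {
	parts := strings.SplitN(condition, "==", 2)
	if len(parts) != 2 {
		return false // malformed condition
	}
	key, want := strings.TrimSpace(parts[0]), strings.TrimSpace(parts[1])

	var payload map[string]interface{}
	if err := json.Unmarshal(body, &payload); err != nil {
		return false
	}
	got, ok := payload[key]
	return ok && fmt.Sprintf("%v", got) == want
}

func main() {
	body := []byte(`{"embedding-model-id": "large", "text": "What is OPEA?"}`)
	fmt.Println(matchCondition("embedding-model-id==large", body)) // true
	fmt.Println(matchCondition("embedding-model-id==small", body)) // false
}

The two sample pipelines below rely on exactly this kind of key==value condition, with the keys supplied by the client alongside the query text.
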
diff --git a/microservices-connector/config/samples/chatQnA_switch_gaudi.yaml b/microservices-connector/config/samples/chatQnA_switch_gaudi.yaml
new file mode 100644
index 00000000..0af8cebd
--- /dev/null
+++ b/microservices-connector/config/samples/chatQnA_switch_gaudi.yaml
@@ -0,0 +1,124 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: gmc.opea.io/v1alpha3
+kind: GMConnector
+metadata:
+  labels:
+    app.kubernetes.io/name: gmconnector
+    app.kubernetes.io/managed-by: kustomize
+    gmc/platform: gaudi
+  name: switch
+  namespace: switch
+spec:
+  routerConfig:
+    name: router
+    serviceName: router-service
+  nodes:
+    root:
+      routerType: Sequence
+      steps:
+        - name: Embedding
+          nodeName: node1
+        - name: Reranking
+          data: $response
+          internalService:
+            serviceName: reranking-svc
+            config:
+              endpoint: /v1/reranking
+              TEI_RERANKING_ENDPOINT: tei-reranking-svc
+        - name: TeiReranking
+          internalService:
+            serviceName: tei-reranking-svc
+            config:
+              endpoint: /rerank
+            isDownstreamService: true
+        - name: Llm
+          data: $response
+          nodeName: node2
+    node1:
+      routerType: Switch
+      steps:
+        - name: Embedding
+          condition: embedding-model-id==large
+          internalService:
+            serviceName: embedding-svc-large
+            config:
+              endpoint: /v1/embeddings
+              TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc-bge15
+        - name: Embedding
+          condition: embedding-model-id==small
+          internalService:
+            serviceName: embedding-svc-small
+            config:
+              endpoint: /v1/embeddings
+              TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc-bge-small
+        - name: TeiEmbeddingGaudi
+          internalService:
+            serviceName: tei-embedding-gaudi-svc-bge15
+            config:
+              MODEL_ID: BAAI/bge-base-en-v1.5
+            isDownstreamService: true
+        - name: TeiEmbeddingGaudi
+          internalService:
+            serviceName: tei-embedding-gaudi-svc-bge-small
+            config:
+              MODEL_ID: BAAI/bge-base-en-v1.5
+            isDownstreamService: true
+        - name: Retriever
+          condition: embedding-model-id==large
+          data: $response
+          internalService:
+            serviceName: retriever-svc-large
+            config:
+              endpoint: /v1/retrieval
+              REDIS_URL: redis-vector-db-large
+              TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc-bge15
+        - name: Retriever
+          condition: embedding-model-id==small
+          data: $response
+          internalService:
+            serviceName: retriever-svc-small
+            config:
+              endpoint: /v1/retrieval
+              REDIS_URL: redis-vector-db-small
+              TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc-bge-small
+        - name: VectorDB
+          internalService:
+            serviceName: redis-vector-db-large
+            isDownstreamService: true
+        - name: VectorDB
+          internalService:
+            serviceName: redis-vector-db-small
+            isDownstreamService: true
+    node2:
+      routerType: Switch
+      steps:
+        - name: Llm
+          condition: model-id==intel
+          internalService:
+            serviceName: llm-svc-intel
+            config:
+              endpoint: /v1/chat/completions
+              TGI_LLM_ENDPOINT: tgi-gaudi-service-intel
+        - name: Llm
+          condition: model-id==llama
+          internalService:
+            serviceName: llm-svc-llama
+            config:
+              endpoint: /v1/chat/completions
+              TGI_LLM_ENDPOINT: tgi-gaudi-service-llama
+        - name: TgiGaudi
+          internalService:
+            serviceName: tgi-gaudi-service-intel
+            config:
+              endpoint: /generate
+              MODEL_ID: Intel/neural-chat-7b-v3-3
+            isDownstreamService: true
+        - name: TgiGaudi
+          internalService:
+            serviceName: tgi-gaudi-service-llama
+            config:
+              endpoint: /generate
+              MODEL_ID: openlm-research/open_llama_3b
+            isDownstreamService: true
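
Reviewer note: in the gaudi sample above, node1 switches the embedding/retrieval branch on embedding-model-id and node2 switches the TGI backend on model-id. A hypothetical Go client exercising one branch combination could look like the sketch below; the router URL, port, and payload shape are assumptions that depend on how router-service is exposed in the switch namespace.

package main

import (
	"bytes"
	"fmt"
	"io"
	"log"
	"net/http"
)

// Assumed router endpoint; adjust to your cluster (scheme, host, and port
// depend on how router-service is exposed).
const routerURL = "http://router-service.switch.svc.cluster.local:8080"

func main() {
	// The condition keys travel with the query: "embedding-model-id"
	// selects the node1 branch, "model-id" selects the node2 branch.
	payload := []byte(`{"embedding-model-id": "small", "model-id": "intel", "text": "What is OPEA?"}`)

	resp, err := http.Post(routerURL, "application/json", bytes.NewReader(payload))
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(resp.Status, string(body))
}

Swapping "small" for "large" (or "llama" for "intel") should route the same request through the other branch of each switch node.
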
diff --git a/microservices-connector/config/samples/chatQnA_switch_xeon.yaml b/microservices-connector/config/samples/chatQnA_switch_xeon.yaml
new file mode 100644
index 00000000..4f06a210
--- /dev/null
+++ b/microservices-connector/config/samples/chatQnA_switch_xeon.yaml
@@ -0,0 +1,124 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: gmc.opea.io/v1alpha3
+kind: GMConnector
+metadata:
+  labels:
+    app.kubernetes.io/name: gmconnector
+    app.kubernetes.io/managed-by: kustomize
+    gmc/platform: xeon
+  name: switch
+  namespace: switch
+spec:
+  routerConfig:
+    name: router
+    serviceName: router-service
+  nodes:
+    root:
+      routerType: Sequence
+      steps:
+        - name: Embedding
+          nodeName: node1
+        - name: Reranking
+          data: $response
+          internalService:
+            serviceName: reranking-svc
+            config:
+              endpoint: /v1/reranking
+              TEI_RERANKING_ENDPOINT: tei-reranking-svc
+        - name: TeiReranking
+          internalService:
+            serviceName: tei-reranking-svc
+            config:
+              endpoint: /rerank
+            isDownstreamService: true
+        - name: Llm
+          data: $response
+          nodeName: node2
+    node1:
+      routerType: Switch
+      steps:
+        - name: Embedding
+          condition: embedding-model-id==large
+          internalService:
+            serviceName: embedding-svc-large
+            config:
+              endpoint: /v1/embeddings
+              TEI_EMBEDDING_ENDPOINT: tei-embedding-svc-bge15
+        - name: Embedding
+          condition: embedding-model-id==small
+          internalService:
+            serviceName: embedding-svc-small
+            config:
+              endpoint: /v1/embeddings
+              TEI_EMBEDDING_ENDPOINT: tei-embedding-svc-bge-small
+        - name: TeiEmbedding
+          internalService:
+            serviceName: tei-embedding-svc-bge15
+            config:
+              MODEL_ID: BAAI/bge-base-en-v1.5
+            isDownstreamService: true
+        - name: TeiEmbedding
+          internalService:
+            serviceName: tei-embedding-svc-bge-small
+            config:
+              MODEL_ID: BAAI/bge-base-en-v1.5
+            isDownstreamService: true
+        - name: Retriever
+          condition: embedding-model-id==large
+          data: $response
+          internalService:
+            serviceName: retriever-svc-large
+            config:
+              endpoint: /v1/retrieval
+              REDIS_URL: redis-vector-db-large
+              TEI_EMBEDDING_ENDPOINT: tei-embedding-svc-bge15
+        - name: Retriever
+          condition: embedding-model-id==small
+          data: $response
+          internalService:
+            serviceName: retriever-svc-small
+            config:
+              endpoint: /v1/retrieval
+              REDIS_URL: redis-vector-db-small
+              TEI_EMBEDDING_ENDPOINT: tei-embedding-svc-bge-small
+        - name: VectorDB
+          internalService:
+            serviceName: redis-vector-db-large
+            isDownstreamService: true
+        - name: VectorDB
+          internalService:
+            serviceName: redis-vector-db-small
+            isDownstreamService: true
+    node2:
+      routerType: Switch
+      steps:
+        - name: Llm
+          condition: model-id==intel
+          internalService:
+            serviceName: llm-svc-intel
+            config:
+              endpoint: /v1/chat/completions
+              TGI_LLM_ENDPOINT: tgi-service-intel
+        - name: Llm
+          condition: model-id==llama
+          internalService:
+            serviceName: llm-svc-llama
+            config:
+              endpoint: /v1/chat/completions
+              TGI_LLM_ENDPOINT: tgi-service-llama
+        - name: Tgi
+          internalService:
+            serviceName: tgi-service-intel
+            config:
+              endpoint: /generate
+              MODEL_ID: Intel/neural-chat-7b-v3-3
+            isDownstreamService: true
+        - name: Tgi
+          internalService:
+            serviceName: tgi-service-llama
+            config:
+              endpoint: /generate
+              MODEL_ID: bigscience/bloom-560m
+            isDownstreamService: true
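
Reviewer note: the xeon sample mirrors the gaudi one, substituting CPU TEI/TGI services and bigscience/bloom-560m on the llama branch. As a summary of the routing semantics both samples rely on, the reduced, self-contained Go sketch below runs a Sequence node's steps in order (feeding $response forward) and skips Switch steps whose condition does not match the request. The types and helper are illustrative, not the GMC API.

package main

import (
	"fmt"
	"strings"
)

// Step and Node loosely mirror the sample YAML; they are illustrative,
// not the actual GMC types.
type Step struct {
	Name      string
	Condition string // e.g. "model-id==intel"; empty means "always runs"
	Data      string // "$response" chains the previous step's output
}

type Node struct {
	RouterType string // "Sequence" or "Switch"
	Steps      []Step
}

// matches is a naive stand-in for condition evaluation: it checks whether
// the raw JSON request contains the condition's key/value pair verbatim.
func matches(condition, request string) bool {
	if condition == "" {
		return true
	}
	kv := strings.SplitN(condition, "==", 2)
	if len(kv) != 2 {
		return false
	}
	return strings.Contains(request, fmt.Sprintf("%q: %q", kv[0], kv[1]))
}

// runNode runs a Sequence node's steps in order, feeding the previous
// response to steps that declare data: $response; for a Switch node it
// skips every step whose condition does not hold for the request.
func runNode(node Node, request string, call func(Step, string) string) string {
	response := request
	for _, step := range node.Steps {
		if node.RouterType == "Switch" && !matches(step.Condition, request) {
			continue
		}
		input := request
		if step.Data == "$response" {
			input = response
		}
		response = call(step, input)
	}
	return response
}

func main() {
	// node2 from the samples, reduced to its two conditional LLM steps.
	node2 := Node{RouterType: "Switch", Steps: []Step{
		{Name: "llm-svc-intel", Condition: "model-id==intel"},
		{Name: "llm-svc-llama", Condition: "model-id==llama"},
	}}
	request := `{"model-id": "intel", "text": "hi"}`
	out := runNode(node2, request, func(s Step, in string) string {
		return "handled by " + s.Name // stand-in for executeStep
	})
	fmt.Println(out) // handled by llm-svc-intel
}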