Update GMC manifest changes and misc fixes (#564)
* Adapt to latest upstream vllm changes

- Run as the default user of the vllm upstream image, since the upstream
  image doesn't support running as a non-default user

- Adapt llm-uservice's vllm-values.yaml into ci-vllm-gaudi-values.yaml to
  run CI on Gaudi instead of Xeon, for faster tests

* Update GMC manifest changes

Signed-off-by: Lianhao Lu <[email protected]>
lianhao authored Nov 15, 2024
1 parent 823ce22 commit 87dc673
Showing 37 changed files with 4,052 additions and 31 deletions.
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
@@ -13,12 +13,14 @@ repos:
      - id: requirements-txt-fixer
      - id: trailing-whitespace
        files: (.*\.(py|rst|cmake|yaml|yml|json|ts|js|html|svelte|sh))$
        exclude: (microservices-connector/config/manifests/.*\.yaml)$

  - repo: https://github.com/Lucas-C/pre-commit-hooks
    rev: v1.5.5
    hooks:
      - id: insert-license
        files: (.*\.(py|yaml|yml|sh))$
        exclude: (microservices-connector/config/manifests/.*\.yaml)$
        args:
          [
            --license-filepath=.github/license_template.txt,
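The new exclude pattern keeps the trailing-whitespace and insert-license hooks away from the generated GMC manifests. A quick local sanity check of the regex, using two paths touched by this commit:

  printf '%s\n' \
      microservices-connector/config/manifests/agent.yaml \
      helm-charts/common/vllm/values.yaml \
    | grep -E '(microservices-connector/config/manifests/.*\.yaml)$'
  # only the generated manifest path matches, so only that file is skipped by the hooks

  pre-commit run --all-files   # re-run all hooks to confirm generated manifests are left untouched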
4 changes: 2 additions & 2 deletions helm-charts/common/gpt-sovits/values.yaml
@@ -77,6 +77,6 @@ tolerations: []
affinity: {}

global:
  gpt-sovits_proxy: ""
  gpt-sovitss_proxy: ""
  http_proxy: ""
  https_proxy: ""
  no_proxy: ""
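The global proxy values are plain chart values, so they can be set at install time; a minimal sketch, assuming a release named gpt-sovits and the chart path used in this repository (the proxy addresses are placeholders):

  helm install gpt-sovits ./helm-charts/common/gpt-sovits \
    --set global.http_proxy=http://proxy.example.com:3128 \
    --set global.https_proxy=http://proxy.example.com:3128 \
    --set global.no_proxy="localhost\,127.0.0.1"   # commas in --set values must be escaped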
helm-charts/common/llm-uservice/ci-vllm-gaudi-values.yaml
@@ -8,7 +8,15 @@ tgi:
  enabled: false
vllm:
  enabled: true
  image:
    repository: opea/vllm-hpu
    tag: "latest"
  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
  OMPI_MCA_btl_vader_single_copy_mechanism: none
  extraCmdArgs: ["--enforce-eager","--tensor-parallel-size","1","--block-size","128","--max-num-seqs","256","--max-seq_len-to-capture","2048"]
  resources:
    limits:
      habana.ai/gaudi: 1

vLLM_ENDPOINT: ""
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
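For reference, a rough sketch of how CI might exercise the chart with this Gaudi values file; the chart path, the global.HUGGINGFACEHUB_API_TOKEN value, and the helm test step are assumptions, not taken from this diff:

  helm dependency update ./helm-charts/common/llm-uservice
  helm install llm-uservice ./helm-charts/common/llm-uservice \
    -f ./helm-charts/common/llm-uservice/ci-vllm-gaudi-values.yaml \
    --set global.HUGGINGFACEHUB_API_TOKEN=<your-hf-token>
  helm test llm-uservice --timeout 30m   # model load on Gaudi can take a while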
2 changes: 1 addition & 1 deletion helm-charts/common/vllm/templates/tests/test-pod.yaml
@@ -19,7 +19,7 @@ spec:
- |
max_retry=20;
for ((i=1; i<=max_retry; i++)); do
curl http://{{ include "vllm.fullname" . }}/v1/completions \
curl http://{{ include "vllm.fullname" . }}/v1/completions -sS --fail-with-body \
-H "Content-Type: application/json" \
-d '{"model": {{ .Values.LLM_MODEL_ID | quote }},"prompt": "What is Deep Learning?","max_tokens": 32,"temperature": 0}' && break;
curlcode=$?
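With plain curl, an HTTP 4xx/5xx response still exits 0, so the retry loop would treat an error page as success. -sS hides the progress meter but keeps error messages, and --fail-with-body (curl 7.76.0+) returns a non-zero exit code on HTTP errors while still printing the response body for the test log. Roughly one iteration of the loop, against a placeholder endpoint:

  curl http://<vllm-service>/v1/completions -sS --fail-with-body \
    -H "Content-Type: application/json" \
    -d '{"model": "Intel/neural-chat-7b-v3-3", "prompt": "What is Deep Learning?", "max_tokens": 32, "temperature": 0}' \
    || echo "completion request failed (exit $?)"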
20 changes: 10 additions & 10 deletions helm-charts/common/vllm/values.yaml
@@ -24,16 +24,16 @@ podAnnotations: {}
podSecurityContext: {}
  # fsGroup: 2000

securityContext:
  readOnlyRootFilesystem: true
  allowPrivilegeEscalation: false
  runAsNonRoot: true
  runAsUser: 1000
  capabilities:
    drop:
    - ALL
  seccompProfile:
    type: RuntimeDefault
securityContext: {}
  # readOnlyRootFilesystem: true
  # allowPrivilegeEscalation: false
  # runAsNonRoot: true
  # runAsUser: 1000
  # capabilities:
  #   drop:
  #   - ALL
  # seccompProfile:
  #   type: RuntimeDefault

service:
  type: ClusterIP
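The hardened settings are only commented out, not dropped. On platforms where the image does tolerate a non-default user, they can be restored per release without editing the chart; a sketch using --set overrides (the release name and chart path are assumptions):

  helm install vllm ./helm-charts/common/vllm \
    --set securityContext.readOnlyRootFilesystem=true \
    --set securityContext.allowPrivilegeEscalation=false \
    --set securityContext.runAsNonRoot=true \
    --set securityContext.runAsUser=1000 \
    --set 'securityContext.capabilities.drop[0]=ALL' \
    --set securityContext.seccompProfile.type=RuntimeDefault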
2 changes: 1 addition & 1 deletion helm-charts/update_manifests.sh
@@ -31,7 +31,7 @@ function generate_yaml {
if [[ "$filename" =~ ^variant_.*-values.yaml ]]; then
ext=$(echo $filename | sed 's/^variant_//' | sed 's/-values.yaml$//')
outputfile="$ext-${chart}.yaml"
releasename=$ext-$chart
releasename=$(echo "${ext}-${chart}" | sed 's/_/-/g')
else
ext=$(echo $filename | sed 's/-values.yaml$//')
outputfile="${chart}_${ext}.yaml"
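Helm release names (and the Kubernetes resource names derived from them) must be DNS-compatible and may not contain underscores, while variant values files can carry underscores in their prefix, so the release name is now sanitized. Tracing the two sed pipelines with a hypothetical variant file:

  filename="variant_nv_gpu-values.yaml"   # hypothetical variant values file
  chart="llm-uservice"
  ext=$(echo $filename | sed 's/^variant_//' | sed 's/-values.yaml$//')    # -> nv_gpu
  releasename=$(echo "${ext}-${chart}" | sed 's/_/-/g')                    # -> nv-gpu-llm-uservice
  echo "$releasename"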
143 changes: 143 additions & 0 deletions microservices-connector/config/manifests/agent.yaml
@@ -0,0 +1,143 @@
---
# Source: agent/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: v1
kind: ConfigMap
metadata:
  name: agent-config
  labels:
    helm.sh/chart: agent-1.0.0
    app.kubernetes.io/name: agent
    app.kubernetes.io/instance: agent
    app.kubernetes.io/version: "v1.0"
    app.kubernetes.io/managed-by: Helm
data:
  llm_endpoint_url: "http://agent-tgi"
  #
  model: "meta-llama/Meta-Llama-3.1-70B-Instruct"
  RETRIEVAL_TOOL_URL: "http://agent-docretriever:8889/v1/retrievaltool"
  CRAG_SERVER: "http://agent-crag:8080"
  WORKER_AGENT_URL: "http://agent-worker:9095/v1/chat/completions"
  require_human_feedback: "false"
  recursion_limit: "15"
  llm_engine: "tgi"
  strategy: "react_langchain"
  max_new_tokens: "4096"
  HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
  HF_HOME: "/tmp/.cache/huggingface"
  http_proxy: ""
  https_proxy: ""
  no_proxy: ""
  LOGFLAG: "True"
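Because the Deployment below pulls this ConfigMap in via envFrom, runtime knobs such as recursion_limit or max_new_tokens can be tuned on a live cluster by patching the ConfigMap and restarting the pods; a sketch with illustrative values:

  kubectl patch configmap agent-config --type merge \
    -p '{"data":{"recursion_limit":"30","max_new_tokens":"2048"}}'   # values are illustrative
  kubectl rollout restart deployment/agent    # env vars are only read at container start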
---
# Source: agent/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: v1
kind: Service
metadata:
  name: agent
  labels:
    helm.sh/chart: agent-1.0.0
    app.kubernetes.io/name: agent
    app.kubernetes.io/instance: agent
    app.kubernetes.io/version: "v1.0"
    app.kubernetes.io/managed-by: Helm
spec:
  type: ClusterIP
  ports:
    - port: 9090
      targetPort: 9090
      protocol: TCP
      name: agent
  selector:
    app.kubernetes.io/name: agent
    app.kubernetes.io/instance: agent
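Once the manifest is applied, the service listens on port 9090; a quick health check through a port-forward (assuming the current kubectl context and namespace hold the agent resources):

  kubectl port-forward svc/agent 9090:9090 &
  curl -sS http://localhost:9090/v1/health_check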
---
# Source: agent/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: apps/v1
kind: Deployment
metadata:
  name: agent
  labels:
    helm.sh/chart: agent-1.0.0
    app.kubernetes.io/name: agent
    app.kubernetes.io/instance: agent
    app.kubernetes.io/version: "v1.0"
    app.kubernetes.io/managed-by: Helm
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: agent
      app.kubernetes.io/instance: agent
  template:
    metadata:
      labels:
        app.kubernetes.io/name: agent
        app.kubernetes.io/instance: agent
    spec:
      securityContext:
        {}
      containers:
        - name: agent
          envFrom:
            - configMapRef:
                name: agent-config
            - configMapRef:
                name: extra-env-config
              optional: true
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
              - ALL
            readOnlyRootFilesystem: false
            runAsNonRoot: true
            runAsUser: 1000
            seccompProfile:
              type: RuntimeDefault
          image: "opea/agent-langchain:latest"
          imagePullPolicy: IfNotPresent
          ports:
            - name: agent
              containerPort: 9090
              protocol: TCP
          volumeMounts:
            - mountPath: /tmp
              name: tmp
          livenessProbe:
            failureThreshold: 24
            httpGet:
              path: v1/health_check
              port: agent
            initialDelaySeconds: 5
            periodSeconds: 5
          readinessProbe:
            httpGet:
              path: v1/health_check
              port: agent
            initialDelaySeconds: 5
            periodSeconds: 5
          startupProbe:
            failureThreshold: 120
            httpGet:
              path: v1/health_check
              port: agent
            initialDelaySeconds: 5
            periodSeconds: 5
          resources:
            {}
      volumes:
        - name: tmp
          emptyDir: {}
---
# Source: agent/templates/servicemonitor.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
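The Deployment also references an optional extra-env-config ConfigMap after agent-config, so cluster-specific values such as a real HuggingFace token can be supplied there instead of editing the generated manifest (later envFrom sources take precedence for duplicate keys); a minimal sketch:

  kubectl create configmap extra-env-config \
    --from-literal=HUGGINGFACEHUB_API_TOKEN=<your-hf-token>
  kubectl rollout restart deployment/agent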
