Merge branch 'main' into langchain_opea
yogeshmpandey authored Jan 15, 2025
2 parents 6f0935e + 4c21738 commit 15f01a2
Showing 103 changed files with 2,061 additions and 1,161 deletions.
1 change: 0 additions & 1 deletion .github/workflows/_comps-workflow.yml
@@ -65,7 +65,6 @@ jobs:
fi
if [[ $(grep -c "vllm-gaudi:" ${docker_compose_yml}) != 0 ]]; then
git clone https://github.com/HabanaAI/vllm-fork.git vllm-fork
cd vllm-fork && git checkout 3c39626 && cd ../
fi
- name: Get build list
id: get-build-list
2 changes: 1 addition & 1 deletion .github/workflows/check-online-doc-build.yml
@@ -13,7 +13,7 @@ on:

jobs:
build:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
steps:

- name: Checkout
12 changes: 2 additions & 10 deletions .github/workflows/docker/compose/llms-compose.yaml
@@ -11,9 +11,9 @@ services:
build:
dockerfile: comps/llms/text-generation/ollama/langchain/Dockerfile
image: ${REGISTRY:-opea}/llm-ollama:${TAG:-latest}
llm-docsum-tgi:
llm-docsum:
build:
dockerfile: comps/llms/summarization/tgi/langchain/Dockerfile
dockerfile: comps/llms/src/doc-summarization/Dockerfile
image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest}
llm-faqgen:
build:
@@ -46,11 +46,3 @@ services:
build:
dockerfile: comps/llms/utils/lm-eval/Dockerfile
image: ${REGISTRY:-opea}/llm-eval:${TAG:-latest}
llm-textgen-predictionguard:
build:
dockerfile: comps/llms/text-generation/predictionguard/Dockerfile
image: ${REGISTRY:-opea}/llm-textgen-predictionguard:${TAG:-latest}
llm-docsum-vllm:
build:
dockerfile: comps/llms/summarization/vllm/langchain/Dockerfile
image: ${REGISTRY:-opea}/llm-docsum-vllm:${TAG:-latest}
11 changes: 11 additions & 0 deletions comps/agent/deployment/kubernetes/README.md
@@ -0,0 +1,11 @@
# Deploy Agent microservice on Kubernetes cluster

- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.
- For more deployment options, refer to [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme).

## Deploy on Kubernetes

```
export HFTOKEN="insert-your-huggingface-token-here"
helm install agent oci://ghcr.io/opea-project/charts/agent --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f gaudi-values.yaml
```
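
Once the chart is up, a quick way to confirm the agent is serving requests is to port-forward the service (for example `kubectl port-forward svc/agent 9090:9090`) and send a minimal chat request. The snippet below is a rough sketch; the service name, port `9090`, and the `/v1/chat/completions` route are assumptions that should be checked against the deployed service.

```python
# Minimal smoke test for the deployed agent microservice.
# Assumptions: the service is port-forwarded to localhost:9090 and exposes
# the /v1/chat/completions route used by the agent comp.
import requests

url = "http://localhost:9090/v1/chat/completions"
payload = {"messages": "What is OPEA?", "stream": False}

resp = requests.post(url, json=payload, timeout=120)
resp.raise_for_status()
print(resp.json())
```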
38 changes: 38 additions & 0 deletions comps/agent/deployment/kubernetes/gaudi-values.yaml
@@ -0,0 +1,38 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Accelerate inferencing in the heaviest components to improve performance
# by overriding their subchart values

tgi:
  enabled: true
  accelDevice: "gaudi"
  image:
    repository: ghcr.io/huggingface/tgi-gaudi
    tag: "2.0.6"
  resources:
    limits:
      habana.ai/gaudi: 4
  MAX_INPUT_LENGTH: "4096"
  MAX_TOTAL_TOKENS: "8192"
  CUDA_GRAPHS: ""
  OMPI_MCA_btl_vader_single_copy_mechanism: "none"
  PT_HPU_ENABLE_LAZY_COLLECTIVES: "true"
  ENABLE_HPU_GRAPH: "true"
  LIMIT_HPU_GRAPH: "true"
  USE_FLASH_ATTENTION: "true"
  FLASH_ATTENTION_RECOMPUTE: "true"
  extraCmdArgs: ["--sharded","true","--num-shard","4"]
  livenessProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1
  readinessProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1
  startupProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1
    failureThreshold: 120
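
Because this values file shards TGI across four Gaudi cards, it can help to confirm the text-generation backend responds before exercising the agent itself. A rough check, assuming the TGI service ends up reachable as `agent-tgi` on port 80 (the actual name depends on the Helm release):

```python
# Rough readiness check against the TGI backend configured above, using TGI's
# standard /generate endpoint. The service host below is an assumption; look
# up the real name with `kubectl get svc`.
import requests

base = "http://agent-tgi:80"  # or a port-forwarded localhost address
payload = {"inputs": "Hello", "parameters": {"max_new_tokens": 16}}

resp = requests.post(f"{base}/generate", json=payload, timeout=60)
print(resp.status_code, resp.json())
```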
41 changes: 21 additions & 20 deletions comps/agent/src/agent.py
@@ -5,7 +5,7 @@
import pathlib
import sys
from datetime import datetime
from typing import Union
from typing import List, Optional, Union

from fastapi.responses import StreamingResponse

@@ -40,7 +40,10 @@
agent_inst = instantiate_agent(args, args.strategy, with_memory=args.with_memory)


class AgentCompletionRequest(LLMParamsDoc):
class AgentCompletionRequest(ChatCompletionRequest):
# rewrite: specify the tools available in this turn of the conversation
tool_choice: Optional[List[str]] = None
# for short/long-term in-memory conversation state
thread_id: str = "0"
user_id: str = "0"

@@ -52,42 +55,40 @@ class AgentCompletionRequest(LLMParamsDoc):
host="0.0.0.0",
port=args.port,
)
async def llm_generate(input: Union[LLMParamsDoc, ChatCompletionRequest, AgentCompletionRequest]):
async def llm_generate(input: AgentCompletionRequest):
if logflag:
logger.info(input)

input.stream = args.stream
config = {"recursion_limit": args.recursion_limit}
# don't use global stream setting
# input.stream = args.stream
config = {"recursion_limit": args.recursion_limit, "tool_choice": input.tool_choice}

if args.with_memory:
if isinstance(input, AgentCompletionRequest):
config["configurable"] = {"thread_id": input.thread_id}
else:
config["configurable"] = {"thread_id": "0"}
config["configurable"] = {"thread_id": input.thread_id}

if logflag:
logger.info(type(agent_inst))

if isinstance(input, LLMParamsDoc):
# use query as input
input_query = input.query
# openai compatible input
if isinstance(input.messages, str):
messages = input.messages
else:
# openai compatible input
if isinstance(input.messages, str):
input_query = input.messages
else:
input_query = input.messages[-1]["content"]
# TODO: need to handle multi-turn messages
messages = input.messages[-1]["content"]

# 2. prepare the input for the agent
if input.stream:
logger.info("-----------STREAMING-------------")
return StreamingResponse(agent_inst.stream_generator(input_query, config), media_type="text/event-stream")
return StreamingResponse(
agent_inst.stream_generator(messages, config),
media_type="text/event-stream",
)

else:
logger.info("-----------NOT STREAMING-------------")
response = await agent_inst.non_streaming_run(input_query, config)
response = await agent_inst.non_streaming_run(messages, config)
logger.info("-----------Response-------------")
return GeneratedDoc(text=response, prompt=input_query)
return GeneratedDoc(text=response, prompt=messages)


@register_microservice(
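
With `AgentCompletionRequest` now derived from `ChatCompletionRequest`, clients send OpenAI-style `messages` and can additionally pass the per-turn `tool_choice` filter and a `thread_id` for the in-memory conversation state. A sketch of such a request is below; the URL assumes a locally running agent microservice on port 9090, which may differ in your setup.

```python
# Example request exercising the new request fields:
#   tool_choice - restrict which tools the agent may call in this turn
#   thread_id   - select the conversation thread when memory is enabled
# The endpoint URL is an assumption for a local deployment.
import requests

payload = {
    "messages": [{"role": "user", "content": "What is the weather today?"}],
    "tool_choice": ["search_weather"],
    "thread_id": "42",
    "stream": False,
}

resp = requests.post("http://localhost:9090/v1/chat/completions", json=payload, timeout=120)
print(resp.json())
```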
50 changes: 40 additions & 10 deletions comps/agent/src/integrations/strategy/react/planner.py
@@ -11,7 +11,7 @@
from langgraph.prebuilt import create_react_agent

from ...global_var import threads_global_kv
from ...utils import has_multi_tool_inputs, tool_renderer
from ...utils import filter_tools, has_multi_tool_inputs, tool_renderer
from ..base_agent import BaseAgent
from .prompt import REACT_SYS_MESSAGE, hwchase17_react_prompt

@@ -136,7 +136,8 @@ async def non_streaming_run(self, query, config):
# does not rely on langchain bind_tools API
# since tgi and vllm still do not have very good support for tool calling like OpenAI

from typing import Annotated, Sequence, TypedDict
import json
from typing import Annotated, List, Optional, Sequence, TypedDict

from langchain_core.messages import AIMessage, BaseMessage
from langchain_core.prompts import PromptTemplate
@@ -154,6 +155,7 @@ class AgentState(TypedDict):
"""The state of the agent."""

messages: Annotated[Sequence[BaseMessage], add_messages]
tool_choice: Optional[List[str]] = None
is_last_step: IsLastStep


@@ -191,7 +193,11 @@ def __call__(self, state):
history = assemble_history(messages)
print("@@@ History: ", history)

tools_descriptions = tool_renderer(self.tools)
tools_used = self.tools
if state["tool_choice"] is not None:
tools_used = filter_tools(self.tools, state["tool_choice"])

tools_descriptions = tool_renderer(tools_used)
print("@@@ Tools description: ", tools_descriptions)

# invoke chain
@@ -279,21 +285,45 @@ def prepare_initial_state(self, query):

async def stream_generator(self, query, config):
initial_state = self.prepare_initial_state(query)
if "tool_choice" in config:
initial_state["tool_choice"] = config.pop("tool_choice")

try:
async for event in self.app.astream(initial_state, config=config):
for node_name, node_state in event.items():
yield f"--- CALL {node_name} ---\n"
for k, v in node_state.items():
if v is not None:
yield f"{k}: {v}\n"
async for event in self.app.astream(initial_state, config=config, stream_mode=["updates"]):
event_type = event[0]
data = event[1]
if event_type == "updates":
for node_name, node_state in data.items():
print(f"--- CALL {node_name} node ---\n")
for k, v in node_state.items():
if v is not None:
print(f"------- {k}, {v} -------\n\n")
if node_name == "agent":
if v[0].content == "":
tool_names = []
for tool_call in v[0].tool_calls:
tool_names.append(tool_call["name"])
result = {"tool": tool_names}
else:
result = {"content": [v[0].content.replace("\n\n", "\n")]}
# ui needs this format
yield f"data: {json.dumps(result)}\n\n"
elif node_name == "tools":
full_content = v[0].content
tool_name = v[0].name
result = {"tool": tool_name, "content": [full_content]}
yield f"data: {json.dumps(result)}\n\n"
if not full_content:
continue

yield f"data: {repr(event)}\n\n"
yield "data: [DONE]\n\n"
except Exception as e:
yield str(e)

async def non_streaming_run(self, query, config):
initial_state = self.prepare_initial_state(query)
if "tool_choice" in config:
initial_state["tool_choice"] = config.pop("tool_choice")
try:
async for s in self.app.astream(initial_state, config=config, stream_mode="values"):
message = s["messages"][-1]
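
The reworked `stream_generator` now emits server-sent events where each `data:` payload is JSON: agent steps carry either `{"tool": [...]}` (the tool calls about to be made) or `{"content": [...]}`, and tool steps carry the tool name plus its output, terminated by `data: [DONE]`. A client-side sketch for consuming that stream, assuming a local agent service:

```python
# Sketch of a client consuming the new SSE format produced by stream_generator.
# Each line looks like 'data: {"tool": [...]}' or 'data: {"content": [...]}',
# ending with 'data: [DONE]'. The URL is an assumption for a local deployment.
import json
import requests

payload = {"messages": "What is OPEA?", "stream": True}

with requests.post(
    "http://localhost:9090/v1/chat/completions", json=payload, stream=True, timeout=300
) as resp:
    for line in resp.iter_lines(decode_unicode=True):
        if not line or not line.startswith("data: "):
            continue
        chunk = line[len("data: "):]
        if chunk == "[DONE]":
            break
        print(json.loads(chunk))
```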
8 changes: 8 additions & 0 deletions comps/agent/src/integrations/utils.py
@@ -86,6 +86,14 @@ def tool_renderer(tools):
return "\n".join(tool_strings)


def filter_tools(tools, tools_choices):
    tool_used = []
    for tool in tools:
        if tool.name in tools_choices:
            tool_used.append(tool)
    return tool_used


def has_multi_tool_inputs(tools):
ret = False
for tool in tools:
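
The new `filter_tools` helper simply keeps the tools whose `name` appears in the requested list. A stand-alone illustration, using a hypothetical `FakeTool` stand-in for a LangChain tool object (only a `.name` attribute is needed here):

```python
# Stand-alone illustration of the filter_tools behavior shown above.
# FakeTool is a hypothetical stand-in for a LangChain tool; only .name is used.
from dataclasses import dataclass


def filter_tools(tools, tools_choices):
    return [tool for tool in tools if tool.name in tools_choices]


@dataclass
class FakeTool:
    name: str


tools = [FakeTool("search_web"), FakeTool("search_weather")]
print([t.name for t in filter_tools(tools, ["search_weather"])])  # ['search_weather']
```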
10 changes: 9 additions & 1 deletion comps/agent/src/tools/custom_tools.py
@@ -4,9 +4,17 @@

# tool for unit test
def search_web(query: str) -> str:
    """Search the web for a given query."""
    """Search the web knowledge for a given query."""
    ret_text = """
    The Linux Foundation AI & Data announced the Open Platform for Enterprise AI (OPEA) as its latest Sandbox Project.
    OPEA aims to accelerate secure, cost-effective generative AI (GenAI) deployments for businesses by driving interoperability across a diverse and heterogeneous ecosystem, starting with retrieval-augmented generation (RAG).
    """
    return ret_text


def search_weather(query: str) -> str:
    """Search the weather for a given query."""
    ret_text = """
    It's clear.
    """
    return ret_text
11 changes: 11 additions & 0 deletions comps/asr/deployment/kubernetes/README.md
@@ -0,0 +1,11 @@
# Deploy ASR microservice on Kubernetes cluster

- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.
- For more deployment options, refer to [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme).

## Deploy on Kubernetes

```
export HFTOKEN="insert-your-huggingface-token-here"
helm install asr oci://ghcr.io/opea-project/charts/asr --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml
```
5 changes: 5 additions & 0 deletions comps/asr/deployment/kubernetes/cpu-values.yaml
@@ -0,0 +1,5 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

whisper:
  enabled: true
11 changes: 11 additions & 0 deletions comps/chathistory/deployment/kubernetes/README.md
@@ -0,0 +1,11 @@
# Deploy chathistory microservice on Kubernetes cluster

- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.
- For more deployment options, refer to [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme).

## Deploy on Kubernetes

```
export HFTOKEN="insert-your-huggingface-token-here"
helm install chathistory-usvc oci://ghcr.io/opea-project/charts/chathistory-usvc --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml
```
5 changes: 5 additions & 0 deletions comps/chathistory/deployment/kubernetes/cpu-values.yaml
@@ -0,0 +1,5 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

mongodb:
  enabled: true
18 changes: 18 additions & 0 deletions comps/dataprep/deployment/kubernetes/README.md
@@ -0,0 +1,18 @@
# Deploy dataprep microservice on Kubernetes cluster

- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.
- For more deployment options, refer to [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme).

## Deploy on Kubernetes with redis VectorDB

```
export HFTOKEN="insert-your-huggingface-token-here"
helm install data-prep oci://ghcr.io/opea-project/charts/data-prep --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f redis-values.yaml
```

## Deploy on Kubernetes with milvus VectorDB

```
export HFTOKEN="insert-your-huggingface-token-here"
helm install data-prep oci://ghcr.io/opea-project/charts/data-prep --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f milvus-values.yaml
```
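
After either install, the data-prep service can be exercised by posting a document for ingestion. The sketch below assumes the service has been port-forwarded to `localhost:6007` and that a multipart `/v1/dataprep` route is exposed; both the port and the route are assumptions to verify against the deployed chart.

```python
# Hypothetical ingestion check for the deployed data-prep service.
# The port and the /v1/dataprep route are assumptions; confirm them with
# `kubectl get svc` and the chart documentation before relying on this.
import requests

url = "http://localhost:6007/v1/dataprep"
with open("sample.txt", "rb") as f:
    resp = requests.post(url, files={"files": ("sample.txt", f, "text/plain")}, timeout=300)

print(resp.status_code, resp.text)
```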
30 changes: 30 additions & 0 deletions comps/dataprep/deployment/kubernetes/milvus-values.yaml
@@ -0,0 +1,30 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

milvus:
  enabled: true
  cluster:
    enabled: false
  etcd:
    replicaCount: 1
  pulsar:
    enabled: false
  minio:
    mode: standalone
redis-vector-db:
  enabled: false
tei:
  enabled: true

image:
  repository: opea/dataprep-milvus

port: 6010
# text embedding inference service URL, e.g. http://<service-name>:<port>
#TEI_EMBEDDING_ENDPOINT: "http://embedding-tei:80"
# milvus DB configurations
#MILVUS_HOST: "milvustest"
MILVUS_PORT: "19530"
COLLECTION_NAME: "rag_milvus"
MOSEC_EMBEDDING_ENDPOINT: ""
MOSEC_EMBEDDING_MODEL: ""
9 changes: 9 additions & 0 deletions comps/dataprep/deployment/kubernetes/redis-values.yaml
@@ -0,0 +1,9 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

tei:
  enabled: true
redis-vector-db:
  enabled: true
milvus:
  enabled: false