Merge branch 'main' into langchain_opea
yogeshmpandey authored Jan 15, 2025
2 parents 6f0935e + 4c21738 commit 15f01a2
Showing 103 changed files with 2,061 additions and 1,161 deletions.
1 change: 0 additions & 1 deletion .github/workflows/_comps-workflow.yml
@@ -65,7 +65,6 @@ jobs:
fi
if [[ $(grep -c "vllm-gaudi:" ${docker_compose_yml}) != 0 ]]; then
git clone https://github.com/HabanaAI/vllm-fork.git vllm-fork
cd vllm-fork && git checkout 3c39626 && cd ../
fi
- name: Get build list
id: get-build-list
2 changes: 1 addition & 1 deletion .github/workflows/check-online-doc-build.yml
@@ -13,7 +13,7 @@ on:

jobs:
build:
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
steps:

- name: Checkout
12 changes: 2 additions & 10 deletions .github/workflows/docker/compose/llms-compose.yaml
@@ -11,9 +11,9 @@ services:
build:
dockerfile: comps/llms/text-generation/ollama/langchain/Dockerfile
image: ${REGISTRY:-opea}/llm-ollama:${TAG:-latest}
llm-docsum-tgi:
llm-docsum:
build:
dockerfile: comps/llms/summarization/tgi/langchain/Dockerfile
dockerfile: comps/llms/src/doc-summarization/Dockerfile
image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest}
llm-faqgen:
build:
@@ -46,11 +46,3 @@ services:
build:
dockerfile: comps/llms/utils/lm-eval/Dockerfile
image: ${REGISTRY:-opea}/llm-eval:${TAG:-latest}
llm-textgen-predictionguard:
build:
dockerfile: comps/llms/text-generation/predictionguard/Dockerfile
image: ${REGISTRY:-opea}/llm-textgen-predictionguard:${TAG:-latest}
llm-docsum-vllm:
build:
dockerfile: comps/llms/summarization/vllm/langchain/Dockerfile
image: ${REGISTRY:-opea}/llm-docsum-vllm:${TAG:-latest}
11 changes: 11 additions & 0 deletions comps/agent/deployment/kubernetes/README.md
@@ -0,0 +1,11 @@
# Deploy Agent microservice on Kubernetes cluster

- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.
- For more deployment options, refer to [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme).

## Deploy on Kubernetes

```
export HFTOKEN="insert-your-huggingface-token-here"
helm install agent oci://ghcr.io/opea-project/charts/agent --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f gaudi-values.yaml
```
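
Once the chart is up, a quick way to confirm the agent is serving requests is to port-forward the service (for example `kubectl port-forward svc/agent 9090:9090`) and send a minimal chat request. The snippet below is a rough sketch; the service name, port `9090`, and the `/v1/chat/completions` route are assumptions that should be checked against the deployed service.

```python
# Minimal smoke test for the deployed agent microservice.
# Assumptions: the service is port-forwarded to localhost:9090 and exposes
# the /v1/chat/completions route used by the agent comp.
import requests

url = "http://localhost:9090/v1/chat/completions"
payload = {"messages": "What is OPEA?", "stream": False}

resp = requests.post(url, json=payload, timeout=120)
resp.raise_for_status()
print(resp.json())
```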
38 changes: 38 additions & 0 deletions comps/agent/deployment/kubernetes/gaudi-values.yaml
@@ -0,0 +1,38 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Accelerate inferencing in the heaviest components to improve performance
# by overriding their subchart values

tgi:
  enabled: true
  accelDevice: "gaudi"
  image:
    repository: ghcr.io/huggingface/tgi-gaudi
    tag: "2.0.6"
  resources:
    limits:
      habana.ai/gaudi: 4
  MAX_INPUT_LENGTH: "4096"
  MAX_TOTAL_TOKENS: "8192"
  CUDA_GRAPHS: ""
  OMPI_MCA_btl_vader_single_copy_mechanism: "none"
  PT_HPU_ENABLE_LAZY_COLLECTIVES: "true"
  ENABLE_HPU_GRAPH: "true"
  LIMIT_HPU_GRAPH: "true"
  USE_FLASH_ATTENTION: "true"
  FLASH_ATTENTION_RECOMPUTE: "true"
  extraCmdArgs: ["--sharded","true","--num-shard","4"]
  livenessProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1
  readinessProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1
  startupProbe:
    initialDelaySeconds: 5
    periodSeconds: 5
    timeoutSeconds: 1
    failureThreshold: 120
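
Because this values file shards TGI across four Gaudi cards, it can help to confirm the text-generation backend responds before exercising the agent itself. A rough check, assuming the TGI service ends up reachable as `agent-tgi` on port 80 (the actual name depends on the Helm release):

```python
# Rough readiness check against the TGI backend configured above, using TGI's
# standard /generate endpoint. The service host below is an assumption; look
# up the real name with `kubectl get svc`.
import requests

base = "http://agent-tgi:80"  # or a port-forwarded localhost address
payload = {"inputs": "Hello", "parameters": {"max_new_tokens": 16}}

resp = requests.post(f"{base}/generate", json=payload, timeout=60)
print(resp.status_code, resp.json())
```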
41 changes: 21 additions & 20 deletions comps/agent/src/agent.py
@@ -5,7 +5,7 @@
import pathlib
import sys
from datetime import datetime
from typing import Union
from typing import List, Optional, Union

from fastapi.responses import StreamingResponse

@@ -40,7 +40,10 @@
agent_inst = instantiate_agent(args, args.strategy, with_memory=args.with_memory)


class AgentCompletionRequest(LLMParamsDoc):
class AgentCompletionRequest(ChatCompletionRequest):
# rewrite: specify the tools available in this turn of the conversation
tool_choice: Optional[List[str]] = None
# for short/long-term in-memory conversation state
thread_id: str = "0"
user_id: str = "0"

@@ -52,42 +55,40 @@ class AgentCompletionRequest(LLMParamsDoc):
host="0.0.0.0",
port=args.port,
)
async def llm_generate(input: Union[LLMParamsDoc, ChatCompletionRequest, AgentCompletionRequest]):
async def llm_generate(input: AgentCompletionRequest):
if logflag:
logger.info(input)

input.stream = args.stream
config = {"recursion_limit": args.recursion_limit}
# don't use global stream setting
# input.stream = args.stream
config = {"recursion_limit": args.recursion_limit, "tool_choice": input.tool_choice}

if args.with_memory:
if isinstance(input, AgentCompletionRequest):
config["configurable"] = {"thread_id": input.thread_id}
else:
config["configurable"] = {"thread_id": "0"}
config["configurable"] = {"thread_id": input.thread_id}

if logflag:
logger.info(type(agent_inst))

if isinstance(input, LLMParamsDoc):
# use query as input
input_query = input.query
# openai compatible input
if isinstance(input.messages, str):
messages = input.messages
else:
# openai compatible input
if isinstance(input.messages, str):
input_query = input.messages
else:
input_query = input.messages[-1]["content"]
# TODO: need to handle multi-turn messages
messages = input.messages[-1]["content"]

# 2. prepare the input for the agent
if input.stream:
logger.info("-----------STREAMING-------------")
return StreamingResponse(agent_inst.stream_generator(input_query, config), media_type="text/event-stream")
return StreamingResponse(
agent_inst.stream_generator(messages, config),
media_type="text/event-stream",
)

else:
logger.info("-----------NOT STREAMING-------------")
response = await agent_inst.non_streaming_run(input_query, config)
response = await agent_inst.non_streaming_run(messages, config)
logger.info("-----------Response-------------")
return GeneratedDoc(text=response, prompt=input_query)
return GeneratedDoc(text=response, prompt=messages)


@register_microservice(
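
With `AgentCompletionRequest` now derived from `ChatCompletionRequest`, clients send OpenAI-style `messages` and can additionally pass the per-turn `tool_choice` filter and a `thread_id` for the in-memory conversation state. A sketch of such a request is below; the URL assumes a locally running agent microservice on port 9090, which may differ in your setup.

```python
# Example request exercising the new request fields:
#   tool_choice - restrict which tools the agent may call in this turn
#   thread_id   - select the conversation thread when memory is enabled
# The endpoint URL is an assumption for a local deployment.
import requests

payload = {
    "messages": [{"role": "user", "content": "What is the weather today?"}],
    "tool_choice": ["search_weather"],
    "thread_id": "42",
    "stream": False,
}

resp = requests.post("http://localhost:9090/v1/chat/completions", json=payload, timeout=120)
print(resp.json())
```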
50 changes: 40 additions & 10 deletions comps/agent/src/integrations/strategy/react/planner.py
@@ -11,7 +11,7 @@
from langgraph.prebuilt import create_react_agent

from ...global_var import threads_global_kv
from ...utils import has_multi_tool_inputs, tool_renderer
from ...utils import filter_tools, has_multi_tool_inputs, tool_renderer
from ..base_agent import BaseAgent
from .prompt import REACT_SYS_MESSAGE, hwchase17_react_prompt

@@ -136,7 +136,8 @@ async def non_streaming_run(self, query, config):
# does not rely on langchain bind_tools API
# since tgi and vllm still do not have very good support for tool calling like OpenAI

from typing import Annotated, Sequence, TypedDict
import json
from typing import Annotated, List, Optional, Sequence, TypedDict

from langchain_core.messages import AIMessage, BaseMessage
from langchain_core.prompts import PromptTemplate
@@ -154,6 +155,7 @@ class AgentState(TypedDict):
"""The state of the agent."""

messages: Annotated[Sequence[BaseMessage], add_messages]
tool_choice: Optional[List[str]] = None
is_last_step: IsLastStep


@@ -191,7 +193,11 @@ def __call__(self, state):
history = assemble_history(messages)
print("@@@ History: ", history)

tools_descriptions = tool_renderer(self.tools)
tools_used = self.tools
if state["tool_choice"] is not None:
tools_used = filter_tools(self.tools, state["tool_choice"])

tools_descriptions = tool_renderer(tools_used)
print("@@@ Tools description: ", tools_descriptions)

# invoke chain
@@ -279,21 +285,45 @@ def prepare_initial_state(self, query):

async def stream_generator(self, query, config):
initial_state = self.prepare_initial_state(query)
if "tool_choice" in config:
initial_state["tool_choice"] = config.pop("tool_choice")

try:
async for event in self.app.astream(initial_state, config=config):
for node_name, node_state in event.items():
yield f"--- CALL {node_name} ---\n"
for k, v in node_state.items():
if v is not None:
yield f"{k}: {v}\n"
async for event in self.app.astream(initial_state, config=config, stream_mode=["updates"]):
event_type = event[0]
data = event[1]
if event_type == "updates":
for node_name, node_state in data.items():
print(f"--- CALL {node_name} node ---\n")
for k, v in node_state.items():
if v is not None:
print(f"------- {k}, {v} -------\n\n")
if node_name == "agent":
if v[0].content == "":
tool_names = []
for tool_call in v[0].tool_calls:
tool_names.append(tool_call["name"])
result = {"tool": tool_names}
else:
result = {"content": [v[0].content.replace("\n\n", "\n")]}
# ui needs this format
yield f"data: {json.dumps(result)}\n\n"
elif node_name == "tools":
full_content = v[0].content
tool_name = v[0].name
result = {"tool": tool_name, "content": [full_content]}
yield f"data: {json.dumps(result)}\n\n"
if not full_content:
continue

yield f"data: {repr(event)}\n\n"
yield "data: [DONE]\n\n"
except Exception as e:
yield str(e)

async def non_streaming_run(self, query, config):
initial_state = self.prepare_initial_state(query)
if "tool_choice" in config:
initial_state["tool_choice"] = config.pop("tool_choice")
try:
async for s in self.app.astream(initial_state, config=config, stream_mode="values"):
message = s["messages"][-1]
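
The reworked `stream_generator` now emits server-sent events where each `data:` payload is JSON: agent steps carry either `{"tool": [...]}` (the tool calls about to be made) or `{"content": [...]}`, and tool steps carry the tool name plus its output, terminated by `data: [DONE]`. A client-side sketch for consuming that stream, assuming a local agent service:

```python
# Sketch of a client consuming the new SSE format produced by stream_generator.
# Each line looks like 'data: {"tool": [...]}' or 'data: {"content": [...]}',
# ending with 'data: [DONE]'. The URL is an assumption for a local deployment.
import json
import requests

payload = {"messages": "What is OPEA?", "stream": True}

with requests.post(
    "http://localhost:9090/v1/chat/completions", json=payload, stream=True, timeout=300
) as resp:
    for line in resp.iter_lines(decode_unicode=True):
        if not line or not line.startswith("data: "):
            continue
        chunk = line[len("data: "):]
        if chunk == "[DONE]":
            break
        print(json.loads(chunk))
```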
8 changes: 8 additions & 0 deletions comps/agent/src/integrations/utils.py
@@ -86,6 +86,14 @@ def tool_renderer(tools):
return "\n".join(tool_strings)


def filter_tools(tools, tools_choices):
    tool_used = []
    for tool in tools:
        if tool.name in tools_choices:
            tool_used.append(tool)
    return tool_used


def has_multi_tool_inputs(tools):
ret = False
for tool in tools:
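
The new `filter_tools` helper simply keeps the tools whose `name` appears in the requested list. A stand-alone illustration, using a hypothetical `FakeTool` stand-in for a LangChain tool object (only a `.name` attribute is needed here):

```python
# Stand-alone illustration of the filter_tools behavior shown above.
# FakeTool is a hypothetical stand-in for a LangChain tool; only .name is used.
from dataclasses import dataclass


def filter_tools(tools, tools_choices):
    return [tool for tool in tools if tool.name in tools_choices]


@dataclass
class FakeTool:
    name: str


tools = [FakeTool("search_web"), FakeTool("search_weather")]
print([t.name for t in filter_tools(tools, ["search_weather"])])  # ['search_weather']
```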
10 changes: 9 additions & 1 deletion comps/agent/src/tools/custom_tools.py
@@ -4,9 +4,17 @@

# tool for unit test
def search_web(query: str) -> str:
    """Search the web for a given query."""
    """Search the web knowledge for a given query."""
    ret_text = """
    The Linux Foundation AI & Data announced the Open Platform for Enterprise AI (OPEA) as its latest Sandbox Project.
    OPEA aims to accelerate secure, cost-effective generative AI (GenAI) deployments for businesses by driving interoperability across a diverse and heterogeneous ecosystem, starting with retrieval-augmented generation (RAG).
    """
    return ret_text


def search_weather(query: str) -> str:
    """Search the weather for a given query."""
    ret_text = """
    It's clear.
    """
    return ret_text
11 changes: 11 additions & 0 deletions comps/asr/deployment/kubernetes/README.md
@@ -0,0 +1,11 @@
# Deploy ASR microservice on Kubernetes cluster

- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.
- For more deployment options, refer to [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme).

## Deploy on Kubernetes

```
export HFTOKEN="insert-your-huggingface-token-here"
helm install asr oci://ghcr.io/opea-project/charts/asr --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml
```
5 changes: 5 additions & 0 deletions comps/asr/deployment/kubernetes/cpu-values.yaml
@@ -0,0 +1,5 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

whisper:
  enabled: true
11 changes: 11 additions & 0 deletions comps/chathistory/deployment/kubernetes/README.md
@@ -0,0 +1,11 @@
# Deploy chathistory microservice on Kubernetes cluster

- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.
- For more deployment options, refer to [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme).

## Deploy on Kubernetes

```
export HFTOKEN="insert-your-huggingface-token-here"
helm install chathistory-usvc oci://ghcr.io/opea-project/charts/chathistory-usvc --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml
```
5 changes: 5 additions & 0 deletions comps/chathistory/deployment/kubernetes/cpu-values.yaml
@@ -0,0 +1,5 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

mongodb:
  enabled: true
18 changes: 18 additions & 0 deletions comps/dataprep/deployment/kubernetes/README.md
@@ -0,0 +1,18 @@
# Deploy dataprep microservice on Kubernetes cluster

- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information.
- For more deployment options, refer to [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme).

## Deploy on Kubernetes with redis VectorDB

```
export HFTOKEN="insert-your-huggingface-token-here"
helm install data-prep oci://ghcr.io/opea-project/charts/data-prep --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f redis-values.yaml
```

## Deploy on Kubernetes with milvus VectorDB

```
export HFTOKEN="insert-your-huggingface-token-here"
helm install data-prep oci://ghcr.io/opea-project/charts/data-prep --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f milvus-values.yaml
```
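
After either install, the data-prep service can be exercised by posting a document for ingestion. The sketch below assumes the service has been port-forwarded to `localhost:6007` and that a multipart `/v1/dataprep` route is exposed; both the port and the route are assumptions to verify against the deployed chart.

```python
# Hypothetical ingestion check for the deployed data-prep service.
# The port and the /v1/dataprep route are assumptions; confirm them with
# `kubectl get svc` and the chart documentation before relying on this.
import requests

url = "http://localhost:6007/v1/dataprep"
with open("sample.txt", "rb") as f:
    resp = requests.post(url, files={"files": ("sample.txt", f, "text/plain")}, timeout=300)

print(resp.status_code, resp.text)
```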
30 changes: 30 additions & 0 deletions comps/dataprep/deployment/kubernetes/milvus-values.yaml
@@ -0,0 +1,30 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

milvus:
  enabled: true
  cluster:
    enabled: false
  etcd:
    replicaCount: 1
  pulsar:
    enabled: false
  minio:
    mode: standalone
redis-vector-db:
  enabled: false
tei:
  enabled: true

image:
  repository: opea/dataprep-milvus

port: 6010
# text embedding inference service URL, e.g. http://<service-name>:<port>
#TEI_EMBEDDING_ENDPOINT: "http://embedding-tei:80"
# milvus DB configurations
#MILVUS_HOST: "milvustest"
MILVUS_PORT: "19530"
COLLECTION_NAME: "rag_milvus"
MOSEC_EMBEDDING_ENDPOINT: ""
MOSEC_EMBEDDING_MODEL: ""
9 changes: 9 additions & 0 deletions comps/dataprep/deployment/kubernetes/redis-values.yaml
@@ -0,0 +1,9 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

tei:
  enabled: true
redis-vector-db:
  enabled: true
milvus:
  enabled: false