diff --git a/comps/agent/langchain/README.md b/comps/agent/langchain/README.md
index b48e393eb..bc694cdf1 100644
--- a/comps/agent/langchain/README.md
+++ b/comps/agent/langchain/README.md
@@ -1,39 +1,70 @@
-# langchain Agent Microservice
+# Agent Microservice
-The langchain agent model refers to a framework that integrates the reasoning capabilities of large language models (LLMs) with the ability to take actionable steps, creating a more sophisticated system that can understand and process information, evaluate situations, take appropriate actions, communicate responses, and track ongoing situations.
+## 1. Overview
-![Architecture Overview](agent_arch.jpg)
+This agent microservice is built on the Langchain/Langgraph frameworks. Agents integrate the reasoning capabilities of large language models (LLMs) with the ability to take actionable steps, creating a more sophisticated system that can understand and process information, evaluate situations, take appropriate actions, communicate responses, and track ongoing situations.
+
+### 1.1 Supported agent types
+
+We currently support the following types of agents:
+
+1. ReAct: use `react_langchain` or `react_langgraph` as the strategy. First introduced in this seminal [paper](https://arxiv.org/abs/2210.03629), the ReAct agent engages in "reason-act-observe" cycles to solve problems. Please refer to this [doc](https://python.langchain.com/v0.2/docs/how_to/migrate_agent/) to understand the differences between the langchain and langgraph versions of ReAct agents.
+2. RAG agent: `rag_agent` strategy. This agent is specifically designed to improve RAG performance. It can rephrase the query, check the relevancy of the retrieved context, and iterate if the context is not relevant.
+3. Plan and execute: `plan_execute` strategy. This type of agent first makes a step-by-step plan given a user request, and then executes the plan sequentially (parallel execution is to be implemented in the future). If the execution results can solve the problem, the agent outputs an answer; otherwise, it replans and executes again.
+   For advanced developers who want to implement their own agent strategies, please refer to [Section 5](#5-customize-agent-strategy) below.
+
+### 1.2 LLM engine
+
+Agents use an LLM for reasoning and planning. We support two LLM engine options:
+
+1. Open-source LLMs served with TGI-Gaudi. To use open-source LLMs, follow the instructions in [Section 2](#222-start-microservices) below. Note: we recommend using state-of-the-art LLMs, such as llama3.1-70B-instruct, to get a higher success rate.
+2. OpenAI LLMs via API calls. To use OpenAI LLMs, specify `llm_engine=openai` and `export OPENAI_API_KEY=`
+
+### 1.3 Tools
-## 🚀1. Start Microservice with Python(Option 1)
+Tools are registered with a yaml file. We support the following types of tools:
-### 1.1 Install Requirements
+1. Endpoint: the user provides the URL of a service endpoint.
+2. User-defined Python functions. These are usually used to wrap endpoints with POST requests or to do simple pre/post-processing.
+3. Langchain tool modules.
+   Examples of how to register tools can be found in [Section 4](#-4-provide-your-own-tools) below.
+
+### 1.4 Agent APIs
+
+Currently we have implemented an OpenAI-chat-completion-compatible API for agents. We are working on supporting the OpenAI Assistants APIs.
+
+# 🚀2. Start Agent Microservice
+
+## 2.1 Option 1: with Python
+
+### 2.1.1 Install Requirements

```bash
cd comps/agent/langchain/
pip install -r requirements.txt
```

-### 1.2 Start Microservice with Python Script
+### 2.1.2 Start Microservice with Python Script

```bash
cd comps/agent/langchain/
python agent.py
```

-## 🚀2. Start Microservice with Docker (Option 2)
+## 2.2 Option 2: Start Microservice with Docker

-### Build Microservices
+### 2.2.1 Build Microservices

```bash
cd GenAIComps/ # back to GenAIComps/ folder
docker build -t opea/comps-agent-langchain:latest -f comps/agent/langchain/docker/Dockerfile .
```

-### start microservices
+### 2.2.2 Start Microservices

```bash
export ip_address=$(hostname -I | awk '{print $1}')
-export model=meta-llama/Meta-Llama-3-8B-Instruct
+export model=mistralai/Mistral-7B-Instruct-v0.3
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
@@ -53,10 +84,10 @@ docker logs comps-langchain-agent-endpoint
> debug mode
>
> ```bash
-> docker run --rm --runtime=runc --name="comps-langchain-agent-endpoint" -v ./comps/agent/langchain/:/home/user/comps/agent/langchain/ -p 9090:9090 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} --env-file ${agent_env} opea/comps-agent-langchain:latest
+> docker run --rm --runtime=runc --name="comps-langchain-agent-endpoint" -v ./comps/agent/langchain/:/home/user/comps/agent/langchain/ -p 9090:9090 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e model=${model} -e ip_address=${ip_address} -e strategy=react -e llm_endpoint_url=http://${ip_address}:8080 -e llm_engine=tgi -e recursion_limit=5 -e require_human_feedback=false -e tools=/home/user/comps/agent/langchain/tools/custom_tools.yaml opea/comps-agent-langchain:latest
> ```

-## 🚀3. Validate Microservice
+# 🚀 3. Validate Microservice

Once microservice starts, user can use below script to invoke.
@@ -73,7 +104,7 @@ data: [DONE]
```

-## 🚀4. Provide your own tools
+# 🚀 4. Provide your own tools

- Define tools
@@ -148,3 +179,8 @@ data: 'The weather information in Austin is not available from the Open Platform
data: [DONE]
```
+
+# 5. Customize agent strategy
+
+For advanced developers who want to implement their own agent strategies, you can add a separate folder in `src/strategy`, implement your agent by inheriting from the `BaseAgent` class, and register your strategy in `src/agent.py`. The architecture of this agent microservice is shown in the diagram below for reference.
+![Architecture Overview](agent_arch.jpg)
diff --git a/comps/agent/langchain/agent.py b/comps/agent/langchain/agent.py
index 4d946eda7..fffdc8765 100644
--- a/comps/agent/langchain/agent.py
+++ b/comps/agent/langchain/agent.py
@@ -12,7 +12,7 @@
comps_path = os.path.join(cur_path, "../../../")
sys.path.append(comps_path)

-from comps import LLMParamsDoc, ServiceType, opea_microservices, register_microservice
+from comps import GeneratedDoc, LLMParamsDoc, ServiceType, opea_microservices, register_microservice
from comps.agent.langchain.src.agent import instantiate_agent
from comps.agent.langchain.src.utils import get_args
@@ -27,20 +27,26 @@
    port=args.port,
    input_datatype=LLMParamsDoc,
)
-def llm_generate(input: LLMParamsDoc):
+async def llm_generate(input: LLMParamsDoc):
    # 1.
initialize the agent print("args: ", args) + input.streaming = args.streaming config = {"recursion_limit": args.recursion_limit} agent_inst = instantiate_agent(args, args.strategy) print(type(agent_inst)) # 2. prepare the input for the agent if input.streaming: + print("-----------STREAMING-------------") return StreamingResponse(agent_inst.stream_generator(input.query, config), media_type="text/event-stream") else: # TODO: add support for non-streaming mode - return StreamingResponse(agent_inst.stream_generator(input.query, config), media_type="text/event-stream") + print("-----------NOT STREAMING-------------") + response = await agent_inst.non_streaming_run(input.query, config) + print("-----------Response-------------") + print(response) + return GeneratedDoc(text=response, prompt=input.query) if __name__ == "__main__": diff --git a/comps/agent/langchain/requirements.txt b/comps/agent/langchain/requirements.txt index fa1c4423b..1da88cb85 100644 --- a/comps/agent/langchain/requirements.txt +++ b/comps/agent/langchain/requirements.txt @@ -4,7 +4,7 @@ docarray[full] #used by tools duckduckgo-search fastapi -huggingface_hub +huggingface_hub==0.24.0 langchain #==0.1.12 langchain-huggingface langchain-openai diff --git a/comps/agent/langchain/src/agent.py b/comps/agent/langchain/src/agent.py index 4c20a3858..55a50e81d 100644 --- a/comps/agent/langchain/src/agent.py +++ b/comps/agent/langchain/src/agent.py @@ -2,20 +2,23 @@ # SPDX-License-Identifier: Apache-2.0 -def instantiate_agent(args, strategy="react"): - if strategy == "react": +def instantiate_agent(args, strategy="react_langchain"): + if strategy == "react_langchain": from .strategy.react import ReActAgentwithLangchain return ReActAgentwithLangchain(args) + elif strategy == "react_langgraph": + from .strategy.react import ReActAgentwithLanggraph + + return ReActAgentwithLanggraph(args) elif strategy == "plan_execute": from .strategy.planexec import PlanExecuteAgentWithLangGraph return PlanExecuteAgentWithLangGraph(args) - elif strategy == "agentic_rag": - from .strategy.agentic_rag import RAGAgentwithLanggraph - return RAGAgentwithLanggraph(args) - else: - from .strategy.base_agent import BaseAgent, BaseAgentState + elif strategy == "rag_agent": + from .strategy.ragagent import RAGAgent - return BaseAgent(args) + return RAGAgent(args) + else: + raise ValueError(f"Agent strategy: {strategy} not supported!") diff --git a/comps/agent/langchain/src/strategy/agentic_rag/README.md b/comps/agent/langchain/src/strategy/agentic_rag/README.md deleted file mode 100644 index 3d6e341a5..000000000 --- a/comps/agent/langchain/src/strategy/agentic_rag/README.md +++ /dev/null @@ -1,25 +0,0 @@ -# Agentic Rag - -This strategy is a practise provided with [LangGraph](https://langchain-ai.github.io/langgraph/tutorials/rag/langgraph_agentic_rag) -This agent strategy includes steps listed below: - -1. RagAgent - decide if this query need to get extra help - - - Yes: Goto 'Retriever' - - No: Complete the query with Final answer - -2. Retriever: - - - Get relative Info from tools, Goto 'DocumentGrader' - -3. DocumentGrader - Judge retrieved info relevance based query - - - Yes: Complete the query with Final answer - - No: Goto 'Rewriter' - -4. 
Rewriter - Rewrite the query and Goto 'RagAgent'
-
-![Agentic Rag Workflow](https://blog.langchain.dev/content/images/size/w1000/2024/02/image-16.png)
diff --git a/comps/agent/langchain/src/strategy/agentic_rag/prompt.py b/comps/agent/langchain/src/strategy/agentic_rag/prompt.py
deleted file mode 100644
index 1f68db32e..000000000
--- a/comps/agent/langchain/src/strategy/agentic_rag/prompt.py
+++ /dev/null
@@ -1,17 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
-
-rlm_rag_prompt = ChatPromptTemplate.from_messages(
-    [
-        (
-            "human",
-            "You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.Question: {question} Context: {context} Answer:",
-        ),
-    ]
-)
-
-hwchase17_react_prompt = PromptTemplate.from_template(
-    "Answer the following questions as best you can. You have access to the following tools:\n\n{tools}\n\nUse the following format:\n\nQuestion: the input question you must answer\nThought: you should always think about what to do\nAction: the action to take, should be one of [{tool_names}]\nAction Input: the input to the action\nObservation: the result of the action\n... (this Thought/Action/Action Input/Observation can repeat N times)\nThought: I now know the final answer\nFinal Answer: the final answer to the original input question\n\nBegin!\n\nQuestion: {input}\nThought:{agent_scratchpad}"
-)
diff --git a/comps/agent/langchain/src/strategy/base_agent.py b/comps/agent/langchain/src/strategy/base_agent.py
index a0e35e912..f9e8fed9e 100644
--- a/comps/agent/langchain/src/strategy/base_agent.py
+++ b/comps/agent/langchain/src/strategy/base_agent.py
@@ -17,3 +17,6 @@ def compile(self):
    def execute(self, state: dict):
        pass
+
+    def non_streaming_run(self, query, config):
+        raise NotImplementedError
diff --git a/comps/agent/langchain/src/strategy/ragagent/README.md b/comps/agent/langchain/src/strategy/ragagent/README.md
new file mode 100644
index 000000000..23114e1f9
--- /dev/null
+++ b/comps/agent/langchain/src/strategy/ragagent/README.md
@@ -0,0 +1,31 @@
+# RAG Agent
+
+This agent is specifically designed to improve answer quality over conventional RAG.
+This agent strategy includes the steps listed below:
+
+1. QueryWriter
+   This is an LLM with tool-calling capability. It decides whether tool calls are needed to answer the user query, or whether it can answer with the LLM's parametric knowledge.
+
+   - Yes: Rephrase the query in the form of a tool call to the Retriever tool, and send the rephrased query to 'Retriever'. The rephrasing is important, as user queries may not be clear and using the user query as-is may not retrieve relevant documents.
+   - No: Complete the query with a final answer.
+
+2. Retriever:
+
+   - Get related documents from a retrieval tool, then send the documents to 'DocumentGrader'. Note: the retrieval tool here is meant in a broad sense; it can be a text retriever over a proprietary knowledge base, a web search API, a knowledge graph API, an SQL database API, etc.
+
+3. DocumentGrader
+   Judge the relevance of the retrieved info with respect to the user query.
+
+   - Yes: Go to TextGenerator
+   - No: Go back to QueryWriter to rewrite the query.
+
+4. TextGenerator
+   - Generate an answer based on the query and the last retrieved context.
+   - After generation, go to END.
+
+Note:
+
+- Currently the performance of this RAG agent has been tested and validated with only one retrieval tool. If you want to use multiple retrieval tools, we recommend a hierarchical multi-agent system, where a supervisor agent dispatches requests to multiple worker RAG agents and each worker RAG agent uses one type of retrieval tool.
+- The max number of retrieval attempts is set at 3.
+- You can specify a small `recursion_limit` to stop early, or a big `recursion_limit` to fully use the 3 retrieval attempts.
+- The TextGenerator only looks at the last retrieved docs.
diff --git a/comps/agent/langchain/src/strategy/agentic_rag/__init__.py b/comps/agent/langchain/src/strategy/ragagent/__init__.py
similarity index 64%
rename from comps/agent/langchain/src/strategy/agentic_rag/__init__.py
rename to comps/agent/langchain/src/strategy/ragagent/__init__.py
index 8ed6f0281..f369d1928 100644
--- a/comps/agent/langchain/src/strategy/agentic_rag/__init__.py
+++ b/comps/agent/langchain/src/strategy/ragagent/__init__.py
@@ -1,4 +1,4 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

-from .planner import RAGAgentwithLanggraph
+from .planner import RAGAgent
diff --git a/comps/agent/langchain/src/strategy/agentic_rag/planner.py b/comps/agent/langchain/src/strategy/ragagent/planner.py
similarity index 52%
rename from comps/agent/langchain/src/strategy/agentic_rag/planner.py
rename to comps/agent/langchain/src/strategy/ragagent/planner.py
index 18ab42083..1a60f3ca2 100644
--- a/comps/agent/langchain/src/strategy/agentic_rag/planner.py
+++ b/comps/agent/langchain/src/strategy/ragagent/planner.py
@@ -4,18 +4,22 @@
from typing import Annotated, Any, Literal, Sequence, TypedDict

from langchain.output_parsers import PydanticOutputParser
-from langchain_core.messages import BaseMessage, HumanMessage
+from langchain_core.messages import BaseMessage, HumanMessage, ToolMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.output_parsers.openai_tools import PydanticToolsParser
from langchain_core.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
-from langchain_huggingface import ChatHuggingFace
+from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
+from langchain_openai import ChatOpenAI
from langgraph.graph import END, START, StateGraph
from langgraph.graph.message import add_messages
from langgraph.prebuilt import ToolNode, tools_condition

from ..base_agent import BaseAgent
-from .prompt import rlm_rag_prompt
+from .prompt import DOC_GRADER_PROMPT, RAG_PROMPT
+
+instruction = "Retrieved document is not sufficient or relevant to answer the query. Reformulate the query to search knowledge base again."
+MAX_RETRY = 3

class AgentState(TypedDict):
@@ -23,6 +27,40 @@
    # Default is to replace. add_messages says "append"
    messages: Annotated[Sequence[BaseMessage], add_messages]
    output: str
+    doc_score: str
+    query_time: str
+
+
+class QueryWriter:
+    """Invokes llm to generate a response based on the current state. Given
+    the question, it will decide to retrieve using the retriever tool, or simply end.
+ + Args: + state (messages): The current state + + Returns: + dict: The updated state with the response appended to messages + """ + + def __init__(self, llm_endpoint, model_id, tools): + if isinstance(llm_endpoint, HuggingFaceEndpoint): + self.llm = ChatHuggingFace(llm=llm_endpoint, model_id=model_id).bind_tools(tools) + elif isinstance(llm_endpoint, ChatOpenAI): + self.llm = llm_endpoint.bind_tools(tools) + + def __call__(self, state): + print("---CALL QueryWriter---") + messages = state["messages"] + + response = self.llm.invoke(messages) + # We return a list, because this will get added to the existing list + return {"messages": [response], "output": response} + + +class Retriever: + @classmethod + def create(cls, tools_descriptions): + return ToolNode(tools_descriptions) class DocumentGrader: @@ -43,24 +81,23 @@ class grade(BaseModel): # Prompt prompt = PromptTemplate( - template="""You are a grader assessing relevance of a retrieved document to a user question. \n - Here is the retrieved document: \n\n {context} \n\n - Here is the user question: {question} \n - If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n - Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question.""", + template=DOC_GRADER_PROMPT, input_variables=["context", "question"], ) - llm = ChatHuggingFace(llm=llm_endpoint, model_id=model_id).bind_tools([grade]) + if isinstance(llm_endpoint, HuggingFaceEndpoint): + llm = ChatHuggingFace(llm=llm_endpoint, model_id=model_id).bind_tools([grade]) + elif isinstance(llm_endpoint, ChatOpenAI): + llm = llm_endpoint.bind_tools([grade]) output_parser = PydanticToolsParser(tools=[grade], first_tool_only=True) self.chain = prompt | llm | output_parser def __call__(self, state) -> Literal["generate", "rewrite"]: print("---CALL DocumentGrader---") messages = state["messages"] - last_message = messages[-1] + last_message = messages[-1] # the latest retrieved doc - question = messages[0].content + question = messages[0].content # the original query docs = last_message.content scored_result = self.chain.invoke({"question": question, "context": docs}) @@ -69,74 +106,12 @@ def __call__(self, state) -> Literal["generate", "rewrite"]: if score.startswith("yes"): print("---DECISION: DOCS RELEVANT---") - return "generate" + return {"doc_score": "generate"} else: print(f"---DECISION: DOCS NOT RELEVANT, score is {score}---") - return "rewrite" - - -class RagAgent: - """Invokes the agent model to generate a response based on the current state. Given - the question, it will decide to retrieve using the retriever tool, or simply end. - - Args: - state (messages): The current state - - Returns: - dict: The updated state with the agent response appended to messages - """ - - def __init__(self, llm_endpoint, model_id, tools): - self.llm = ChatHuggingFace(llm=llm_endpoint, model_id=model_id).bind_tools(tools) - - def __call__(self, state): - print("---CALL RagAgent---") - messages = state["messages"] - - response = self.llm.invoke(messages) - # We return a list, because this will get added to the existing list - return {"messages": [response], "output": response} - - -class Retriever: - @classmethod - def create(cls, tools_descriptions): - return ToolNode(tools_descriptions) - - -class Rewriter: - """Transform the query to produce a better question. 
- - Args: - state (messages): The current state - - Returns: - dict: The updated state with re-phrased question - """ - - def __init__(self, llm_endpoint): - self.llm = llm_endpoint - - def __call__(self, state): - print("---TRANSFORM QUERY---") - messages = state["messages"] - question = messages[0].content - - msg = [ - HumanMessage( - content=f""" \n - Look at the input and try to reason about the underlying semantic intent / meaning. \n - Here is the initial question: - \n ------- \n - {question} - \n ------- \n - Formulate an improved question: """, - ) - ] - response = self.llm.invoke(msg) - return {"messages": [response]} + return {"messages": [HumanMessage(content=instruction)], "doc_score": "rewrite"} class TextGenerator: @@ -151,77 +126,121 @@ class TextGenerator: def __init__(self, llm_endpoint, model_id=None): # Chain - prompt = rlm_rag_prompt + # prompt = rlm_rag_prompt + prompt = RAG_PROMPT self.rag_chain = prompt | llm_endpoint | StrOutputParser() def __call__(self, state): print("---GENERATE---") messages = state["messages"] question = messages[0].content - last_message = messages[-1] + query_time = state["query_time"] + + # find the latest retrieved doc + # which is a ToolMessage + for m in state["messages"][::-1]: + if isinstance(m, ToolMessage): + last_message = m + break question = messages[0].content docs = last_message.content # Run - response = self.rag_chain.invoke({"context": docs, "question": question}) - return {"output": response} + response = self.rag_chain.invoke({"context": docs, "question": question, "time": query_time}) + print("@@@@ Used this doc for generation:\n", docs) + print("@@@@ Generated response: ", response) + return {"messages": [response], "output": response} -class RAGAgentwithLanggraph(BaseAgent): +class RAGAgent(BaseAgent): def __init__(self, args): super().__init__(args) # Define Nodes document_grader = DocumentGrader(self.llm_endpoint, args.model) - rag_agent = RagAgent(self.llm_endpoint, args.model, self.tools_descriptions) - retriever = Retriever.create(self.tools_descriptions) - rewriter = Rewriter(self.llm_endpoint) + query_writer = QueryWriter(self.llm_endpoint, args.model, self.tools_descriptions) text_generator = TextGenerator(self.llm_endpoint) + retriever = Retriever.create(self.tools_descriptions) # Define graph workflow = StateGraph(AgentState) # Define the nodes we will cycle between - workflow.add_node("agent", rag_agent) + workflow.add_node("query_writer", query_writer) workflow.add_node("retrieve", retriever) - workflow.add_node("rewrite", rewriter) + workflow.add_node("doc_grader", document_grader) workflow.add_node("generate", text_generator) # connect as graph - workflow.add_edge(START, "agent") + workflow.add_edge(START, "query_writer") workflow.add_conditional_edges( - "agent", + "query_writer", tools_condition, { "tools": "retrieve", # if tools_condition return 'tools', then go to 'retrieve' END: END, # if tools_condition return 'END', then go to END }, ) + + workflow.add_edge("retrieve", "doc_grader") + workflow.add_conditional_edges( - "retrieve", - document_grader, + "doc_grader", + self.should_retry, { - "generate": "generate", # if tools_condition return 'generate', then go to 'generate' node - "rewrite": "rewrite", # if tools_condition return 'rewrite', then go to 'rewrite' node + False: "generate", + True: "query_writer", }, ) workflow.add_edge("generate", END) - workflow.add_edge("rewrite", "agent") self.app = workflow.compile() + def should_retry(self, state): + # first check how many retry attempts have been 
made + num_retry = 0 + for m in state["messages"]: + if instruction in m.content: + num_retry += 1 + + print("**********Num retry: ", num_retry) + + if (num_retry < MAX_RETRY) and (state["doc_score"] == "rewrite"): + return True + else: + return False + def prepare_initial_state(self, query): return {"messages": [HumanMessage(content=query)]} async def stream_generator(self, query, config): initial_state = self.prepare_initial_state(query) - async for event in self.app.astream(initial_state, config=config): - for node_name, node_state in event.items(): - yield f"--- CALL {node_name} ---\n" - for k, v in node_state.items(): - if v is not None: - yield f"{k}: {v}\n" - - yield f"data: {repr(event)}\n\n" - yield "data: [DONE]\n\n" + try: + async for event in self.app.astream(initial_state, config=config): + for node_name, node_state in event.items(): + yield f"--- CALL {node_name} ---\n" + for k, v in node_state.items(): + if v is not None: + yield f"{k}: {v}\n" + + yield f"data: {repr(event)}\n\n" + yield "data: [DONE]\n\n" + except Exception as e: + yield str(e) + + async def non_streaming_run(self, query, config): + initial_state = self.prepare_initial_state(query) + try: + async for s in self.app.astream(initial_state, config=config, stream_mode="values"): + message = s["messages"][-1] + if isinstance(message, tuple): + print(message) + else: + message.pretty_print() + + last_message = s["messages"][-1] + print("******Response: ", last_message.content) + return last_message.content + except Exception as e: + return str(e) diff --git a/comps/agent/langchain/src/strategy/ragagent/prompt.py b/comps/agent/langchain/src/strategy/ragagent/prompt.py new file mode 100644 index 000000000..bf12422ec --- /dev/null +++ b/comps/agent/langchain/src/strategy/ragagent/prompt.py @@ -0,0 +1,36 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from langchain_core.prompts import ChatPromptTemplate, PromptTemplate + +DOC_GRADER_PROMPT = """\ +Given the QUERY, determine if a relevant answer can be derived from the DOCUMENT.\n +QUERY: {question} \n +DOCUMENT:\n{context}\n\n +Give score 'yes' if the document provides sufficient and relevant information to answer the question. Otherwise, give score 'no'. ONLY answer with 'yes' or 'no'. NOTHING ELSE.""" + + +PROMPT = """\ +### You are a helpful, respectful and honest assistant. +You are given a Question and the time when it was asked in the Pacific Time Zone (PT), referred to as "Query +Time". The query time is formatted as "mm/dd/yyyy, hh:mm:ss PT". +Please follow these guidelines when formulating your answer: +1. If the question contains a false premise or assumption, answer “invalid question”. +2. If you are uncertain or do not know the answer, respond with “I don’t know”. +3. Refer to the search results to form your answer. +4. Give concise, factual and relevant answers. 
+ +### Search results: {context} \n +### Question: {question} \n +### Query Time: {time} \n +### Answer: +""" + +RAG_PROMPT = ChatPromptTemplate.from_messages( + [ + ( + "human", + PROMPT, + ), + ] +) diff --git a/comps/agent/langchain/src/strategy/react/__init__.py b/comps/agent/langchain/src/strategy/react/__init__.py index 63f79e32a..ad1aa456e 100644 --- a/comps/agent/langchain/src/strategy/react/__init__.py +++ b/comps/agent/langchain/src/strategy/react/__init__.py @@ -2,3 +2,4 @@ # SPDX-License-Identifier: Apache-2.0 from .planner import ReActAgentwithLangchain +from .planner import ReActAgentwithLanggraph diff --git a/comps/agent/langchain/src/strategy/react/planner.py b/comps/agent/langchain/src/strategy/react/planner.py index 58cc70104..93d185493 100644 --- a/comps/agent/langchain/src/strategy/react/planner.py +++ b/comps/agent/langchain/src/strategy/react/planner.py @@ -1,11 +1,16 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from langchain.agents import AgentExecutor, create_react_agent +from langchain.agents import AgentExecutor +from langchain.agents import create_react_agent as create_react_langchain_agent +from langchain_core.messages import HumanMessage +from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint +from langchain_openai import ChatOpenAI +from langgraph.prebuilt import create_react_agent from ...utils import has_multi_tool_inputs, tool_renderer from ..base_agent import BaseAgent -from .prompt import hwchase17_react_prompt +from .prompt import REACT_SYS_MESSAGE, hwchase17_react_prompt class ReActAgentwithLangchain(BaseAgent): @@ -13,9 +18,9 @@ def __init__(self, args): super().__init__(args) prompt = hwchase17_react_prompt if has_multi_tool_inputs(self.tools_descriptions): - raise ValueError("Only supports single input tools when using strategy == react") + raise ValueError("Only supports single input tools when using strategy == react_langchain") else: - agent_chain = create_react_agent( + agent_chain = create_react_langchain_agent( self.llm_endpoint, self.tools_descriptions, prompt, tools_renderer=tool_renderer ) self.app = AgentExecutor( @@ -42,3 +47,51 @@ async def stream_generator(self, query, config): raise ValueError() print("---") yield "data: [DONE]\n\n" + + +class ReActAgentwithLanggraph(BaseAgent): + def __init__(self, args): + super().__init__(args) + + if isinstance(self.llm_endpoint, HuggingFaceEndpoint): + self.llm = ChatHuggingFace(llm=self.llm_endpoint, model_id=args.model) + elif isinstance(self.llm_endpoint, ChatOpenAI): + self.llm = self.llm_endpoint + + tools = self.tools_descriptions + + self.app = create_react_agent(self.llm, tools=tools, state_modifier=REACT_SYS_MESSAGE) + + def prepare_initial_state(self, query): + return {"messages": [HumanMessage(content=query)]} + + async def stream_generator(self, query, config): + initial_state = self.prepare_initial_state(query) + try: + async for event in self.app.astream(initial_state, config=config): + for node_name, node_state in event.items(): + yield f"--- CALL {node_name} ---\n" + for k, v in node_state.items(): + if v is not None: + yield f"{k}: {v}\n" + + yield f"data: {repr(event)}\n\n" + yield "data: [DONE]\n\n" + except Exception as e: + yield str(e) + + async def non_streaming_run(self, query, config): + initial_state = self.prepare_initial_state(query) + try: + async for s in self.app.astream(initial_state, config=config, stream_mode="values"): + message = s["messages"][-1] + if isinstance(message, tuple): + print(message) + else: + 
message.pretty_print() + + last_message = s["messages"][-1] + print("******Response: ", last_message.content) + return last_message.content + except Exception as e: + return str(e) diff --git a/comps/agent/langchain/src/strategy/react/prompt.py b/comps/agent/langchain/src/strategy/react/prompt.py index bfec54fe3..5404aa73a 100644 --- a/comps/agent/langchain/src/strategy/react/prompt.py +++ b/comps/agent/langchain/src/strategy/react/prompt.py @@ -6,3 +6,13 @@ hwchase17_react_prompt = PromptTemplate.from_template( "Answer the following questions as best you can. You have access to the following tools:\n\n{tools}\n\nUse the following format:\n\nQuestion: the input question you must answer\nThought: you should always think about what to do\nAction: the action to take, should be one of [{tool_names}]\nAction Input: the input to the action\nObservation: the result of the action\n... (this Thought/Action/Action Input/Observation can repeat N times)\nThought: I now know the final answer\nFinal Answer: the final answer to the original input question\n\nBegin!\n\nQuestion: {input}\nThought:{agent_scratchpad}" ) + + +REACT_SYS_MESSAGE = """\ +Decompose the user request into a series of simple tasks when necessary and solve the problem step by step. +When you cannot get the answer at first, do not give up. Reflect on the info you have from the tools and try to solve the problem in a different way. +Please follow these guidelines when formulating your answer: +1. If the question contains a false premise or assumption, answer “invalid question”. +2. If you are uncertain or do not know the answer, respond with “I don’t know”. +3. Give concise, factual and relevant answers. +""" diff --git a/comps/agent/langchain/src/utils.py b/comps/agent/langchain/src/utils.py index d84b7e225..4f5fb450c 100644 --- a/comps/agent/langchain/src/utils.py +++ b/comps/agent/langchain/src/utils.py @@ -68,7 +68,7 @@ def setup_openai_client(args): from langchain_openai import ChatOpenAI params = { - "temperature": 0.5, + "temperature": args.temperature, "max_tokens": args.max_new_tokens, "streaming": args.streaming, } diff --git a/tests/test_agent_langchain.sh b/tests/test_agent_langchain.sh index 0c4db337e..8efaa971d 100644 --- a/tests/test_agent_langchain.sh +++ b/tests/test_agent_langchain.sh @@ -7,6 +7,11 @@ WORKPATH=$(dirname "$PWD") LOG_PATH="$WORKPATH/tests" ip_address=$(hostname -I | awk '{print $1}') +tgi_port=8080 +tgi_volume=$WORKPATH/data + +export model=mistralai/Mistral-7B-Instruct-v0.3 +export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} function build_docker_images() { echo "Building the docker images" @@ -21,27 +26,20 @@ function build_docker_images() { fi } -function start_service() { +function start_tgi_service() { # redis endpoint - export model=meta-llama/Meta-Llama-3-8B-Instruct - export HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} echo "token is ${HF_TOKEN}" #single card echo "start tgi gaudi service" - docker run -d --runtime=habana --name "test-comps-tgi-gaudi-service" -p 8080:80 -v ./data:/data -e HF_TOKEN=$HF_TOKEN -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:latest --model-id $model --max-input-tokens 4096 --max-total-tokens 8092 + docker run -d --runtime=habana --name "test-comps-tgi-gaudi-service" -p $tgi_port:80 -v $tgi_volume:/data -e HF_TOKEN=$HF_TOKEN -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:latest --model-id $model 
--max-input-tokens 4096 --max-total-tokens 8092 sleep 5s docker logs test-comps-tgi-gaudi-service - echo "Starting agent microservice" - docker run -d --runtime=runc --name="test-comps-langchain-agent-endpoint" -v $WORKPATH/comps/agent/langchain/tools:/home/user/comps/agent/langchain/tools -p 5042:9090 --ipc=host -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e model=${model} -e strategy=react -e llm_endpoint_url=http://${ip_address}:8080 -e llm_engine=tgi -e recursion_limit=5 -e require_human_feedback=false -e tools=/home/user/comps/agent/langchain/tools/custom_tools.yaml opea/comps-agent-langchain:comps - sleep 5s - docker logs test-comps-langchain-agent-endpoint - echo "Waiting tgi gaudi ready" n=0 until [[ "$n" -ge 100 ]] || [[ $ready == true ]]; do - docker logs test-comps-tgi-gaudi-service > ${WORKPATH}/tests/tgi-gaudi-service.log + docker logs test-comps-tgi-gaudi-service n=$((n+1)) if grep -q Connected ${WORKPATH}/tests/tgi-gaudi-service.log; then break @@ -53,6 +51,41 @@ function start_service() { echo "Service started successfully" } +function start_react_langchain_agent_service() { + echo "Starting react_langchain agent microservice" + docker run -d --runtime=runc --name="comps-agent-endpoint" -v $WORKPATH/comps/agent/langchain/tools:/home/user/comps/agent/langchain/tools -p 9090:9090 --ipc=host -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e model=${model} -e strategy=react_langchain -e llm_endpoint_url=http://${ip_address}:${tgi_port} -e llm_engine=tgi -e recursion_limit=10 -e require_human_feedback=false -e tools=/home/user/comps/agent/langchain/tools/custom_tools.yaml opea/comps-agent-langchain:comps + sleep 5s + docker logs comps-agent-endpoint + echo "Service started successfully" +} + + +function start_react_langgraph_agent_service() { + echo "Starting react_langgraph agent microservice" + docker run -d --runtime=runc --name="comps-agent-endpoint" -v $WORKPATH/comps/agent/langchain/tools:/home/user/comps/agent/langchain/tools -p 9090:9090 --ipc=host -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e model=${model} -e strategy=react_langgraph -e llm_endpoint_url=http://${ip_address}:${tgi_port} -e llm_engine=tgi -e recursion_limit=10 -e require_human_feedback=false -e tools=/home/user/comps/agent/langchain/tools/custom_tools.yaml opea/comps-agent-langchain:comps + sleep 5s + docker logs comps-agent-endpoint + echo "Service started successfully" +} + +function start_react_langgraph_agent_service_openai() { + echo "Starting react_langgraph agent microservice" + docker run -d --runtime=runc --name="comps-agent-endpoint" -v $WORKPATH/comps/agent/langchain/tools:/home/user/comps/agent/langchain/tools -p 9090:9090 --ipc=host -e model=gpt-4o-mini-2024-07-18 -e strategy=react_langgraph -e llm_engine=openai -e OPENAI_API_KEY=${OPENAI_API_KEY} -e recursion_limit=10 -e require_human_feedback=false -e tools=/home/user/comps/agent/langchain/tools/custom_tools.yaml opea/comps-agent-langchain:comps + sleep 5s + docker logs comps-agent-endpoint + echo "Service started successfully" +} + + +function start_ragagent_agent_service() { + echo "Starting rag agent microservice" + docker run -d --runtime=runc --name="comps-agent-endpoint" -v $WORKPATH/comps/agent/langchain/tools:/home/user/comps/agent/langchain/tools -p 9090:9090 --ipc=host -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e model=${model} -e strategy=rag_agent -e llm_endpoint_url=http://${ip_address}:${tgi_port} -e llm_engine=tgi -e recursion_limit=10 -e require_human_feedback=false -e 
tools=/home/user/comps/agent/langchain/tools/custom_tools.yaml opea/comps-agent-langchain:comps + sleep 5s + docker logs comps-agent-endpoint + echo "Service started successfully" +} + + function validate() { local CONTENT="$1" local EXPECTED_RESULT="$2" @@ -69,38 +102,70 @@ function validate() { function validate_microservice() { echo "Testing agent service" - local CONTENT=$(curl http://${ip_address}:5042/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{ + local CONTENT=$(http_proxy="" curl http://${ip_address}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{ "query": "What is Intel OPEA project?" - }' | tee ${LOG_PATH}/test-agent-langchain.log) + }') local EXIT_CODE=$(validate "$CONTENT" "OPEA" "test-agent-langchain") echo "$EXIT_CODE" local EXIT_CODE="${EXIT_CODE:0-1}" echo "return value is $EXIT_CODE" if [ "$EXIT_CODE" == "1" ]; then - docker logs test-comps-tgi-gaudi-service &> ${LOG_PATH}/test-comps-tgi-gaudi-service.log - docker logs test-comps-langchain-agent-endpoint &> ${LOG_PATH}/test-comps-langchain-agent-endpoint.log + echo "==============tgi container log ===================" + docker logs test-comps-tgi-gaudi-service + echo "==============agent container log ===================" + docker logs comps-agent-endpoint exit 1 fi } -function stop_docker() { +function stop_tgi_docker() { cid=$(docker ps -aq --filter "name=test-comps-tgi-gaudi-service") echo "Stopping the docker containers "${cid} if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi - cid=$(docker ps -aq --filter "name=test-comps-langchain-agent-endpoint") + echo "Docker containers stopped successfully" +} + +function stop_agent_docker() { + cid=$(docker ps -aq --filter "name=comps-agent-endpoint") echo "Stopping the docker containers "${cid} if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi echo "Docker containers stopped successfully" } +function stop_docker() { + stop_tgi_docker + stop_agent_docker +} + function main() { stop_docker build_docker_images - start_service + start_tgi_service + + # test react_langchain + start_react_langchain_agent_service + echo "=============Testing ReAct Langchain=============" + validate_microservice + stop_agent_docker + echo "=============================================" + + # # test react_langgraph + ## For now need OpenAI llms for react_langgraph + # start_react_langgraph_agent_service_openai + # echo "===========Testing ReAct Langgraph (OpenAI LLM)=============" + # validate_microservice + # stop_agent_docker + # echo "=============================================" + + + # test rag agent + start_ragagent_agent_service + echo "=============Testing RAG Agent=============" validate_microservice + echo "=============================================" stop_docker echo y | docker system prune 2>&1 > /dev/null