GraphRAG with llama-index #793

Merged · 50 commits · Oct 30, 2024
Commits (50)
b0a95a3
graphRAG dataprep llama-index validated w openai endpoints
rbrugaro Oct 15, 2024
4e1db1f
llama-index graphRAG retrieval validated with openai models
rbrugaro Oct 15, 2024
ba1cffb
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 15, 2024
dc97611
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 17, 2024
d4a6459
validated code using TGI/TEI instead of openai
rbrugaro Oct 17, 2024
012a503
compose.yaml for dataprep validated with neo4j, TGI/TEI, openai
rbrugaro Oct 18, 2024
a0668b7
fix merge conflict
rbrugaro Oct 18, 2024
f345bb6
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 18, 2024
0cf55a8
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 19, 2024
1ceb659
graphRAG retriever validated and full compose.yaml
rbrugaro Oct 19, 2024
8e034cd
minor fix
rbrugaro Oct 20, 2024
3ca5298
Merge branch 'graphRAG_LI' of https://github.com/rbrugaro/GenAIComps …
rbrugaro Oct 20, 2024
9eee5cb
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 20, 2024
109a04e
add graphRAGGateway WIP
rbrugaro Oct 22, 2024
7e2c2ea
graphragGateway working in E2E Example
rbrugaro Oct 22, 2024
7e5d810
fix schedule in orchestrator to support ChatCompletionRequest input
rbrugaro Oct 22, 2024
7b0e51a
change default to TGI instead of openAI and add test code for neo4jre…
rbrugaro Oct 23, 2024
85f0efe
test code for dataprep-neo4j microservice
rbrugaro Oct 23, 2024
9d103a7
improved READMES
rbrugaro Oct 23, 2024
08b107c
Merge remote-tracking branch 'upstream/main' into graphRAG_LI
rbrugaro Oct 23, 2024
7a8fe54
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 23, 2024
8ddbabd
update docker build path for tests
rbrugaro Oct 24, 2024
9feeaae
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 24, 2024
a1c7238
minor fix
rbrugaro Oct 24, 2024
ee52448
fix typo in container name
rbrugaro Oct 24, 2024
183e8d3
resolve image name conflict for hub publishing
rbrugaro Oct 24, 2024
1af877a
add tgi validation to microservice tests
rbrugaro Oct 24, 2024
5e32dd4
rename test script to trigger cicd correctly w hpu
rbrugaro Oct 25, 2024
ad0942f
Merge branch 'main' into graphRAG_LI
rbrugaro Oct 25, 2024
c1d3361
rename test containers for cicd
rbrugaro Oct 25, 2024
8fcc940
update HF_TOKEN in TGI/TEI test scripts
rbrugaro Oct 28, 2024
18f1ad7
swapped larger model so the graph isn't empty in ci test
rbrugaro Oct 28, 2024
be2b3e5
set 4 hpu for 70B model in ci test
rbrugaro Oct 28, 2024
d0ae033
add extra time for large model loading cicd
rbrugaro Oct 28, 2024
5791644
fix tgi gaudi shard args
rbrugaro Oct 28, 2024
2501099
switch to chat cause chat template is needed
rbrugaro Oct 29, 2024
e4c839e
enable logs in test
rbrugaro Oct 29, 2024
d752dd5
Merge branch 'main' into graphRAG_LI
rbrugaro Oct 29, 2024
057dde2
use locally downloaded model in CI machine
rbrugaro Oct 29, 2024
a0eb001
Merge branch 'graphRAG_LI' of https://github.com/rbrugaro/GenAIComps …
rbrugaro Oct 29, 2024
847e132
use local model path and reduce wait time
rbrugaro Oct 29, 2024
fee0735
clear ports before ci run
rbrugaro Oct 29, 2024
7acdfad
fix cache model access
rbrugaro Oct 29, 2024
f9ac6ba
fix cache model access
rbrugaro Oct 29, 2024
09d011f
increased wait time for tgi shards ready
rbrugaro Oct 29, 2024
cd0e4f9
wait until tgi connected
rbrugaro Oct 29, 2024
eae835b
switch back to small model for testing
rbrugaro Oct 30, 2024
7b09de6
minor readability fixes
rbrugaro Oct 30, 2024
ebbc556
Merge branch 'main' into graphRAG_LI
rbrugaro Oct 30, 2024
2a33510
README fixes
rbrugaro Oct 30, 2024
4 changes: 4 additions & 0 deletions .github/workflows/docker/compose/dataprep-compose-cd.yaml
@@ -27,3 +27,7 @@ services:
build:
dockerfile: comps/dataprep/neo4j/langchain/Dockerfile
image: ${REGISTRY:-opea}/dataprep-neo4j:${TAG:-latest}
dataprep-neo4j-llamaindex:
build:
dockerfile: comps/dataprep/neo4j/llama_index/Dockerfile
image: ${REGISTRY:-opea}/dataprep-neo4j-llamaindex:${TAG:-latest}
4 changes: 4 additions & 0 deletions .github/workflows/docker/compose/retrievers-compose-cd.yaml
@@ -27,3 +27,7 @@ services:
build:
dockerfile: comps/retrievers/neo4j/langchain/Dockerfile
image: ${REGISTRY:-opea}/retriever-neo4j:${TAG:-latest}
retriever-neo4j-llamaindex:
build:
dockerfile: comps/retrievers/neo4j/llama_index/Dockerfile
image: ${REGISTRY:-opea}/retriever-neo4j-llamaindex:${TAG:-latest}
1 change: 1 addition & 0 deletions comps/__init__.py
@@ -59,6 +59,7 @@
VideoQnAGateway,
VisualQnAGateway,
MultimodalQnAGateway,
GraphragGateway,
AvatarChatbotGateway,
)

1 change: 1 addition & 0 deletions comps/cores/mega/constants.py
@@ -51,6 +51,7 @@ class MegaServiceEndpoint(Enum):
TRANSLATION = "/v1/translation"
RETRIEVALTOOL = "/v1/retrievaltool"
FAQ_GEN = "/v1/faqgen"
GRAPH_RAG = "/v1/graphrag"
# Follow OPENAI
EMBEDDINGS = "/v1/embeddings"
TTS = "/v1/audio/speech"
75 changes: 75 additions & 0 deletions comps/cores/mega/gateway.py
@@ -156,9 +156,12 @@

async def handle_request(self, request: Request):
data = await request.json()
print("data in handle request", data)

stream_opt = data.get("stream", True)
chat_request = ChatCompletionRequest.parse_obj(data)
print("chat request in handle request", chat_request)

prompt = self._handle_message(chat_request.messages)
print("prompt in gateway", prompt)

parameters = LLMParams(
max_tokens=chat_request.max_tokens if chat_request.max_tokens else 1024,
top_k=chat_request.top_k if chat_request.top_k else 10,
@@ -959,3 +962,75 @@
last_node = runtime_graph.all_leaves()[-1]
response = result_dict[last_node]["video_path"]
return response


class GraphragGateway(Gateway):
def __init__(self, megaservice, host="0.0.0.0", port=8888):
super().__init__(

megaservice, host, port, str(MegaServiceEndpoint.GRAPH_RAG), ChatCompletionRequest, ChatCompletionResponse
)

async def handle_request(self, request: Request):
data = await request.json()
stream_opt = data.get("stream", True)
chat_request = ChatCompletionRequest.parse_obj(data)


def parser_input(data, TypeClass, key):
chat_request = None
try:
chat_request = TypeClass.parse_obj(data)
query = getattr(chat_request, key)
except:
query = None
return query, chat_request


query = None
for key, TypeClass in zip(["text", "input", "messages"], [TextDoc, EmbeddingRequest, ChatCompletionRequest]):
query, chat_request = parser_input(data, TypeClass, key)
if query is not None:
break
if query is None:
raise ValueError(f"Unknown request type: {data}")
if chat_request is None:
raise ValueError(f"Unknown request type: {data}")
prompt = self._handle_message(chat_request.messages)
parameters = LLMParams(

max_tokens=chat_request.max_tokens if chat_request.max_tokens else 1024,
top_k=chat_request.top_k if chat_request.top_k else 10,
top_p=chat_request.top_p if chat_request.top_p else 0.95,
temperature=chat_request.temperature if chat_request.temperature else 0.01,
frequency_penalty=chat_request.frequency_penalty if chat_request.frequency_penalty else 0.0,
presence_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 0.0,
repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03,
streaming=stream_opt,
chat_template=chat_request.chat_template if chat_request.chat_template else None,
)
retriever_parameters = RetrieverParms(

search_type=chat_request.search_type if chat_request.search_type else "similarity",
k=chat_request.k if chat_request.k else 4,
distance_threshold=chat_request.distance_threshold if chat_request.distance_threshold else None,
fetch_k=chat_request.fetch_k if chat_request.fetch_k else 20,
lambda_mult=chat_request.lambda_mult if chat_request.lambda_mult else 0.5,
score_threshold=chat_request.score_threshold if chat_request.score_threshold else 0.2,
)
initial_inputs = chat_request
result_dict, runtime_graph = await self.megaservice.schedule(

initial_inputs=initial_inputs,
llm_parameters=parameters,
retriever_parameters=retriever_parameters,
)
for node, response in result_dict.items():
if isinstance(response, StreamingResponse):
return response
last_node = runtime_graph.all_leaves()[-1]
response = result_dict[last_node]["text"]
choices = []
usage = UsageInfo()
choices.append(

ChatCompletionResponseChoice(
index=0,
message=ChatMessage(role="assistant", content=response),
finish_reason="stop",
)
)
return ChatCompletionResponse(model="chatqna", choices=choices, usage=usage)

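For reference, here is a minimal client-side sketch of exercising the new endpoint. It assumes a megaservice has wired up `GraphragGateway` on its default port 8888 (see `__init__` above) and requests a non-streaming response; the host, question, and token budget are placeholders.

```python
# Hypothetical client call to a megaservice exposing GraphragGateway.
# The /v1/graphrag path and port 8888 come from the gateway code above;
# everything else is a placeholder.
import requests

payload = {
    "messages": [{"role": "user", "content": "What entities are in the ingested documents?"}],
    "max_tokens": 256,
    "stream": False,  # ask for a ChatCompletionResponse instead of a stream
}
resp = requests.post("http://localhost:8888/v1/graphrag", json=payload, timeout=300)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```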
2 changes: 2 additions & 0 deletions comps/dataprep/neo4j/__init__.py
@@ -0,0 +1,2 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
39 changes: 39 additions & 0 deletions comps/dataprep/neo4j/llama_index/Dockerfile
@@ -0,0 +1,39 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

FROM python:3.11-slim

ENV LANG=C.UTF-8

ARG ARCH="cpu"

RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
build-essential \
default-jre \
libgl1-mesa-glx \
libjemalloc-dev \
vim

RUN useradd -m -s /bin/bash user && \
mkdir -p /home/user && \
chown -R user /home/user/

USER user

COPY comps /home/user/comps

RUN pip install --no-cache-dir --upgrade pip setuptools && \
if [ ${ARCH} = "cpu" ]; then pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu; fi && \
pip install --no-cache-dir -r /home/user/comps/dataprep/neo4j/llama_index/requirements.txt

ENV PYTHONPATH=$PYTHONPATH:/home/user

USER root

RUN mkdir -p /home/user/comps/dataprep/neo4j/llama_index/uploaded_files && chown -R user /home/user/comps/dataprep/neo4j/llama_index/uploaded_files

USER user

WORKDIR /home/user/comps/dataprep/neo4j/llama_index

ENTRYPOINT ["python", "extract_graph_neo4j.py"]
94 changes: 94 additions & 0 deletions comps/dataprep/neo4j/llama_index/README.md
@@ -0,0 +1,94 @@
# Dataprep Microservice with Neo4J

This dataprep microservice ingests input files and uses an LLM (served by TGI, or an OpenAI model when `OPENAI_API_KEY` is set) to extract entities, relationships, and their descriptions, which are used to build a graph-based text index.
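
Conceptually, the extraction step resembles the following llama-index sketch. This is illustrative only: the model names, directory path, and defaults are assumptions (taken from this component's config defaults), not the exact code in `extract_graph_neo4j.py`.

```python
# Minimal sketch: build a Neo4j-backed property graph index with llama-index.
# Model choices and the input path are illustrative assumptions.
import os

from llama_index.core import PropertyGraphIndex, SimpleDirectoryReader
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.graph_stores.neo4j import Neo4jPropertyGraphStore
from llama_index.llms.openai import OpenAI

graph_store = Neo4jPropertyGraphStore(
    username=os.environ["NEO4J_USERNAME"],
    password=os.environ["NEO4J_PASSWORD"],
    url=os.getenv("NEO4J_URL", "bolt://localhost:7687"),
)

documents = SimpleDirectoryReader("./uploaded_files").load_data()

# The LLM extracts entities and relationships; the embedding model embeds
# graph nodes so Neo4j can serve vector retrieval natively.
index = PropertyGraphIndex.from_documents(
    documents,
    llm=OpenAI(model="gpt-4o", temperature=0.0),
    embed_model=OpenAIEmbedding(model="text-embedding-3-small"),
    property_graph_store=graph_store,
)
```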

### Setup Environment Variables

```bash
# Manually set private environment settings
export host_ip=${your_host_ip} # local IP
export no_proxy=$no_proxy,${host_ip} # important to add ${host_ip} for container communication
export http_proxy=${your_http_proxy}
export https_proxy=${your_http_proxy}
export NEO4J_URL=${your_neo4j_url}
export NEO4J_USERNAME=${your_neo4j_username}
export NEO4J_PASSWORD=${your_neo4j_password} # should match what was used in NEO4J_AUTH when running neo4j-apoc
export PYTHONPATH=${path_to_comps}
export OPENAI_API_KEY=${your_openai_api_key} # optional; when not set, smaller models served by TGI/TEI are used
export HUGGINGFACEHUB_API_TOKEN=${your_hf_token}
# set additional environment settings
source ./set_env.sh
```

## 🚀 Start Microservice with Docker

### 1. Build Docker Image

```bash
cd ../../../../
docker build -t opea/dataprep-neo4j-llamaindex:latest --build-arg no_proxy=$no_proxy --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/neo4j/llama_index/Dockerfile .
```

### 2. Setup Environment Variables

```bash
# Set private environment settings
export host_ip=${your_host_ip} # local IP
export no_proxy=$no_proxy,${host_ip} # important to add ${host_ip} for container communication
export http_proxy=${your_http_proxy}
export https_proxy=${your_http_proxy}
export NEO4J_URL=${your_neo4j_url}
export NEO4J_USERNAME=${your_neo4j_username}
export NEO4J_PASSWORD=${your_neo4j_password}
export PYTHONPATH=${path_to_comps}
export OPENAI_API_KEY=${your_openai_api_key} # optional; when not set, smaller models served by TGI/TEI are used
export HUGGINGFACEHUB_API_TOKEN=${your_hf_token}
# set additional environment settings
source ./set_env.sh
```

### 3. Run Docker with Docker Compose

Docker Compose will start four microservices: dataprep-neo4j-llamaindex, neo4j-apoc, tgi-gaudi-service, and tei-embedding-service. TGI and TEI are needed because dataprep relies on an LLM to extract entities and relationships from text when building the graph and the Neo4j Property Graph Index. The Neo4j database supports embeddings natively, so no separate vector store is required. Check out the blog [Introducing the Property Graph Index: A Powerful New Way to Build Knowledge Graphs with LLMs](https://www.llamaindex.ai/blog/introducing-the-property-graph-index-a-powerful-new-way-to-build-knowledge-graphs-with-llms) for a better understanding of the Property Graph Store and Index.

```bash
cd comps/dataprep/neo4j/llama_index
docker compose -f compose.yaml up -d
```
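
Before ingesting documents, you can verify that the Neo4j container is reachable. Below is a minimal sketch using the official `neo4j` Python driver (`pip install neo4j`), reusing the credentials exported above; adjust the URL if Neo4j runs on another host.

```python
# Quick connectivity check against the neo4j-apoc container.
# Assumes the NEO4J_* variables exported in the setup step above.
import os

from neo4j import GraphDatabase

driver = GraphDatabase.driver(
    os.getenv("NEO4J_URL", "bolt://localhost:7687"),
    auth=(os.environ["NEO4J_USERNAME"], os.environ["NEO4J_PASSWORD"]),
)
driver.verify_connectivity()  # raises an exception if the database is unreachable
print("Neo4j is up")
driver.close()
```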

## Invoke Microservice

Once the document preparation microservice for Neo4j is started, you can use the command below to invoke it; the microservice converts the document into graph and embedding data and saves it to the database.

```bash
curl -X POST \
-H "Content-Type: multipart/form-data" \
-F "files=@./file1.txt" \
http://${host_ip}:6004/v1/dataprep
```

You can specify `chunk_size` and `chunk_overlap` with the following command.

```bash
curl -X POST \
-H "Content-Type: multipart/form-data" \
-F "files=@./file1.txt" \
-F "chunk_size=1500" \
-F "chunk_overlap=100" \
http://${host_ip}:6004/v1/dataprep
```

We support table extraction from PDF documents. You can specify `process_table` and `table_strategy` with the following command. `table_strategy` refers to the strategy used to understand tables for table retrieval: as the setting progresses from `fast` to `hq` to `llm`, the focus shifts toward deeper table understanding at the expense of processing speed. The default strategy is `fast`.

Note: if you specify `table_strategy=llm`, the TGI service will be used.

To ensure the quality and comprehensiveness of the extracted entities, we recommend using `gpt-4o` as the model for parsing documents. To enable the OpenAI service, run `export OPENAI_API_KEY=xxxx` before using this service.

```bash
curl -X POST \
-H "Content-Type: multipart/form-data" \
-F "files=@./your_file.pdf" \
-F "process_table=true" \
-F "table_strategy=hq" \
http://localhost:6004/v1/dataprep
```
2 changes: 2 additions & 0 deletions comps/dataprep/neo4j/llama_index/__init__.py
@@ -0,0 +1,2 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
97 changes: 97 additions & 0 deletions comps/dataprep/neo4j/llama_index/compose.yaml
@@ -0,0 +1,97 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

version: "3.8"
services:
neo4j-apoc:
image: neo4j:latest
container_name: neo4j-apoc
volumes:
- /$HOME/neo4j/logs:/logs
- /$HOME/neo4j/config:/config
- /$HOME/neo4j/data:/data
- /$HOME/neo4j/plugins:/plugins
ipc: host
environment:
- NEO4J_AUTH=${NEO4J_USERNAME}/${NEO4J_PASSWORD}
- NEO4J_PLUGINS=["apoc"]
- NEO4J_apoc_export_file_enabled=true
- NEO4J_apoc_import_file_enabled=true
- NEO4J_apoc_import_file_use__neo4j__config=true
- NEO4J_dbms_security_procedures_unrestricted=apoc.\*
ports:
- "7474:7474"
- "7687:7687"
restart: always
tei-embedding-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-embedding-server
ports:
- "6006:80"
volumes:
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
NO_PROXY: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
ipc: host
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
tgi-gaudi-service:
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
container_name: tgi-gaudi-server
ports:
- "6005:80"
volumes:
- "./data:/data"
environment:
no_proxy: ${no_proxy}
NO_PROXY: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
ENABLE_HPU_GRAPH: true
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
runtime: habana
cap_add:
- SYS_NICE
ipc: host
command: --model-id ${LLM_MODEL_ID} --max-input-length 2048 --max-total-tokens 4096
dataprep-neo4j-llamaindex:
image: opea/dataprep-neo4j-llamaindex:latest
container_name: dataprep-neo4j-server
depends_on:
- neo4j-apoc
- tgi-gaudi-service
- tei-embedding-service
ports:
- "6004:6004"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
host_ip: ${host_ip}
NEO4J_URL: ${NEO4J_URL}
NEO4J_USERNAME: ${NEO4J_USERNAME}
NEO4J_PASSWORD: ${NEO4J_PASSWORD}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
OPENAI_API_KEY: ${OPENAI_API_KEY}
OPENAI_EMBEDDING_MODEL: ${OPENAI_EMBEDDING_MODEL}
OPENAI_LLM_MODEL: ${OPENAI_LLM_MODEL}
EMBEDDING_MODEL_ID: ${EMBEDDING_MODEL_ID}
LLM_MODEL_ID: ${LLM_MODEL_ID}
LOGFLAG: ${LOGFLAG}
restart: unless-stopped
networks:
default:
driver: bridge
18 changes: 18 additions & 0 deletions comps/dataprep/neo4j/llama_index/config.py
@@ -0,0 +1,18 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import os

host_ip = os.getenv("host_ip")
# Neo4J configuration
NEO4J_URL = os.getenv("NEO4J_URL", f"bolt://{host_ip}:7687")
NEO4J_USERNAME = os.getenv("NEO4J_USERNAME", "neo4j")
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "neo4jtest")

# LLM/Embedding endpoints
TGI_LLM_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT", f"http://{host_ip}:6005")
TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", f"http://{host_ip}:6006")

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
OPENAI_EMBEDDING_MODEL = os.getenv("OPENAI_EMBEDDING_MODEL", "text-embedding-3-small")
OPENAI_LLM_MODEL = os.getenv("OPENAI_LLM_MODEL", "gpt-4o")