Refactor llm predictionguard (opea-project#1143)
* refactor llm predictionguard

Signed-off-by: Xinyao Wang <[email protected]>

* refine predictionguard ut

Signed-off-by: Xinyao Wang <[email protected]>

* remove duplicated dockerfile path

Signed-off-by: Xinyao Wang <[email protected]>

* fix bug

Signed-off-by: Xinyao Wang <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* fix bug

Signed-off-by: Xinyao Wang <[email protected]>

---------

Signed-off-by: Xinyao Wang <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: ZePan110 <[email protected]>
3 people authored Jan 15, 2025
1 parent 89dd628 commit 4c21738
Showing 12 changed files with 119 additions and 164 deletions.
4 changes: 0 additions & 4 deletions .github/workflows/docker/compose/llms-compose.yaml
@@ -46,7 +46,3 @@ services:
build:
dockerfile: comps/llms/utils/lm-eval/Dockerfile
image: ${REGISTRY:-opea}/llm-eval:${TAG:-latest}
llm-textgen-predictionguard:
build:
dockerfile: comps/llms/text-generation/predictionguard/Dockerfile
image: ${REGISTRY:-opea}/llm-textgen-predictionguard:${TAG:-latest}
@@ -4,13 +4,6 @@

## Get Started

### Build Docker Image

```bash
cd ../../..
docker build -t opea/llm-textgen-predictionguard:latest -f comps/llms/text-generation/predictionguard/Dockerfile .
```

### Run the Predictionguard Microservice

```bash
@@ -28,7 +21,7 @@ curl -X POST http://localhost:9000/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "Hermes-2-Pro-Llama-3-8B",
"query": "Tell me a joke.",
"messages": "Tell me a joke.",
"max_tokens": 100,
"temperature": 0.7,
"top_p": 0.9,
@@ -44,7 +37,7 @@ curl -N -X POST http://localhost:9000/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "Hermes-2-Pro-Llama-3-8B",
"query": "Tell me a joke.",
"messages": "Tell me a joke.",
"max_tokens": 100,
"temperature": 0.7,
"top_p": 0.9,
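For reference, a Python equivalent of the curl calls above — a minimal sketch, assuming the refactored service is running locally on port 9000 and that the `requests` package is available; note the `messages` field replaces the old `query` field:

```python
import requests

# Illustrative client call mirroring the README curl examples; the endpoint,
# port, and payload fields come from the diff above, everything else is an
# assumption for demonstration purposes.
payload = {
    "model": "Hermes-2-Pro-Llama-3-8B",
    "messages": "Tell me a joke.",
    "max_tokens": 100,
    "temperature": 0.7,
    "top_p": 0.9,
    "stream": False,
}

resp = requests.post(
    "http://localhost:9000/v1/chat/completions",
    json=payload,
    timeout=60,
)
resp.raise_for_status()
print(resp.json())
```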
101 changes: 101 additions & 0 deletions comps/llms/src/text-generation/integrations/predictionguard.py
@@ -0,0 +1,101 @@
# Copyright (C) 2024 Prediction Guard, Inc.
# SPDX-License-Identifier: Apache-2.0

import os
import time

from fastapi import HTTPException
from fastapi.responses import StreamingResponse
from predictionguard import PredictionGuard

from comps import CustomLogger, OpeaComponent, OpeaComponentRegistry, ServiceType
from comps.cores.proto.api_protocol import ChatCompletionRequest

logger = CustomLogger("opea_textgen_predictionguard")
logflag = os.getenv("LOGFLAG", False)


@OpeaComponentRegistry.register("OPEATextGen_Predictionguard")
class OPEATextGen_Predictionguard(OpeaComponent):
"""A specialized OPEA TextGen component derived from OpeaComponent for interacting with Predictionguard services.
Attributes:
client (PredictionGuard): An instance of the PredictionGuard client for text generation.
"""

def __init__(self, name: str, description: str, config: dict = None):
super().__init__(name, ServiceType.LLM.name.lower(), description, config)
self.client = PredictionGuard()
health_status = self.check_health()
if not health_status:
logger.error("OPEATextGen_Predictionguard health check failed.")
else:
logger.info("OPEATextGen_Predictionguard health check success.")

def check_health(self) -> bool:
"""Checks the health of the Predictionguard LLM service.
Returns:
bool: True if the service is reachable and healthy, False otherwise.
"""

try:
response = self.client.models.list()
return response is not None
except Exception as e:
logger.error(e)
logger.error("Health check failed")
return False

async def invoke(self, input: ChatCompletionRequest):
"""Invokes the Predictionguard LLM service to generate output for the provided input.
Args:
input (ChatCompletionRequest): The input text(s).
"""
if isinstance(input.messages, str):
messages = [
{
"role": "system",
"content": "You are a helpful assistant. Your goal is to provide accurate, detailed, and safe responses to the user's queries.",
},
{"role": "user", "content": input.messages},
]
else:
messages = input.messages

if input.stream:

async def stream_generator():
chat_response = ""
for res in self.client.chat.completions.create(
model=input.model,
messages=messages,
max_tokens=input.max_tokens,
temperature=input.temperature,
top_p=input.top_p,
top_k=input.top_k,
stream=True,
):
if "choices" in res["data"] and "delta" in res["data"]["choices"][0]:
delta_content = res["data"]["choices"][0]["delta"]["content"]
chat_response += delta_content
yield f"data: {delta_content}\n\n"
else:
yield "data: [DONE]\n\n"

return StreamingResponse(stream_generator(), media_type="text/event-stream")
else:
try:
response = self.client.chat.completions.create(
model=input.model,
messages=messages,
max_tokens=input.max_tokens,
temperature=input.temperature,
top_p=input.top_p,
top_k=input.top_k,
)
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))

return response
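To sanity-check the new integration outside the microservice wrapper, something like the following could be used — an illustrative sketch only, assuming PREDICTIONGUARD_API_KEY is exported and the script is run from comps/llms/src/text-generation so that `integrations.predictionguard` resolves the same way it does in opea_llm_microservice.py:

```python
import asyncio

from comps.cores.proto.api_protocol import ChatCompletionRequest
from integrations.predictionguard import OPEATextGen_Predictionguard


async def main():
    # Constructing the component runs its health check, so this requires a
    # valid PREDICTIONGUARD_API_KEY and network access to Prediction Guard.
    component = OPEATextGen_Predictionguard(
        name="OPEATextGen_Predictionguard",
        description="Prediction Guard text-generation component",
    )
    # Request fields mirror those handled in invoke(); a plain string for
    # "messages" is wrapped with a default system prompt by the component.
    request = ChatCompletionRequest(
        model="Hermes-2-Pro-Llama-3-8B",
        messages="What is AI?",
        max_tokens=100,
        temperature=0.7,
        top_p=0.9,
        top_k=50,
        stream=False,
    )
    response = await component.invoke(request)
    print(response)


asyncio.run(main())
```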
3 changes: 3 additions & 0 deletions comps/llms/src/text-generation/opea_llm_microservice.py
@@ -6,6 +6,7 @@
from typing import Union

from integrations.opea import OPEALLM
from integrations.predictionguard import OPEATextGen_Predictionguard

from comps import (
CustomLogger,
@@ -25,6 +26,8 @@


llm_component_name = os.getenv("LLM_COMPONENT_NAME", "OPEA_LLM")
if logflag:
logger.info(f"Get llm_component_name {llm_component_name}")
# Initialize OpeaComponentLoader
loader = OpeaComponentLoader(llm_component_name, description=f"OPEA LLM Component: {llm_component_name}")

2 changes: 2 additions & 0 deletions comps/llms/src/text-generation/requirements.txt
@@ -8,6 +8,8 @@ openai==1.57.4
opentelemetry-api
opentelemetry-exporter-otlp
opentelemetry-sdk
Pillow
predictionguard
prometheus-fastapi-instrumentator
shortuuid
transformers
15 changes: 0 additions & 15 deletions comps/llms/text-generation/predictionguard/Dockerfile

This file was deleted.

2 changes: 0 additions & 2 deletions comps/llms/text-generation/predictionguard/__init__.py

This file was deleted.

20 changes: 0 additions & 20 deletions comps/llms/text-generation/predictionguard/docker_compose_llm.yaml

This file was deleted.

8 changes: 0 additions & 8 deletions comps/llms/text-generation/predictionguard/entrypoint.sh

This file was deleted.

85 changes: 0 additions & 85 deletions comps/llms/text-generation/predictionguard/llm_predictionguard.py

This file was deleted.

12 changes: 0 additions & 12 deletions comps/llms/text-generation/predictionguard/requirements.txt

This file was deleted.

20 changes: 11 additions & 9 deletions tests/llms/test_llms_text-generation_predictionguard.sh
@@ -13,43 +13,45 @@ fi
function build_docker_images() {
cd $WORKPATH
echo $(pwd)
docker build --no-cache -t opea/llm-pg:comps -f comps/llms/text-generation/predictionguard/Dockerfile .
docker build --no-cache -t opea/llm-textgen:comps -f comps/llms/src/text-generation/Dockerfile .
if [ $? -ne 0 ]; then
echo "opea/llm-pg built failed"
echo "opea/llm-textgen built failed"
exit 1
else
echo "opea/llm-pg built successfully"
echo "opea/llm-textgen built successfully"
fi
}

function start_service() {
llm_service_port=9000
unset http_proxy
docker run -d --name=test-comps-llm-pg-server \
docker run -d --name=test-comps-llm-textgen-pg-server \
-e http_proxy= -e https_proxy= \
-e PREDICTIONGUARD_API_KEY=${PREDICTIONGUARD_API_KEY} \
-p 9000:9000 --ipc=host opea/llm-pg:comps
-e LLM_COMPONENT_NAME="OPEATextGen_Predictionguard" \
-e LOGFLAG=True \
-p 9000:9000 --ipc=host opea/llm-textgen:comps
sleep 60 # Sleep for 1 minute to allow the service to start
}

function validate_microservice() {
llm_service_port=9000
result=$(http_proxy="" curl http://${ip_address}:${llm_service_port}/v1/chat/completions \
-X POST \
-d '{"model": "Hermes-2-Pro-Llama-3-8B", "query": "What is AI?", "stream": false, "max_tokens": 100, "temperature": 0.7, "top_p": 1.0, "top_k": 50}' \
-d '{"model": "Hermes-2-Pro-Llama-3-8B", "messages": "What is AI?", "stream": false, "max_tokens": 100, "temperature": 0.7, "top_p": 1.0, "top_k": 50}' \
-H 'Content-Type: application/json')

if [[ $result == *"text"* ]]; then
if [[ $result == *"content"* ]]; then
echo "Service response is correct."
else
echo "Result wrong. Received was $result"
docker logs test-comps-llm-pg-server
docker logs test-comps-llm-textgen-pg-server
exit 1
fi
}

function stop_docker() {
cid=$(docker ps -aq --filter "name=test-comps-llm-pg-*")
cid=$(docker ps -aq --filter "name=test-comps-llm-textgen-pg-*")
if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
}
