Refactor llm predictionguard (opea-project#1143)
* refactor llm predictionguard

Signed-off-by: Xinyao Wang <[email protected]>

* refine predictionguard ut

Signed-off-by: Xinyao Wang <[email protected]>

* remove duplicated dockerfile path

Signed-off-by: Xinyao Wang <[email protected]>

* fix bug

Signed-off-by: Xinyao Wang <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* fix bug

Signed-off-by: Xinyao Wang <[email protected]>

---------

Signed-off-by: Xinyao Wang <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: ZePan110 <[email protected]>
3 people authored Jan 15, 2025
1 parent 89dd628 commit 4c21738
Showing 12 changed files with 119 additions and 164 deletions.
4 changes: 0 additions & 4 deletions .github/workflows/docker/compose/llms-compose.yaml
@@ -46,7 +46,3 @@ services:
build:
dockerfile: comps/llms/utils/lm-eval/Dockerfile
image: ${REGISTRY:-opea}/llm-eval:${TAG:-latest}
llm-textgen-predictionguard:
build:
dockerfile: comps/llms/text-generation/predictionguard/Dockerfile
image: ${REGISTRY:-opea}/llm-textgen-predictionguard:${TAG:-latest}
@@ -4,13 +4,6 @@

## Get Started

### Build Docker Image

```bash
cd ../../..
docker build -t opea/llm-textgen-predictionguard:latest -f comps/llms/text-generation/predictionguard/Dockerfile .
```

### Run the Predictionguard Microservice

```bash
@@ -28,7 +21,7 @@ curl -X POST http://localhost:9000/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "Hermes-2-Pro-Llama-3-8B",
"query": "Tell me a joke.",
"messages": "Tell me a joke.",
"max_tokens": 100,
"temperature": 0.7,
"top_p": 0.9,
@@ -44,7 +37,7 @@ curl -N -X POST http://localhost:9000/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "Hermes-2-Pro-Llama-3-8B",
"query": "Tell me a joke.",
"messages": "Tell me a joke.",
"max_tokens": 100,
"temperature": 0.7,
"top_p": 0.9,
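For reference, a Python equivalent of the curl calls above — a minimal sketch, assuming the refactored service is running locally on port 9000 and that the `requests` package is available; note the `messages` field replaces the old `query` field:

```python
import requests

# Illustrative client call mirroring the README curl examples; the endpoint,
# port, and payload fields come from the diff above, everything else is an
# assumption for demonstration purposes.
payload = {
    "model": "Hermes-2-Pro-Llama-3-8B",
    "messages": "Tell me a joke.",
    "max_tokens": 100,
    "temperature": 0.7,
    "top_p": 0.9,
    "stream": False,
}

resp = requests.post(
    "http://localhost:9000/v1/chat/completions",
    json=payload,
    timeout=60,
)
resp.raise_for_status()
print(resp.json())
```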
101 changes: 101 additions & 0 deletions comps/llms/src/text-generation/integrations/predictionguard.py
@@ -0,0 +1,101 @@
# Copyright (C) 2024 Prediction Guard, Inc.
# SPDX-License-Identifier: Apache-2.0

import os
import time

from fastapi import HTTPException
from fastapi.responses import StreamingResponse
from predictionguard import PredictionGuard

from comps import CustomLogger, OpeaComponent, OpeaComponentRegistry, ServiceType
from comps.cores.proto.api_protocol import ChatCompletionRequest

logger = CustomLogger("opea_textgen_predictionguard")
logflag = os.getenv("LOGFLAG", False)


@OpeaComponentRegistry.register("OPEATextGen_Predictionguard")
class OPEATextGen_Predictionguard(OpeaComponent):
"""A specialized OPEA TextGen component derived from OpeaComponent for interacting with Predictionguard services.
Attributes:
client (PredictionGuard): An instance of the PredictionGuard client for text generation.
"""

def __init__(self, name: str, description: str, config: dict = None):
super().__init__(name, ServiceType.LLM.name.lower(), description, config)
self.client = PredictionGuard()
health_status = self.check_health()
if not health_status:
logger.error("OPEATextGen_Predictionguard health check failed.")
else:
logger.info("OPEATextGen_Predictionguard health check success.")

def check_health(self) -> bool:
"""Checks the health of the Predictionguard LLM service.
Returns:
bool: True if the service is reachable and healthy, False otherwise.
"""

try:
response = self.client.models.list()
return response is not None
except Exception as e:
logger.error(e)
logger.error("Health check failed")
return False

async def invoke(self, input: ChatCompletionRequest):
"""Invokes the Predictionguard LLM service to generate output for the provided input.
Args:
input (ChatCompletionRequest): The input text(s).
"""
if isinstance(input.messages, str):
messages = [
{
"role": "system",
"content": "You are a helpful assistant. Your goal is to provide accurate, detailed, and safe responses to the user's queries.",
},
{"role": "user", "content": input.messages},
]
else:
messages = input.messages

if input.stream:

async def stream_generator():
chat_response = ""
for res in self.client.chat.completions.create(
model=input.model,
messages=messages,
max_tokens=input.max_tokens,
temperature=input.temperature,
top_p=input.top_p,
top_k=input.top_k,
stream=True,
):
if "choices" in res["data"] and "delta" in res["data"]["choices"][0]:
delta_content = res["data"]["choices"][0]["delta"]["content"]
chat_response += delta_content
yield f"data: {delta_content}\n\n"
else:
yield "data: [DONE]\n\n"

return StreamingResponse(stream_generator(), media_type="text/event-stream")
else:
try:
response = self.client.chat.completions.create(
model=input.model,
messages=messages,
max_tokens=input.max_tokens,
temperature=input.temperature,
top_p=input.top_p,
top_k=input.top_k,
)
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))

return response
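To sanity-check the new integration outside the microservice wrapper, something like the following could be used — an illustrative sketch only, assuming PREDICTIONGUARD_API_KEY is exported and the script is run from comps/llms/src/text-generation so that `integrations.predictionguard` resolves the same way it does in opea_llm_microservice.py:

```python
import asyncio

from comps.cores.proto.api_protocol import ChatCompletionRequest
from integrations.predictionguard import OPEATextGen_Predictionguard


async def main():
    # Constructing the component runs its health check, so this requires a
    # valid PREDICTIONGUARD_API_KEY and network access to Prediction Guard.
    component = OPEATextGen_Predictionguard(
        name="OPEATextGen_Predictionguard",
        description="Prediction Guard text-generation component",
    )
    # Request fields mirror those handled in invoke(); a plain string for
    # "messages" is wrapped with a default system prompt by the component.
    request = ChatCompletionRequest(
        model="Hermes-2-Pro-Llama-3-8B",
        messages="What is AI?",
        max_tokens=100,
        temperature=0.7,
        top_p=0.9,
        top_k=50,
        stream=False,
    )
    response = await component.invoke(request)
    print(response)


asyncio.run(main())
```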
3 changes: 3 additions & 0 deletions comps/llms/src/text-generation/opea_llm_microservice.py
@@ -6,6 +6,7 @@
from typing import Union

from integrations.opea import OPEALLM
from integrations.predictionguard import OPEATextGen_Predictionguard

from comps import (
CustomLogger,
@@ -25,6 +26,8 @@


llm_component_name = os.getenv("LLM_COMPONENT_NAME", "OPEA_LLM")
if logflag:
logger.info(f"Get llm_component_name {llm_component_name}")
# Initialize OpeaComponentLoader
loader = OpeaComponentLoader(llm_component_name, description=f"OPEA LLM Component: {llm_component_name}")

2 changes: 2 additions & 0 deletions comps/llms/src/text-generation/requirements.txt
@@ -8,6 +8,8 @@ openai==1.57.4
opentelemetry-api
opentelemetry-exporter-otlp
opentelemetry-sdk
Pillow
predictionguard
prometheus-fastapi-instrumentator
shortuuid
transformers
15 changes: 0 additions & 15 deletions comps/llms/text-generation/predictionguard/Dockerfile

This file was deleted.

2 changes: 0 additions & 2 deletions comps/llms/text-generation/predictionguard/__init__.py

This file was deleted.

20 changes: 0 additions & 20 deletions comps/llms/text-generation/predictionguard/docker_compose_llm.yaml

This file was deleted.

8 changes: 0 additions & 8 deletions comps/llms/text-generation/predictionguard/entrypoint.sh

This file was deleted.

85 changes: 0 additions & 85 deletions comps/llms/text-generation/predictionguard/llm_predictionguard.py

This file was deleted.

12 changes: 0 additions & 12 deletions comps/llms/text-generation/predictionguard/requirements.txt

This file was deleted.

20 changes: 11 additions & 9 deletions tests/llms/test_llms_text-generation_predictionguard.sh
@@ -13,43 +13,45 @@ fi
function build_docker_images() {
cd $WORKPATH
echo $(pwd)
docker build --no-cache -t opea/llm-pg:comps -f comps/llms/text-generation/predictionguard/Dockerfile .
docker build --no-cache -t opea/llm-textgen:comps -f comps/llms/src/text-generation/Dockerfile .
if [ $? -ne 0 ]; then
echo "opea/llm-pg built failed"
echo "opea/llm-textgen built failed"
exit 1
else
echo "opea/llm-pg built successfully"
echo "opea/llm-textgen built successfully"
fi
}

function start_service() {
llm_service_port=9000
unset http_proxy
docker run -d --name=test-comps-llm-pg-server \
docker run -d --name=test-comps-llm-textgen-pg-server \
-e http_proxy= -e https_proxy= \
-e PREDICTIONGUARD_API_KEY=${PREDICTIONGUARD_API_KEY} \
-p 9000:9000 --ipc=host opea/llm-pg:comps
-e LLM_COMPONENT_NAME="OPEATextGen_Predictionguard" \
-e LOGFLAG=True \
-p 9000:9000 --ipc=host opea/llm-textgen:comps
sleep 60 # Sleep for 1 minute to allow the service to start
}

function validate_microservice() {
llm_service_port=9000
result=$(http_proxy="" curl http://${ip_address}:${llm_service_port}/v1/chat/completions \
-X POST \
-d '{"model": "Hermes-2-Pro-Llama-3-8B", "query": "What is AI?", "stream": false, "max_tokens": 100, "temperature": 0.7, "top_p": 1.0, "top_k": 50}' \
-d '{"model": "Hermes-2-Pro-Llama-3-8B", "messages": "What is AI?", "stream": false, "max_tokens": 100, "temperature": 0.7, "top_p": 1.0, "top_k": 50}' \
-H 'Content-Type: application/json')

if [[ $result == *"text"* ]]; then
if [[ $result == *"content"* ]]; then
echo "Service response is correct."
else
echo "Result wrong. Received was $result"
docker logs test-comps-llm-pg-server
docker logs test-comps-llm-textgen-pg-server
exit 1
fi
}

function stop_docker() {
cid=$(docker ps -aq --filter "name=test-comps-llm-pg-*")
cid=$(docker ps -aq --filter "name=test-comps-llm-textgen-pg-*")
if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
}
