
Commit

OPEA Bedrock integration
Signed-off-by: vihanth sura <[email protected]>
Vihanth committed Dec 13, 2024
1 parent c36c503 commit 929e6a9
Showing 5 changed files with 219 additions and 0 deletions.
26 changes: 26 additions & 0 deletions comps/llms/text-generation/bedrock/Dockerfile
@@ -0,0 +1,26 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

FROM python:3.11-slim

RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
    curl \
    libgl1-mesa-glx \
    libjemalloc-dev

RUN useradd -m -s /bin/bash user && \
    mkdir -p /home/user && \
    chown -R user /home/user/

USER user

COPY comps /home/user/comps

RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r /home/user/comps/llms/text-generation/bedrock/requirements.txt

ENV PYTHONPATH=$PYTHONPATH:/home/user

WORKDIR /home/user/comps/llms/text-generation/bedrock

ENTRYPOINT ["python", "llm.py"]
41 changes: 41 additions & 0 deletions comps/llms/text-generation/bedrock/README.md
@@ -0,0 +1,41 @@
# Introduction

[Amazon Bedrock](https://aws.amazon.com/bedrock) is a fully managed service that offers a choice of high-performing foundation models (FMs) from leading AI companies such as AI21 Labs, Anthropic, Cohere, Meta, Mistral AI, Stability AI, and Amazon through a single API, along with a broad set of capabilities for building generative AI applications with security, privacy, and responsible AI.

## Get Started

### Setup Environment Variables

To start the Bedrock service, you need to set up the following environment variables first.

```bash
export AWS_ACCESS_KEY_ID=${aws_access_key_id}
export AWS_SECRET_ACCESS_KEY=${aws_secret_access_key}
```
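
The service also reads two optional variables at startup (see `llm.py`); the defaults are shown below:

```bash
export BEDROCK_REGION=us-west-2  # AWS region for the bedrock-runtime client
export MODEL_ID=us.anthropic.claude-3-haiku-20240307-v1:0  # default model when the request omits one
```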

### Build Docker Image

```bash
cd GenAIComps/
docker build --no-cache -t opea/bedrock:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/bedrock/Dockerfile .
```

### Run the Bedrock Microservice

```bash
docker run -d --name bedrock -p 9009:9000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY opea/bedrock:latest
```
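
The microservice listens on port 9000 inside the container, so `-p 9009:9000` exposes it on host port 9009, which the examples below target.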

### Consume the Bedrock Microservice

```bash
curl http://${host_ip}:9009/v1/chat/completions \
-X POST \
-d '{"model": "us.anthropic.claude-3-5-haiku-20241022-v1:0", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}' \
-H 'Content-Type: application/json'

curl http://${host_ip}:9009/v1/chat/completions \
-X POST \
-d '{"model": "us.anthropic.claude-3-5-haiku-20241022-v1:0", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17, "stream": true}' \
-H 'Content-Type: application/json'
```
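
For reference, a non-streaming call returns the serialized `GeneratedDoc` produced by the service. A minimal sketch of the response shape follows; the `id` and answer text are illustrative, and the exact field set depends on the installed `comps` version:

```json
{"id": "<generated doc id>", "text": "Deep Learning is a subset of machine learning that uses multi-layer neural networks", "prompt": "What is Deep Learning?"}
```

With `"stream": true`, the service emits TGI-style server-sent events, one `chat.completion.chunk` per Bedrock `contentBlockDelta`, terminated by `[DONE]` (timestamp and content illustrative):

```
data: {"object": "chat.completion.chunk", "model": "us.anthropic.claude-3-5-haiku-20241022-v1:0", "created": 1734048000, "choices": [{"index": 0, "delta": {"role": "assistant", "content": "Deep"}, "finish_reason": null}]}

data: [DONE]
```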
2 changes: 2 additions & 0 deletions comps/llms/text-generation/bedrock/__init__.py
@@ -0,0 +1,2 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
133 changes: 133 additions & 0 deletions comps/llms/text-generation/bedrock/llm.py
@@ -0,0 +1,133 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import json
import os
import time
from typing import Union

import boto3
from fastapi.responses import StreamingResponse

from comps import (
CustomLogger,
GeneratedDoc,
LLMParamsDoc,
SearchedDoc,
ServiceType,
opea_microservices,
register_microservice,
register_statistics,
statistics_dict,
)
from comps.cores.proto.api_protocol import ChatCompletionRequest

logger = CustomLogger("llm_bedrock")
# Environment variables are strings, so parse LOGFLAG explicitly (default: enabled)
logflag = os.getenv("LOGFLAG", "True").lower() in ("true", "1")

region = os.getenv("BEDROCK_REGION", "us-west-2")
default_model = os.getenv("MODEL_ID", "us.anthropic.claude-3-haiku-20240307-v1:0")
bedrock_runtime = boto3.client(service_name="bedrock-runtime", region_name=region)

# Currently unused: these are request-body parameters for the legacy invoke_model API;
# the converse/converse_stream calls below pass inferenceConfig instead.
model_kwargs = {
    "anthropic_version": "bedrock-2023-05-31",
    "max_tokens": 1000,
}

sse_headers = {"x-accel-buffering": "no", "cache-control": "no-cache", "content-type": "text/event-stream"}


@register_microservice(
    name="opea_service@llm_bedrock",
    service_type=ServiceType.LLM,
    endpoint="/v1/chat/completions",
    host="0.0.0.0",
    port=9000,
)
def llm_generate(input: Union[LLMParamsDoc, ChatCompletionRequest, SearchedDoc]):
    if logflag:
        logger.info(input)

    # Parse out arguments for the Bedrock converse API
    model_id = input.model if input.model else default_model
    if logflag:
        logger.info(f"[llm - chat] Using model {model_id}")

    bedrock_args = {"modelId": model_id}

    inference_config = {}
    if input.max_tokens:
        inference_config["maxTokens"] = input.max_tokens

    if input.stop:
        inference_config["stopSequences"] = input.stop

    if input.temperature:
        inference_config["temperature"] = input.temperature

    if input.top_p:
        inference_config["topP"] = input.top_p

    if len(inference_config) > 0:
        bedrock_args["inferenceConfig"] = inference_config

    if logflag and len(inference_config) > 0:
        logger.info(f"[llm - chat] inference_config: {inference_config}")

    # Parse messages from HuggingFace TGI format to Bedrock messages format
    # tgi: [{"role": "system" | "user", "content": "text"}]
    # bedrock: [{"role": "assistant" | "user", "content": [{"text": "content"}]}]
    messages = [
        {"role": "assistant" if i.get("role") == "system" else "user", "content": [{"text": i.get("content", "")}]}
        for i in input.messages
    ]

    # Bedrock requires that conversations start with a user prompt
    # TGI allows the first message to be an assistant prompt, defining assistant behavior
    # If the message list starts with an assistant prompt, move that message to the Bedrock system prompt
    if len(messages) > 0 and messages[0]["role"] == "assistant":
        system_prompt = messages[0]["content"][0]["text"]
        bedrock_args["system"] = [{"text": system_prompt}]
        messages.pop(0)

    bedrock_args["messages"] = messages

    if logflag:
        logger.info(f"[llm - chat] Bedrock args: {bedrock_args}")

    if input.stream:
        response = bedrock_runtime.converse_stream(**bedrock_args)

        def stream_generator():
            chat_response = ""
            for chunk in response["stream"]:
                if "contentBlockDelta" in chunk:
                    text = chunk.get("contentBlockDelta", {}).get("delta", {}).get("text", "")
                    # Accumulate the full response so it can be logged once the stream ends
                    chat_response += text
                    if logflag:
                        logger.info(f"[llm - chat_stream] chunk:{text}")

                    tgi_format_out = {
                        "object": "chat.completion.chunk",
                        "model": model_id,
                        "created": int(time.time()),
                        "choices": [
                            {"index": 0, "delta": {"role": "assistant", "content": text}, "finish_reason": None}
                        ],
                    }
                    yield f"data: {json.dumps(tgi_format_out)}\n\n"
            if logflag:
                logger.info(f"[llm - chat_stream] stream response: {chat_response}")
            yield "data: [DONE]\n\n"

        return StreamingResponse(stream_generator(), headers=sse_headers)

    response = bedrock_runtime.converse(**bedrock_args)
    output_content = response.get("output", {}).get("message", {}).get("content", [])
    output_text = output_content[0].get("text", "") if len(output_content) > 0 else ""
    prompt = messages[-1].get("content", [{"text": ""}])[0].get("text", "") if len(messages) > 0 else ""

    return GeneratedDoc(text=output_text, prompt=prompt)


if __name__ == "__main__":
    opea_microservices["opea_service@llm_bedrock"].start()
17 changes: 17 additions & 0 deletions comps/llms/text-generation/bedrock/requirements.txt
@@ -0,0 +1,17 @@
aiohttp
boto3
docarray[full]
fastapi
httpx
huggingface_hub
langchain
langchain_aws
numpy
openai==1.35.13
opentelemetry-api
opentelemetry-exporter-otlp
opentelemetry-sdk
prometheus-fastapi-instrumentator
shortuuid
transformers
uvicorn
