
Commit

OPEA Bedrock integration
Signed-off-by: vihanth sura <[email protected]>
Vihanth committed Dec 13, 2024
1 parent c36c503 commit 929e6a9
Showing 5 changed files with 219 additions and 0 deletions.
26 changes: 26 additions & 0 deletions comps/llms/text-generation/bedrock/Dockerfile
@@ -0,0 +1,26 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

FROM python:3.11-slim

RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
    curl \
    libgl1-mesa-glx \
    libjemalloc-dev

RUN useradd -m -s /bin/bash user && \
    mkdir -p /home/user && \
    chown -R user /home/user/

USER user

COPY comps /home/user/comps

RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r /home/user/comps/llms/text-generation/bedrock/requirements.txt

ENV PYTHONPATH=$PYTHONPATH:/home/user

WORKDIR /home/user/comps/llms/text-generation/bedrock

ENTRYPOINT ["python", "llm.py"]
41 changes: 41 additions & 0 deletions comps/llms/text-generation/bedrock/README.md
@@ -0,0 +1,41 @@
# Introduction

[Amazon Bedrock](https://aws.amazon.com/bedrock) is a fully managed service that offers a choice of high-performing foundation models (FMs) from leading AI companies such as AI21 Labs, Anthropic, Cohere, Meta, Mistral AI, Stability AI, and Amazon through a single API, along with a broad set of capabilities for building generative AI applications with security, privacy, and responsible AI.

## Get Started

### Setup Environment Variables

To start the Bedrock service, you need to set up the following environment variables first.

```bash
export AWS_ACCESS_KEY_ID=${aws_access_key_id}
export AWS_SECRET_ACCESS_KEY=${aws_secret_access_key}
```
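
The service also reads two optional variables at startup (see `llm.py`); the defaults are shown below:

```bash
export BEDROCK_REGION=us-west-2  # AWS region for the bedrock-runtime client
export MODEL_ID=us.anthropic.claude-3-haiku-20240307-v1:0  # default model when the request omits one
```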

### Build Docker Image

```bash
cd GenAIComps/
docker build --no-cache -t opea/bedrock:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/bedrock/Dockerfile .
```

### Run the Bedrock Microservice

```bash
docker run -d --name bedrock -p 9009:9000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY opea/bedrock:latest
```
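
The microservice listens on port 9000 inside the container, so `-p 9009:9000` exposes it on host port 9009, which the examples below target.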

### Consume the Bedrock Microservice

```bash
curl http://${host_ip}:9009/v1/chat/completions \
-X POST \
-d '{"model": "us.anthropic.claude-3-5-haiku-20241022-v1:0", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}' \
-H 'Content-Type: application/json'

curl http://${host_ip}:9009/v1/chat/completions \
-X POST \
-d '{"model": "us.anthropic.claude-3-5-haiku-20241022-v1:0", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17, "stream": true}' \
-H 'Content-Type: application/json'
```
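
For reference, a non-streaming call returns the serialized `GeneratedDoc` produced by the service. A minimal sketch of the response shape follows; the `id` and answer text are illustrative, and the exact field set depends on the installed `comps` version:

```json
{"id": "<generated doc id>", "text": "Deep Learning is a subset of machine learning that uses multi-layer neural networks", "prompt": "What is Deep Learning?"}
```

With `"stream": true`, the service emits TGI-style server-sent events, one `chat.completion.chunk` per Bedrock `contentBlockDelta`, terminated by `[DONE]` (timestamp and content illustrative):

```
data: {"object": "chat.completion.chunk", "model": "us.anthropic.claude-3-5-haiku-20241022-v1:0", "created": 1734048000, "choices": [{"index": 0, "delta": {"role": "assistant", "content": "Deep"}, "finish_reason": null}]}

data: [DONE]
```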
2 changes: 2 additions & 0 deletions comps/llms/text-generation/bedrock/__init__.py
@@ -0,0 +1,2 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
133 changes: 133 additions & 0 deletions comps/llms/text-generation/bedrock/llm.py
@@ -0,0 +1,133 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import json
import os
import time
from typing import Union

import boto3
from fastapi.responses import StreamingResponse

from comps import (
CustomLogger,
GeneratedDoc,
LLMParamsDoc,
SearchedDoc,
ServiceType,
opea_microservices,
register_microservice,
register_statistics,
statistics_dict,
)
from comps.cores.proto.api_protocol import ChatCompletionRequest

logger = CustomLogger("llm_bedrock")
# Environment variables are strings, so parse LOGFLAG explicitly (default: enabled)
logflag = os.getenv("LOGFLAG", "True").lower() in ("true", "1")

region = os.getenv("BEDROCK_REGION", "us-west-2")
default_model = os.getenv("MODEL_ID", "us.anthropic.claude-3-haiku-20240307-v1:0")
bedrock_runtime = boto3.client(service_name="bedrock-runtime", region_name=region)

# Currently unused: these are request-body parameters for the legacy invoke_model API;
# the converse/converse_stream calls below pass inferenceConfig instead.
model_kwargs = {
    "anthropic_version": "bedrock-2023-05-31",
    "max_tokens": 1000,
}

sse_headers = {"x-accel-buffering": "no", "cache-control": "no-cache", "content-type": "text/event-stream"}


@register_microservice(
    name="opea_service@llm_bedrock",
    service_type=ServiceType.LLM,
    endpoint="/v1/chat/completions",
    host="0.0.0.0",
    port=9000,
)
def llm_generate(input: Union[LLMParamsDoc, ChatCompletionRequest, SearchedDoc]):
    if logflag:
        logger.info(input)

    # Parse out arguments for the Bedrock converse API
    model_id = input.model if input.model else default_model
    if logflag:
        logger.info(f"[llm - chat] Using model {model_id}")

    bedrock_args = {"modelId": model_id}

    inference_config = {}
    if input.max_tokens:
        inference_config["maxTokens"] = input.max_tokens

    if input.stop:
        inference_config["stopSequences"] = input.stop

    if input.temperature:
        inference_config["temperature"] = input.temperature

    if input.top_p:
        inference_config["topP"] = input.top_p

    if len(inference_config) > 0:
        bedrock_args["inferenceConfig"] = inference_config

    if logflag and len(inference_config) > 0:
        logger.info(f"[llm - chat] inference_config: {inference_config}")

    # Parse messages from HuggingFace TGI format to Bedrock messages format
    # tgi: [{"role": "system" | "user", "content": "text"}]
    # bedrock: [{"role": "assistant" | "user", "content": [{"text": "content"}]}]
    messages = [
        {"role": "assistant" if i.get("role") == "system" else "user", "content": [{"text": i.get("content", "")}]}
        for i in input.messages
    ]

    # Bedrock requires that conversations start with a user prompt
    # TGI allows the first message to be an assistant prompt, defining assistant behavior
    # If the message list starts with an assistant prompt, move that message to the Bedrock system prompt
    if len(messages) > 0 and messages[0]["role"] == "assistant":
        system_prompt = messages[0]["content"][0]["text"]
        bedrock_args["system"] = [{"text": system_prompt}]
        messages.pop(0)

    bedrock_args["messages"] = messages

    if logflag:
        logger.info(f"[llm - chat] Bedrock args: {bedrock_args}")

    if input.stream:
        response = bedrock_runtime.converse_stream(**bedrock_args)

        def stream_generator():
            chat_response = ""
            for chunk in response["stream"]:
                if "contentBlockDelta" in chunk:
                    text = chunk.get("contentBlockDelta", {}).get("delta", {}).get("text", "")
                    # Accumulate the full response so it can be logged once the stream ends
                    chat_response += text
                    if logflag:
                        logger.info(f"[llm - chat_stream] chunk:{text}")

                    tgi_format_out = {
                        "object": "chat.completion.chunk",
                        "model": model_id,
                        "created": int(time.time()),
                        "choices": [
                            {"index": 0, "delta": {"role": "assistant", "content": text}, "finish_reason": None}
                        ],
                    }
                    yield f"data: {json.dumps(tgi_format_out)}\n\n"
            if logflag:
                logger.info(f"[llm - chat_stream] stream response: {chat_response}")
            yield "data: [DONE]\n\n"

        return StreamingResponse(stream_generator(), headers=sse_headers)

    response = bedrock_runtime.converse(**bedrock_args)
    output_content = response.get("output", {}).get("message", {}).get("content", [])
    output_text = output_content[0].get("text", "") if len(output_content) > 0 else ""
    prompt = messages[-1].get("content", [{"text": ""}])[0].get("text", "") if len(messages) > 0 else ""

    return GeneratedDoc(text=output_text, prompt=prompt)


if __name__ == "__main__":
    opea_microservices["opea_service@llm_bedrock"].start()
17 changes: 17 additions & 0 deletions comps/llms/text-generation/bedrock/requirements.txt
@@ -0,0 +1,17 @@
aiohttp
boto3
docarray[full]
fastapi
httpx
huggingface_hub
langchain
langchain_aws
numpy
openai==1.35.13
opentelemetry-api
opentelemetry-exporter-otlp
opentelemetry-sdk
prometheus-fastapi-instrumentator
shortuuid
transformers
uvicorn
