[WIP] Feature/claude 3 (#126)
Closes #125, Closes #89

---------

Co-authored-by: Yusuke Wada <[email protected]>
statefb and wadabee authored Mar 7, 2024
1 parent f333cc8 commit 69098df
Showing 41 changed files with 1,909 additions and 1,035 deletions.
21 changes: 13 additions & 8 deletions README.md
@@ -3,17 +3,22 @@
![](https://github.com/aws-samples/bedrock-claude-chat/actions/workflows/test.yml/badge.svg)

> [!Tip]
> 🔔**RAG (Retrieval Augmented Generation) feature released**. See the [Release](https://github.com/aws-samples/bedrock-claude-chat/releases/tag/v0.4.0) for details.
> 🔔**Multi-modal chat with [Claude 3 (Sonnet)](https://aws.amazon.com/jp/about-aws/whats-new/2024/03/anthropics-claude-3-sonnet-model-amazon-bedrock/) is now available**. See the [Release](https://github.com/aws-samples/bedrock-claude-chat/releases/tag/v0.4.2) for details.
> [!Warning]
> The current version (`v0.4.x`) is not compatible with earlier versions (~`v0.3.0`) due to a change in the DynamoDB table schema. **Please note that UPDATING (i.e. `cdk deploy`) FROM AN EARLIER VERSION TO `v0.4.x` WILL DESTROY ALL EXISTING CONVERSATIONS.**
This repository is a sample chatbot using [Claude 2](https://www.anthropic.com/index/claude-2), an LLM from Anthropic and one of the foundation models provided by [Amazon Bedrock](https://aws.amazon.com/bedrock/) for generative AI.
This repository is a sample chatbot using [Claude](https://www.anthropic.com/), an LLM from Anthropic and one of the foundation models provided by [Amazon Bedrock](https://aws.amazon.com/bedrock/) for generative AI.

### Basic Conversation

Not only text but also images can be used in conversations with [Anthropic's Claude 3 Sonnet](https://www.anthropic.com/news/claude-3-family).

![](./docs/imgs/demo.gif)

> [!Note]
> Currently, images are compressed to 800px JPEG due to the DynamoDB [item size limit](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/ServiceQuotas.html#limits-items). [Issue](https://github.com/aws-samples/bedrock-claude-chat/issues/131)
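
A minimal sketch of such a compression step, assuming Pillow (illustrative only; the repository may perform the resizing elsewhere, e.g. on the frontend before upload):

```py
# Illustrative only: one way to fit an image within 800px and re-encode it as JPEG.
from io import BytesIO

from PIL import Image


def compress_image(data: bytes, max_px: int = 800, quality: int = 80) -> bytes:
    img = Image.open(BytesIO(data))
    img.thumbnail((max_px, max_px))  # shrink in place, keeping the aspect ratio
    buf = BytesIO()
    img.convert("RGB").save(buf, format="JPEG", quality=quality)
    return buf.getvalue()
```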
### Bot Personalization

Add your own instructions and provide external knowledge as URLs or files (a.k.a. [RAG](./docs/RAG.md)). The bot can be shared among application users.
@@ -30,7 +35,7 @@ Add your own instruction and give external knowledge as URL or files (a.k.a [RAG

## 🚀 Super-easy Deployment

- In the us-east-1 region, open [Bedrock Model access](https://us-east-1.console.aws.amazon.com/bedrock/home?region=us-east-1#/modelaccess) > `Manage model access`, check `Anthropic / Claude`, `Anthropic / Claude Instant`, and `Cohere / Embed Multilingual`, then `Save changes`.
- In the us-east-1 region, open [Bedrock Model access](https://us-east-1.console.aws.amazon.com/bedrock/home?region=us-east-1#/modelaccess) > `Manage model access`, check `Anthropic / Claude`, `Anthropic / Claude Instant`, `Anthropic / Claude 3 Sonnet`, and `Cohere / Embed Multilingual`, then `Save changes`.

<details>
<summary>Screenshot</summary>
@@ -68,7 +73,6 @@ It's an architecture built on AWS managed services, eliminating the need for inf

- [Amazon DynamoDB](https://aws.amazon.com/dynamodb/): NoSQL database for conversation history storage
- [Amazon API Gateway](https://aws.amazon.com/api-gateway/) + [AWS Lambda](https://aws.amazon.com/lambda/): Backend API endpoint ([AWS Lambda Web Adapter](https://github.com/awslabs/aws-lambda-web-adapter), [FastAPI](https://fastapi.tiangolo.com/))
- [Amazon SNS](https://aws.amazon.com/sns/): Used to decouple streaming calls between API Gateway and Bedrock because streaming responses can take over 30 seconds in total, exceeding the limitations of HTTP integration (See [quota](https://docs.aws.amazon.com/apigateway/latest/developerguide/limits.html)).
- [Amazon CloudFront](https://aws.amazon.com/cloudfront/) + [S3](https://aws.amazon.com/s3/): Frontend application delivery ([React](https://react.dev/), [Tailwind CSS](https://tailwindcss.com/))
- [AWS WAF](https://aws.amazon.com/waf/): IP address restriction
- [Amazon Cognito](https://aws.amazon.com/cognito/): User authentication
@@ -179,18 +183,19 @@ BedrockChatStack.FrontendURL = https://xxxxx.cloudfront.net
Edit [config.py](./backend/app/config.py) and run `cdk deploy`.

```py
# See: https://docs.anthropic.com/claude/reference/complete_post
GENERATION_CONFIG = {
"max_tokens_to_sample": 500,
"temperature": 0.6,
"max_tokens": 2000,
"top_k": 250,
"top_p": 0.999,
"temperature": 0.6,
"stop_sequences": ["Human: ", "Assistant: "],
}

EMBEDDING_CONFIG = {
"model_id": "amazon.titan-embed-text-v1",
"model_id": "cohere.embed-multilingual-v3",
"chunk_size": 1000,
"chunk_overlap": 100,
"chunk_overlap": 200,
}
```
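
The parameters above follow the naming of Anthropic's Messages API. For orientation, here is a hedged sketch of how such a config could be passed to Bedrock's `InvokeModel` for Claude 3 Sonnet via boto3; this is illustrative only and not necessarily how this application wires the call:

```py
# Illustrative only: assumes boto3 credentials and region are configured.
import json

import boto3

GENERATION_CONFIG = {
    "max_tokens": 2000,
    "top_k": 250,
    "top_p": 0.999,
    "temperature": 0.6,
    "stop_sequences": ["Human: ", "Assistant: "],
}

client = boto3.client("bedrock-runtime", region_name="us-east-1")

body = {
    "anthropic_version": "bedrock-2023-05-31",  # required by Bedrock's Claude Messages API
    **GENERATION_CONFIG,
    "messages": [
        {"role": "user", "content": [{"type": "text", "text": "Hello, Claude!"}]}
    ],
}

response = client.invoke_model(
    modelId="anthropic.claude-3-sonnet-20240229-v1:0",
    body=json.dumps(body),
)
print(json.loads(response["body"].read())["content"][0]["text"])
```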

82 changes: 47 additions & 35 deletions backend/app/bedrock.py
@@ -1,30 +1,56 @@
import json

from app.config import EMBEDDING_CONFIG, GENERATION_CONFIG
from app.repositories.model import MessageModel
from app.utils import get_bedrock_client

client = get_bedrock_client()


def _create_body(model: str, prompt: str):
    if model in ("claude-instant-v1", "claude-v2"):
        parameter = GENERATION_CONFIG
        parameter["prompt"] = prompt
        return json.dumps(parameter)
    else:
        raise NotImplementedError()


def _extract_output_text(model: str, response) -> str:
    if model in ("claude-instant-v1", "claude-v2"):
        output = json.loads(response.get("body").read())
        output_txt = output["completion"]
        if output_txt[0] == " ":
            # claude outputs a space at the beginning of the text
            output_txt = output_txt[1:]
        return output_txt
    else:
        raise NotImplementedError()
def compose_args_for_anthropic_client(
    messages: list[MessageModel],
    model: str,
    instruction: str | None = None,
    stream: bool = False,
) -> dict:
    """Compose arguments for Anthropic client.
    Ref: https://docs.anthropic.com/claude/reference/messages_post
    """
    arg_messages = []
    for message in messages:
        if message.role not in ["system", "instruction"]:
            content = []
            for c in message.content:
                if c.content_type == "text":
                    content.append(
                        {
                            "type": "text",
                            "text": c.body,
                        }
                    )
                elif c.content_type == "image":
                    content.append(
                        {
                            "type": "image",
                            "source": {
                                "type": "base64",
                                "media_type": c.media_type,
                                "data": c.body,
                            },
                        }
                    )
            m = {"role": message.role, "content": content}
            arg_messages.append(m)

    args = {
        **GENERATION_CONFIG,
        "model": get_model_id(model),
        "messages": arg_messages,
        "stream": stream,
    }
    if instruction:
        args["system"] = instruction
    return args


def get_model_id(model: str) -> str:
@@ -33,26 +59,12 @@ def get_model_id(model: str) -> str:
return "anthropic.claude-v2"
elif model == "claude-instant-v1":
return "anthropic.claude-instant-v1"
elif model == "claude-v3-sonnet":
return "anthropic.claude-3-sonnet-20240229-v1:0"
else:
raise NotImplementedError()


def invoke(prompt: str, model: str) -> str:
    payload = _create_body(model, prompt)

    model_id = get_model_id(model)
    accept = "application/json"
    content_type = "application/json"

    response = client.invoke_model(
        body=payload, modelId=model_id, accept=accept, contentType=content_type
    )

    output_txt = _extract_output_text(model, response)

    return output_txt


def calculate_query_embedding(question: str) -> list[float]:
    model_id = EMBEDDING_CONFIG["model_id"]

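
For orientation, a sketch of how the dictionary returned by the new `compose_args_for_anthropic_client` could be consumed. The client wiring below (the `anthropic` SDK's `AnthropicBedrock` class) is an assumption for illustration; the commit's actual call site is not shown in this excerpt:

```py
# Illustrative only: assumes the `anthropic` SDK (AnthropicBedrock) and AWS credentials.
from anthropic import AnthropicBedrock

from app.bedrock import compose_args_for_anthropic_client
from app.repositories.model import ContentModel, MessageModel

client = AnthropicBedrock()

message = MessageModel(
    role="user",
    content=[ContentModel(content_type="text", media_type=None, body="Hello!")],
    model="claude-v3-sonnet",
    children=[],
    parent=None,
    create_time=0.0,
)

args = compose_args_for_anthropic_client(
    messages=[message], model="claude-v3-sonnet", stream=False
)
response = client.messages.create(**args)  # keys map onto the Messages API parameters
print(response.content[0].text)
```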
4 changes: 2 additions & 2 deletions backend/app/config.py
@@ -2,10 +2,10 @@
# Adjust the values according to your application.
# See: https://docs.anthropic.com/claude/reference/complete_post
GENERATION_CONFIG = {
"max_tokens_to_sample": 2000,
"temperature": 0.6,
"max_tokens": 2000,
"top_k": 250,
"top_p": 0.999,
"temperature": 0.6,
"stop_sequences": ["Human: ", "Assistant: "],
}

21 changes: 18 additions & 3 deletions backend/app/repositories/conversation.py
@@ -127,9 +127,24 @@ def find_conversation_by_id(user_id: str, conversation_id: str) -> ConversationM
        message_map={
            k: MessageModel(
                role=v["role"],
                content=ContentModel(
                    content_type=v["content"]["content_type"],
                    body=v["content"]["body"],
                content=(
                    [
                        ContentModel(
                            content_type=c["content_type"],
                            body=c["body"],
                            media_type=c["media_type"],
                        )
                        for c in v["content"]
                    ]
                    if type(v["content"]) == list
                    else [
                        # For backward compatibility
                        ContentModel(
                            content_type=v["content"]["content_type"],
                            body=v["content"]["body"],
                            media_type=None,
                        )
                    ]
                ),
                model=v["model"],
                children=v["children"],
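
The `if type(v["content"]) == list` branch above distinguishes two stored item shapes for `content` in DynamoDB. Roughly, with made-up values:

```py
# Illustrative item shapes only (values are placeholders).
# Old schema: a single content object, handled by the `else` (backward compatibility) branch.
old_message = {
    "role": "user",
    "content": {"content_type": "text", "body": "Hello!"},
}

# New schema: a list of content objects, enabling multi-modal messages.
new_message = {
    "role": "user",
    "content": [
        {"content_type": "text", "media_type": None, "body": "What is in this image?"},
        {"content_type": "image", "media_type": "image/png", "body": "<base64-encoded image>"},
    ],
}
```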
9 changes: 5 additions & 4 deletions backend/app/repositories/model.py
@@ -1,11 +1,12 @@
from typing import Literal
from typing import Literal, Optional

from app.route_schema import type_sync_status
from pydantic import BaseModel


class ContentModel(BaseModel):
content_type: Literal["text"]
content_type: Literal["text", "image"]
media_type: Optional[str]
body: str


@@ -17,8 +18,8 @@ class KnowledgeModel(BaseModel):

class MessageModel(BaseModel):
    role: str
    content: ContentModel
    model: Literal["claude-instant-v1", "claude-v2"]
    content: list[ContentModel]
    model: Literal["claude-instant-v1", "claude-v2", "claude-v3-sonnet"]
    children: list[str]
    parent: str | None
    create_time: float
16 changes: 10 additions & 6 deletions backend/app/route_schema.py
@@ -17,8 +17,8 @@ class Config:


class Content(BaseSchema):
content_type: Literal["text"]
body: str
content_type: Literal["text", "image"]
media_type: Optional[str] = Field(
None,
description="MIME type of the image. Must be specified if `content_type` is `image`.",
)
body: str = Field(..., description="Content body. Text or base64 encoded image.")


class Knowledge(BaseSchema):
@@ -38,16 +42,16 @@ class KnowledgeDiffInput(BaseSchema):

class MessageInput(BaseSchema):
    role: str
    content: Content
    model: Literal["claude-instant-v1", "claude-v2"]
    content: list[Content]
    model: Literal["claude-instant-v1", "claude-v2", "claude-v3-sonnet"]
    parent_message_id: str | None


class MessageOutput(BaseSchema):
    role: str
    content: Content
    content: list[Content]
    # NOTE: "claude" will be deprecated (same as "claude-v2")
    model: Literal["claude-instant-v1", "claude-v2", "claude"]
    model: Literal["claude-instant-v1", "claude-v2", "claude", "claude-v3-sonnet"]
    children: list[str]
    parent: str | None

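
With the updated schema, a multi-modal request body for `MessageInput` might look like the following (illustrative values only; exact key casing depends on `BaseSchema`'s alias configuration, which is not shown here):

```py
# Illustrative payload only; `media_type` is required when `content_type` is "image".
message_input = {
    "role": "user",
    "content": [
        {"content_type": "text", "media_type": None, "body": "Describe this picture."},
        {
            "content_type": "image",
            "media_type": "image/jpeg",
            "body": "<base64-encoded JPEG>",
        },
    ],
    "model": "claude-v3-sonnet",
    "parent_message_id": None,
}
```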
