From 18be0df8bcc9725e7ec677c6b92af94f68186f83 Mon Sep 17 00:00:00 2001 From: Omar Leonardo Sanchez Granados Date: Sat, 5 Oct 2024 04:44:50 -0400 Subject: [PATCH] Fix/bedrock issues (#2718) ### What problem does this PR solve? Adding a Bedrock API key for Claude Sonnet was broken. I found the issue while trying to test the LLM configuration: `system` is a required parameter in boto3. There were also problems in the Bedrock embeddings implementation when trying to encode queries. ### Type of change - [X] Bug Fix (non-breaking change which fixes an issue) --- rag/llm/chat_model.py | 5 +++-- rag/llm/embedding_model.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py index 11956ad81a..f5b66872ed 100644 --- a/rag/llm/chat_model.py +++ b/rag/llm/chat_model.py @@ -630,7 +630,7 @@ def chat(self, system, history, gen_conf): modelId=self.model_name, messages=history, inferenceConfig=gen_conf, - system=[{"text": system}] if system else None, + system=[{"text": (system if system else "Answer the user's message.")}] , ) # Extract and print the response text. @@ -675,7 +675,8 @@ def chat_streamly(self, system, history, gen_conf): streaming_response = self.client.converse_stream( modelId=self.model_name, messages=history, - inferenceConfig=gen_conf + inferenceConfig=gen_conf, + system=[{"text": system if system else ""}], ) # Extract and print the streamed response text in real-time. diff --git a/rag/llm/embedding_model.py b/rag/llm/embedding_model.py index 4189a022ff..39704590a7 100644 --- a/rag/llm/embedding_model.py +++ b/rag/llm/embedding_model.py @@ -443,7 +443,7 @@ def encode_queries(self, text): response = self.client.invoke_model(modelId=self.model_name, body=json.dumps(body)) model_response = json.loads(response["body"].read()) - embeddings.extend([model_response["embedding"]]) + embeddings.extend(model_response["embedding"]) return np.array(embeddings), token_count