Updating old examples (2/2)

SolaceDev · Aug 27, 2024 · 1665494 · 1665494
1 parent 645cd67
commit 1665494
Show file tree

Hide file tree

Showing 3 changed files with 107 additions and 50 deletions.
diff --git a/examples/milvus_store.yaml b/examples/milvus_store.yaml
@@ -1,21 +1,24 @@
 ---
 # Example configuration file for adding a Milvus vector store and a Cohere embedding model
 # The input comes from STDIN and goes to STDOUT
+# 
+# Dependencies:
+# pip install langchain_milvus pymilvus
+#
+# required ENV variables:
+# - MILVUS_HOST
+# - MILVUS_PORT
+# - MILVUS_COLLECTION_NAME
+# - ACCESS_KEY: AWS access key
+# - SECRET_KEY: AWS secret key
+# - AWS_BEDROCK_COHERE_EMBED_MODEL_ID
+# - AWS_BEDROCK_COHERE_EMBED_REGION
+
 log:
   stdout_log_level: DEBUG
   log_file_level: DEBUG
   log_file: solace_ai_connector.log
 
-shared_config:
-  - broker_config: &broker_connection
-      broker_connection_share: ${SOLACE_BROKER_URL}
-      broker_type: solace
-      broker_url: ${SOLACE_BROKER_URL}
-      broker_username: ${SOLACE_BROKER_USERNAME}
-      broker_password: ${SOLACE_BROKER_PASSWORD}
-      broker_vpn: ${SOLACE_BROKER_VPN}
-
-
 # List of flows
 flows:
   - name: test_flow
@@ -24,25 +27,24 @@ flows:
       # Test input from STDIN
       - component_name: stdin
         component_module: stdin_input
-        component_config:
 
       - component_name: milvus_cohere_embed
         component_module: langchain_vector_store_embedding_index
         component_config:
-          vector_store_component_path: langchain_community.vectorstores
+          vector_store_component_path: langchain_milvus
           vector_store_component_name: Milvus
           vector_store_component_config:
-            collection_name: collection_2
+            auto_id: true
+            collection_name: ${MILVUS_COLLECTION_NAME}
             connection_args:
               host: ${MILVUS_HOST}
               port: ${MILVUS_PORT}
-          # vector_store_index_name: solace-index-3
-          embedding_component_path: langchain_community.embeddings
+          embedding_component_path: langchain_aws
           embedding_component_name: BedrockEmbeddings
           embedding_component_config:
             model_id: ${AWS_BEDROCK_COHERE_EMBED_MODEL_ID}
             region_name: ${AWS_BEDROCK_COHERE_EMBED_REGION}
-            credentials_profile_name: default
+            credentials_profile_name: default # Profile name in ~/.aws/credentials
         input_transforms:
           - type: copy
             source_value: 
@@ -51,31 +53,30 @@ flows:
                 function: system
             dest_expression: user_data.vector_input:metadata.system
           - type: copy
-            source_value: efunneko
+            source_value: username
             dest_expression: user_data.vector_input:metadata.user
           - type: copy
-            source_value: input.payload
-            dest_expression: user_data.vector_input:text
+            source_expression: input.payload:text
+            dest_expression: user_data.vector_input:texts
         component_input:
           source_expression: user_data.vector_input
 
       - component_name: milvus_cohere_embed_search
         component_module: langchain_vector_store_embedding_search
         component_config:
-          vector_store_component_path: langchain_community.vectorstores
+          vector_store_component_path: langchain_milvus
           vector_store_component_name: Milvus
           vector_store_component_config:
-            collection_name: collection_1
+            collection_name: ${MILVUS_COLLECTION_NAME}
             connection_args:
               host: ${MILVUS_HOST}
               port: ${MILVUS_PORT}
-          # vector_store_index_name: solace-index-3
-          embedding_component_path: langchain_community.embeddings
+          embedding_component_path: langchain_aws
           embedding_component_name: BedrockEmbeddings
           embedding_component_config:
             model_id: ${AWS_BEDROCK_COHERE_EMBED_MODEL_ID}
             region_name: ${AWS_BEDROCK_COHERE_EMBED_REGION}
-            credentials_profile_name: default
+            credentials_profile_name: default # Profile name in ~/.aws/credentials
           max_results: 5
         component_input:
           source_expression: input.payload

diff --git a/examples/vector_store_search.yaml b/examples/vector_store_search.yaml
@@ -1,28 +1,40 @@
 ---
-# Example that uses Cohere embeddings and OpenSearch for vector store 
+# Example that uses Cohere embeddings and Amazon OpenSearch Service Serverless for vector store 
 # This also shows how to use AWS credentials and AWS4Auth for OpenSearch
 # which involves using 'invoke' to create the required auth objects
+# 
+# 
+# Follow Boto3 documentation for AWS credentials: 
+# https://boto3.amazonaws.com/v1/documentation/api/latest/guide/quickstart.html#configuration
+# https://python.langchain.com/v0.2/docs/integrations/vectorstores/opensearch/#using-aoss-amazon-opensearch-service-serverless
+#
+# Dependencies:
+# pip install -U langchain_community opensearch-py requests_aws4auth
+#
+# required ENV variables:
+# - AWS_BEDROCK_COHERE_EMBED_MODEL_ID
+# - AWS_BEDROCK_COHERE_EMBED_REGION
+# - AWS_OPENSEARCH_INDEX_NAME
+# - AWS_OPENSEARCH_ENDPOINT
+
 log:
   stdout_log_level: DEBUG
   log_file_level: DEBUG
   log_file: solace_ai_connector.log
 
 shared_config:
-  - broker_config: &broker_connection
-      broker_connection_share: ${SOLACE_BROKER_URL}
-      broker_type: solace
-      broker_url: ${SOLACE_BROKER_URL}
-      broker_username: ${SOLACE_BROKER_USERNAME}
-      broker_password: ${SOLACE_BROKER_PASSWORD}
-      broker_vpn: ${SOLACE_BROKER_VPN}
-
-  # Get AWS credentials object
+  # Get the AWS credentials object from a profile in ~/.aws/credentials
+  # Alternatively, the ACCESS/SECRET/SESSION keys can be passed directly as ENV variables
+  # e.g.: aws_secret_access_key: ${AWS_SECRET_ACCESS_KEY}
   - aws_credentials: &aws_credentials
       invoke:
         object:
           invoke:
             module: boto3
             function: Session
+            params:
+              keyword:
+                profile_name: default # The profile to choose from .aws/credentials
         function: get_credentials
 
   # Get AWS4Auth object
@@ -47,42 +59,57 @@ shared_config:
                 object: *aws_credentials
                 attribute: token
 
+  # Create a bedrock client for use with AWS components
+  - bedrock_client_config: &bedrock_client_config
+      invoke:
+        module: boto3
+        function: client
+        params:
+          keyword:
+            service_name: bedrock-runtime
+            region_name: ${AWS_BEDROCK_COHERE_EMBED_REGION}
+            aws_access_key_id:
+              invoke:
+                object: *aws_credentials
+                attribute: access_key
+            aws_secret_access_key:
+              invoke:
+                object: *aws_credentials
+                attribute: secret_key
+
 # List of flows
 flows:
   - name: test_flow
     trace_level: DEBUG
     components:
 
-      # Input from a Solace broker
+      # Input from standard input (STDIN)
       - component_name: stdin
         component_module: stdin_input
-        component_config:
 
       - component_name: opensearch_cohere_embed
         component_module: langchain_vector_store_embedding_search
         component_config:
           vector_store_component_path: langchain_community.vectorstores
           vector_store_component_name: OpenSearchVectorSearch
+          vector_store_index_name: ${AWS_OPENSEARCH_INDEX_NAME}
           vector_store_component_config:
-            index_name: ${AWS_OPENSEARCH_JIRA_INDEX_NAME}
-            opensearch_url: ${AWS_OPENSEARCH_JIRA_ENDPOINT}
+            opensearch_url: ${AWS_OPENSEARCH_ENDPOINT}
             connection_class: 
               invoke:
                 module: opensearchpy
                 attribute: RequestsHttpConnection
             http_auth: *aws_4_auth_aoss
             timeout: 300
-          vector_store_index_name: solace-index-3
-          embedding_component_path: langchain_community.embeddings
+          embedding_component_path: langchain_aws
           embedding_component_name: BedrockEmbeddings
           embedding_component_config:
+            client: *bedrock_client_config
             model_id: ${AWS_BEDROCK_COHERE_EMBED_MODEL_ID}
             region_name: ${AWS_BEDROCK_COHERE_EMBED_REGION}
-            credentials_profile_name: default
           max_results: 7
         component_input:
           source_expression: input.payload
 
-
       - component_name: stdout
         component_module: stdout_output
diff --git a/...solace_ai_connector/components/general/langchain/langchain_vector_store_embedding_base.py b/...solace_ai_connector/components/general/langchain/langchain_vector_store_embedding_base.py
@@ -1,5 +1,5 @@
 # This is the base class for vector store embedding classes
-
+import inspect
 from .langchain_base import (
     LangChainBase,
 )
@@ -36,19 +36,48 @@ def init(self):
             self.vector_store_info["path"], self.vector_store_info["name"]
         )
 
-        if "index" not in self.vector_store_info["config"]:
-            self.vector_store_info["config"]["index"] = self.vector_store_info["index"]
-        self.vector_store_info["config"]["embeddings"] = self.embedding
-        self.vector_store_info["config"]["embedding_function"] = self.embedding
+        # Get the expected parameter names of the vector store class
+        class_init_signature = inspect.signature(vector_store_class.__init__)
+        class_param_names = [
+            param.name
+            for param in class_init_signature.parameters.values()
+            if param.name != "self"
+        ]
+
+        # index is optional - skip it if "index" or "index_name" is already provided in the config (NOTE(review): the `or` below only skips when BOTH keys are present - confirm intent)
+        if self.vector_store_info["index"] and (
+            "index" not in self.vector_store_info["config"]
+            or "index_name" not in self.vector_store_info["config"]
+        ):
+            # Checking if the class expects 'index' or 'index_name' as a parameter
+            if "index" in class_param_names:
+                self.vector_store_info["config"]["index"] = self.vector_store_info[
+                    "index"
+                ]
+            elif "index_name" in class_param_names:
+                self.vector_store_info["config"]["index_name"] = self.vector_store_info[
+                    "index"
+                ]
+            else:
+                # If neither is defined, use "index" as the parameter
+                self.vector_store_info["config"]["index"] = self.vector_store_info[
+                    "index"
+                ]
 
-        # index is optional - remove it from the config if it is None
-        if self.vector_store_info["config"]["index"] is None:
-            del self.vector_store_info["config"]["index"]
+        # Checking if the vector store uses "embedding_function" or "embeddings" as a parameter
+        if "embedding_function" in class_param_names:
+            self.vector_store_info["config"]["embedding_function"] = self.embedding
+        elif "embeddings" in class_param_names:
+            self.vector_store_info["config"]["embeddings"] = self.embedding
+        else:
+            # If neither is defined, use "embeddings" as the parameter
+            self.vector_store_info["config"]["embeddings"] = self.embedding
 
         try:
             self.vector_store = self.create_component(
                 self.vector_store_info["config"], vector_store_class
             )
+            print(self.vector_store_info["config"])
         except Exception:  # pylint: disable=broad-except
             del self.vector_store_info["config"]["embeddings"]
             del self.vector_store_info["config"]["embedding_function"]