From 16654940f284c3e6fbc80bf2196804a21f373880 Mon Sep 17 00:00:00 2001 From: Cyrus Mobini Date: Tue, 27 Aug 2024 11:46:31 -0400 Subject: [PATCH] Updating old examples (2/2) --- examples/milvus_store.yaml | 49 ++++++++------- examples/vector_store_search.yaml | 63 +++++++++++++------ .../langchain_vector_store_embedding_base.py | 45 ++++++++++--- 3 files changed, 107 insertions(+), 50 deletions(-) diff --git a/examples/milvus_store.yaml b/examples/milvus_store.yaml index 6cc8d10..4c6211b 100644 --- a/examples/milvus_store.yaml +++ b/examples/milvus_store.yaml @@ -1,21 +1,24 @@ --- # Example configuration file for adding a Milvus vector store and a Cohere embedding model # The input comes from STDIN and goes to STDOUT +# +# Dependencies: +# pip install langchain_milvus pymilvus +# +# required ENV variables: +# - MILVUS_HOST +# - MILVUS_PORT +# - MILVUS_COLLECTION_NAME +# - ACCESS_KEY: AWS access key +# - SECRET_KEY: AWS secret key +# - AWS_BEDROCK_COHERE_EMBED_MODEL_ID +# - AWS_BEDROCK_COHERE_EMBED_REGION + log: stdout_log_level: DEBUG log_file_level: DEBUG log_file: solace_ai_connector.log -shared_config: - - broker_config: &broker_connection - broker_connection_share: ${SOLACE_BROKER_URL} - broker_type: solace - broker_url: ${SOLACE_BROKER_URL} - broker_username: ${SOLACE_BROKER_USERNAME} - broker_password: ${SOLACE_BROKER_PASSWORD} - broker_vpn: ${SOLACE_BROKER_VPN} - - # List of flows flows: - name: test_flow @@ -24,25 +27,24 @@ flows: # Test input from STDIN - component_name: stdin component_module: stdin_input - component_config: - component_name: milvus_cohere_embed component_module: langchain_vector_store_embedding_index component_config: - vector_store_component_path: langchain_community.vectorstores + vector_store_component_path: langchain_milvus vector_store_component_name: Milvus vector_store_component_config: - collection_name: collection_2 + auto_id: true + collection_name: ${MILVUS_COLLECTION_NAME} connection_args: host: ${MILVUS_HOST} port: 
${MILVUS_PORT} - # vector_store_index_name: solace-index-3 - embedding_component_path: langchain_community.embeddings + embedding_component_path: langchain_aws embedding_component_name: BedrockEmbeddings embedding_component_config: model_id: ${AWS_BEDROCK_COHERE_EMBED_MODEL_ID} region_name: ${AWS_BEDROCK_COHERE_EMBED_REGION} - credentials_profile_name: default + credentials_profile_name: default # Profile name in ~/.aws/credentials input_transforms: - type: copy source_value: @@ -51,31 +53,30 @@ flows: function: system dest_expression: user_data.vector_input:metadata.system - type: copy - source_value: efunneko + source_value: username dest_expression: user_data.vector_input:metadata.user - type: copy - source_value: input.payload - dest_expression: user_data.vector_input:text + source_expression: input.payload:text + dest_expression: user_data.vector_input:texts component_input: source_expression: user_data.vector_input - component_name: milvus_cohere_embed_search component_module: langchain_vector_store_embedding_search component_config: - vector_store_component_path: langchain_community.vectorstores + vector_store_component_path: langchain_milvus vector_store_component_name: Milvus vector_store_component_config: - collection_name: collection_1 + collection_name: ${MILVUS_COLLECTION_NAME} connection_args: host: ${MILVUS_HOST} port: ${MILVUS_PORT} - # vector_store_index_name: solace-index-3 - embedding_component_path: langchain_community.embeddings + embedding_component_path: langchain_aws embedding_component_name: BedrockEmbeddings embedding_component_config: model_id: ${AWS_BEDROCK_COHERE_EMBED_MODEL_ID} region_name: ${AWS_BEDROCK_COHERE_EMBED_REGION} - credentials_profile_name: default + credentials_profile_name: default # Profile name in ~/.aws/credentials max_results: 5 component_input: source_expression: input.payload diff --git a/examples/vector_store_search.yaml b/examples/vector_store_search.yaml index e427617..ca4b3f8 100644 --- 
a/examples/vector_store_search.yaml +++ b/examples/vector_store_search.yaml @@ -1,28 +1,40 @@ --- -# Example that uses Cohere embeddings and OpenSearch for vector store +# Example that uses Cohere embeddings and Amazon OpenSearch Service Serverless for vector store # This also shows how to use AWS credentials and AWS4Auth for OpenSearch # which involves using 'invoke' to create the required auth objects +# +# +# Follow Boto3 documentation for AWS credentials: +# https://boto3.amazonaws.com/v1/documentation/api/latest/guide/quickstart.html#configuration +# https://python.langchain.com/v0.2/docs/integrations/vectorstores/opensearch/#using-aoss-amazon-opensearch-service-serverless +# +# Dependencies: +# pip install -U langchain_community opensearch-py requests_aws4auth +# +# required ENV variables: +# - AWS_BEDROCK_COHERE_EMBED_MODEL_ID +# - AWS_BEDROCK_COHERE_EMBED_REGION +# - AWS_OPENSEARCH_INDEX_NAME +# - AWS_OPENSEARCH_ENDPOINT + log: stdout_log_level: DEBUG log_file_level: DEBUG log_file: solace_ai_connector.log shared_config: - - broker_config: &broker_connection - broker_connection_share: ${SOLACE_BROKER_URL} - broker_type: solace - broker_url: ${SOLACE_BROKER_URL} - broker_username: ${SOLACE_BROKER_USERNAME} - broker_password: ${SOLACE_BROKER_PASSWORD} - broker_vpn: ${SOLACE_BROKER_VPN} - - # Get AWS credentials object + # Get AWS credentials object from .aws credentials + # You can pass the ACCESS/SECRET/SESSION keys directly as ENV variables as well + # eg: aws_secret_access_key: ${AWS_SECRET_ACCESS_KEY} - aws_credentials: &aws_credentials invoke: object: invoke: module: boto3 function: Session + params: + keyword: + profile_name: default # The profile to choose from .aws/credentials function: get_credentials # Get AWS4Auth object @@ -47,42 +59,57 @@ shared_config: object: *aws_credentials attribute: token + # Create a bedrock client for use with AWS components + - bedrock_client_config: &bedrock_client_config + invoke: + module: boto3 + function: client + 
params: + keyword: + service_name: bedrock-runtime + region_name: ${AWS_BEDROCK_COHERE_EMBED_REGION} + aws_access_key_id: + invoke: + object: *aws_credentials + attribute: access_key + aws_secret_access_key: + invoke: + object: *aws_credentials + attribute: secret_key + # List of flows flows: - name: test_flow trace_level: DEBUG components: - # Input from a Solace broker + # Input from standard input (STDIN) - component_name: stdin component_module: stdin_input - component_config: - component_name: opensearch_cohere_embed component_module: langchain_vector_store_embedding_search component_config: vector_store_component_path: langchain_community.vectorstores vector_store_component_name: OpenSearchVectorSearch + vector_store_index_name: ${AWS_OPENSEARCH_INDEX_NAME} vector_store_component_config: - index_name: ${AWS_OPENSEARCH_JIRA_INDEX_NAME} - opensearch_url: ${AWS_OPENSEARCH_JIRA_ENDPOINT} + opensearch_url: ${AWS_OPENSEARCH_ENDPOINT} connection_class: invoke: module: opensearchpy attribute: RequestsHttpConnection http_auth: *aws_4_auth_aoss timeout: 300 - vector_store_index_name: solace-index-3 - embedding_component_path: langchain_community.embeddings + embedding_component_path: langchain_aws embedding_component_name: BedrockEmbeddings embedding_component_config: + client: *bedrock_client_config model_id: ${AWS_BEDROCK_COHERE_EMBED_MODEL_ID} region_name: ${AWS_BEDROCK_COHERE_EMBED_REGION} - credentials_profile_name: default max_results: 7 component_input: source_expression: input.payload - - component_name: stdout component_module: stdout_output diff --git a/src/solace_ai_connector/components/general/langchain/langchain_vector_store_embedding_base.py b/src/solace_ai_connector/components/general/langchain/langchain_vector_store_embedding_base.py index aaa6ad1..0c3b39f 100644 --- a/src/solace_ai_connector/components/general/langchain/langchain_vector_store_embedding_base.py +++ b/src/solace_ai_connector/components/general/langchain/langchain_vector_store_embedding_base.py @@
-1,5 +1,5 @@ # This is the base class for vector store embedding classes - +import inspect from .langchain_base import ( LangChainBase, ) @@ -36,19 +36,48 @@ def init(self): self.vector_store_info["path"], self.vector_store_info["name"] ) - if "index" not in self.vector_store_info["config"]: - self.vector_store_info["config"]["index"] = self.vector_store_info["index"] - self.vector_store_info["config"]["embeddings"] = self.embedding - self.vector_store_info["config"]["embedding_function"] = self.embedding + # Get the expected parameter names of the vector store class + class_init_signature = inspect.signature(vector_store_class.__init__) + class_param_names = [ + param.name + for param in class_init_signature.parameters.values() + if param.name != "self" + ] + + # index is optional - not using it if "index" or "index_name" is provided in the config + if self.vector_store_info["index"] and ( + "index" not in self.vector_store_info["config"] + or "index_name" not in self.vector_store_info["config"] + ): + # Checking if the class expects 'index' or 'index_name' as a parameter + if "index" in class_param_names: + self.vector_store_info["config"]["index"] = self.vector_store_info[ + "index" + ] + elif "index_name" in class_param_names: + self.vector_store_info["config"]["index_name"] = self.vector_store_info[ + "index" + ] + else: + # If not defined, use "index" as a parameter + self.vector_store_info["config"]["index"] = self.vector_store_info[ + "index" + ] - # index is optional - remove it from the config if it is None - if self.vector_store_info["config"]["index"] is None: - del self.vector_store_info["config"]["index"] + # Checking if the vector store uses "embedding_function" or "embeddings" as a parameter + if "embedding_function" in class_param_names: + self.vector_store_info["config"]["embedding_function"] = self.embedding + elif "embeddings" in class_param_names: + self.vector_store_info["config"]["embeddings"] = self.embedding + else: + # If not defined,
use "embeddings" as a parameter + self.vector_store_info["config"]["embeddings"] = self.embedding try: self.vector_store = self.create_component( self.vector_store_info["config"], vector_store_class ) + print(self.vector_store_info["config"]) except Exception: # pylint: disable=broad-except del self.vector_store_info["config"]["embeddings"] del self.vector_store_info["config"]["embedding_function"]