diff --git a/docs/modules/indexes/vectorstores/examples/opensearch.ipynb b/docs/modules/indexes/vectorstores/examples/opensearch.ipynb index a78fde3fe93e8..1a14c30e2ee8b 100644 --- a/docs/modules/indexes/vectorstores/examples/opensearch.ipynb +++ b/docs/modules/indexes/vectorstores/examples/opensearch.ipynb @@ -12,7 +12,8 @@ "\n", "This notebook shows how to use functionality related to the `OpenSearch` database.\n", "\n", - "To run, you should have the opensearch instance up and running: [here](https://opensearch.org/docs/latest/install-and-configure/install-opensearch/index/)\n", + "To run, you should have an OpenSearch instance up and running: [see here for an easy Docker installation](https://hub.docker.com/r/opensearchproject/opensearch).\n", + "\n", "`similarity_search` by default performs the Approximate k-NN Search which uses one of the several algorithms like lucene, nmslib, faiss recommended for\n", "large datasets. To perform brute force search we have other search methods known as Script Scoring and Painless Scripting.\n", "Check [this](https://opensearch.org/docs/latest/search-plugins/knn/index/) for more details." @@ -23,7 +24,8 @@ "id": "94963977-9dfc-48b7-872a-53f2947f46c6", "metadata": {}, "source": [ - "## Installation" + "## Installation\n", + "Install the Python client." 
] }, { @@ -61,7 +63,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "aac9563e", "metadata": {}, "outputs": [], @@ -74,7 +76,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "a3c3999a", "metadata": {}, "outputs": [], @@ -98,6 +100,32 @@ "`similarity_search` using `Approximate k-NN` Search with Custom Parameters" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "803fe12b", + "metadata": {}, + "outputs": [], + "source": [ + "docsearch = OpenSearchVectorSearch.from_documents(\n", + " docs, \n", + " embeddings, \n", + " opensearch_url=\"http://localhost:9200\"\n", + ")\n", + "\n", + "# If using the default Docker installation, use this instantiation instead:\n", + "# docsearch = OpenSearchVectorSearch.from_documents(\n", + "# docs, \n", + "# embeddings, \n", + "# opensearch_url=\"https://localhost:9200\", \n", + "# http_auth=(\"admin\", \"admin\"), \n", + "# use_ssl = True,\n", + "# verify_certs = False,\n", + "# ssl_assert_hostname = False,\n", + "# ssl_show_warn = False,\n", + "# )" + ] + }, { "cell_type": "code", "execution_count": null, @@ -109,10 +137,8 @@ }, "outputs": [], "source": [ - "docsearch = OpenSearchVectorSearch.from_documents(docs, embeddings, opensearch_url=\"http://localhost:9200\")\n", - "\n", "query = \"What did the president say about Ketanji Brown Jackson\"\n", - "docs = docsearch.similarity_search(query)" + "docs = docsearch.similarity_search(query, k=10)" ] }, { @@ -283,7 +309,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.6" + "version": "3.11.3" } }, "nbformat": 4, diff --git a/langchain/vectorstores/opensearch_vector_search.py b/langchain/vectorstores/opensearch_vector_search.py index 624d62c5715a9..d33a39429a9a8 100644 --- a/langchain/vectorstores/opensearch_vector_search.py +++ b/langchain/vectorstores/opensearch_vector_search.py @@ -153,13 +153,12 @@ def _default_text_mapping( def 
_default_approximate_search_query( query_vector: List[float], - size: int = 4, k: int = 4, vector_field: str = "vector_field", ) -> Dict: """For Approximate k-NN Search, this is the default query.""" return { - "size": size, + "size": k, "query": {"knn": {vector_field: {"vector": query_vector, "k": k}}}, } @@ -167,14 +166,13 @@ def _default_approximate_search_query( def _approximate_search_query_with_boolean_filter( query_vector: List[float], boolean_filter: Dict, - size: int = 4, k: int = 4, vector_field: str = "vector_field", subquery_clause: str = "must", ) -> Dict: """For Approximate k-NN Search, with Boolean Filter.""" return { - "size": size, + "size": k, "query": { "bool": { "filter": boolean_filter, @@ -189,13 +187,12 @@ def _approximate_search_query_with_boolean_filter( def _approximate_search_query_with_lucene_filter( query_vector: List[float], lucene_filter: Dict, - size: int = 4, k: int = 4, vector_field: str = "vector_field", ) -> Dict: """For Approximate k-NN Search, with Lucene Filter.""" search_query = _default_approximate_search_query( - query_vector, size, k, vector_field + query_vector, k=k, vector_field=vector_field ) search_query["query"]["knn"][vector_field]["filter"] = lucene_filter return search_query @@ -382,8 +379,6 @@ def similarity_search( Optional Args for Approximate Search: search_type: "approximate_search"; default: "approximate_search" - size: number of results the query actually returns; default: 4 - boolean_filter: A Boolean filter consists of a Boolean query that contains a k-NN query and a filter. 
@@ -438,7 +433,6 @@ def similarity_search_with_score( vector_field = _get_kwargs_value(kwargs, "vector_field", "vector_field") if search_type == "approximate_search": - size = _get_kwargs_value(kwargs, "size", 4) boolean_filter = _get_kwargs_value(kwargs, "boolean_filter", {}) subquery_clause = _get_kwargs_value(kwargs, "subquery_clause", "must") lucene_filter = _get_kwargs_value(kwargs, "lucene_filter", {}) @@ -449,15 +443,19 @@ def similarity_search_with_score( ) if boolean_filter != {}: search_query = _approximate_search_query_with_boolean_filter( - embedding, boolean_filter, size, k, vector_field, subquery_clause + embedding, + boolean_filter, + k=k, + vector_field=vector_field, + subquery_clause=subquery_clause, ) elif lucene_filter != {}: search_query = _approximate_search_query_with_lucene_filter( - embedding, lucene_filter, size, k, vector_field + embedding, lucene_filter, k=k, vector_field=vector_field ) else: search_query = _default_approximate_search_query( - embedding, size, k, vector_field + embedding, k=k, vector_field=vector_field ) elif search_type == SCRIPT_SCORING_SEARCH: space_type = _get_kwargs_value(kwargs, "space_type", "l2")