Skip to content

Commit

Permalink
community[patch]: update for compatibility with latest Meilisearch version (langchain-ai#18970)
Browse files Browse the repository at this point in the history

- **Description:** Updates the Meilisearch vectorstore for compatibility
with Meilisearch v1.6 and above. Adds the `embedders` settings and the
`embedder_name` parameter, which are now required.

---------

Co-authored-by: Bagatur <[email protected]>
  • Loading branch information
2 people authored and gkorland committed Mar 30, 2024
1 parent ea6a6ce commit def0a27
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 21 deletions.
41 changes: 32 additions & 9 deletions docs/docs/integrations/vectorstores/meilisearch.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,14 @@
"from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n",
"embeddings = OpenAIEmbeddings()"
"embeddings = OpenAIEmbeddings()\n",
"embedders = {\n",
" \"default\": {\n",
" \"source\": \"userProvided\",\n",
" \"dimensions\": 1536,\n",
" }\n",
"}\n",
"embedder_name = \"default\""
]
},
{
Expand All @@ -152,7 +159,9 @@
"outputs": [],
"source": [
"# Use Meilisearch vector store to store texts & associated embeddings as vector\n",
"vector_store = Meilisearch.from_texts(texts=texts, embedding=embeddings)"
"vector_store = Meilisearch.from_texts(\n",
" texts=texts, embedding=embeddings, embedders=embedders, embedder_name=embedder_name\n",
")"
]
},
{
Expand Down Expand Up @@ -188,11 +197,16 @@
"docs = text_splitter.split_documents(documents)\n",
"\n",
"# Import documents & embeddings in the vector store\n",
"vector_store = Meilisearch.from_documents(documents=documents, embedding=embeddings)\n",
"vector_store = Meilisearch.from_documents(\n",
" documents=documents,\n",
" embedding=embeddings,\n",
" embedders=embedders,\n",
" embedder_name=embedder_name,\n",
")\n",
"\n",
"# Search in our vector store\n",
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"docs = vector_store.similarity_search(query)\n",
"docs = vector_store.similarity_search(query, embedder_name=embedder_name)\n",
"print(docs[0].page_content)"
]
},
Expand Down Expand Up @@ -221,7 +235,11 @@
"\n",
"client = meilisearch.Client(url=\"http://127.0.0.1:7700\", api_key=\"***\")\n",
"vector_store = Meilisearch(\n",
" embedding=embeddings, client=client, index_name=\"langchain_demo\", text_key=\"text\"\n",
" embedding=embeddings,\n",
" embedders=embedders,\n",
" client=client,\n",
" index_name=\"langchain_demo\",\n",
" text_key=\"text\",\n",
")\n",
"vector_store.add_documents(documents)"
]
Expand All @@ -232,7 +250,7 @@
"source": [
"## Similarity Search with score\n",
"\n",
"This specific method allows you to return the documents and the distance score of the query to them."
"This specific method allows you to return the documents and the distance score of the query to them. `embedder_name` is the name of the embedder that should be used for semantic search; it defaults to \"default\"."
]
},
{
Expand All @@ -241,15 +259,18 @@
"metadata": {},
"outputs": [],
"source": [
"docs_and_scores = vector_store.similarity_search_with_score(query)\n",
"docs_and_scores = vector_store.similarity_search_with_score(\n",
" query, embedder_name=embedder_name\n",
")\n",
"docs_and_scores[0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Similarity Search by vector"
"## Similarity Search by vector\n",
"`embedder_name` is the name of the embedder that should be used for semantic search; it defaults to \"default\"."
]
},
{
Expand All @@ -259,7 +280,9 @@
"outputs": [],
"source": [
"embedding_vector = embeddings.embed_query(query)\n",
"docs_and_scores = vector_store.similarity_search_by_vector(embedding_vector)\n",
"docs_and_scores = vector_store.similarity_search_by_vector(\n",
" embedding_vector, embedder_name=embedder_name\n",
")\n",
"docs_and_scores[0]"
]
},
Expand Down
46 changes: 42 additions & 4 deletions libs/community/langchain_community/vectorstores/meilisearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,15 @@ class Meilisearch(VectorStore):
# api_key is optional; provide it if your meilisearch instance requires it
client = meilisearch.Client(url='http://127.0.0.1:7700', api_key='***')
embeddings = OpenAIEmbeddings()
embedders = {
"theEmbedderName": {
"source": "userProvided",
"dimensions": 1536
}
}
vectorstore = Meilisearch(
embedding=embeddings,
embedders=embedders,
client=client,
index_name='langchain_demo',
text_key='text')
Expand All @@ -81,6 +88,8 @@ def __init__(
index_name: str = "langchain-demo",
text_key: str = "text",
metadata_key: str = "metadata",
*,
embedders: Optional[Dict[str, Any]] = None,
):
"""Initialize with Meilisearch client."""
client = _create_client(client=client, url=url, api_key=api_key)
Expand All @@ -90,18 +99,24 @@ def __init__(
self._embedding = embedding
self._text_key = text_key
self._metadata_key = metadata_key
self._embedders = embedders
self._embedders_settings = self._client.index(
str(self._index_name)
).update_embedders(embedders)

def add_texts(
self,
texts: Iterable[str],
metadatas: Optional[List[dict]] = None,
ids: Optional[List[str]] = None,
embedder_name: Optional[str] = "default",
**kwargs: Any,
) -> List[str]:
"""Run more texts through the embedding and add them to the vector store.
Args:
texts (Iterable[str]): Iterable of strings/text to add to the vectorstore.
embedder_name: Name of the embedder. Defaults to "default".
metadatas (Optional[List[dict]]): Optional list of metadata.
Defaults to None.
ids Optional[List[str]]: Optional list of IDs.
Expand All @@ -128,7 +143,7 @@ def add_texts(
docs.append(
{
"id": id,
"_vectors": embedding,
"_vectors": {f"{embedder_name}": embedding},
f"{self._metadata_key}": metadata,
}
)
Expand All @@ -142,12 +157,14 @@ def similarity_search(
query: str,
k: int = 4,
filter: Optional[Dict[str, str]] = None,
embedder_name: Optional[str] = "default",
**kwargs: Any,
) -> List[Document]:
"""Return meilisearch documents most similar to the query.
Args:
query (str): Query text for which to find similar documents.
embedder_name: Name of the embedder to be used. Defaults to "default".
k (int): Number of documents to return. Defaults to 4.
filter (Optional[Dict[str, str]]): Filter by metadata.
Defaults to None.
Expand All @@ -158,6 +175,7 @@ def similarity_search(
"""
docs_and_scores = self.similarity_search_with_score(
query=query,
embedder_name=embedder_name,
k=k,
filter=filter,
kwargs=kwargs,
Expand All @@ -169,12 +187,14 @@ def similarity_search_with_score(
query: str,
k: int = 4,
filter: Optional[Dict[str, str]] = None,
embedder_name: Optional[str] = "default",
**kwargs: Any,
) -> List[Tuple[Document, float]]:
"""Return meilisearch documents most similar to the query, along with scores.
Args:
query (str): Query text for which to find similar documents.
embedder_name: Name of the embedder to be used. Defaults to "default".
k (int): Number of documents to return. Defaults to 4.
filter (Optional[Dict[str, str]]): Filter by metadata.
Defaults to None.
Expand All @@ -187,6 +207,7 @@ def similarity_search_with_score(

docs = self.similarity_search_by_vector_with_scores(
embedding=_query,
embedder_name=embedder_name,
k=k,
filter=filter,
kwargs=kwargs,
Expand All @@ -196,6 +217,7 @@ def similarity_search_with_score(
def similarity_search_by_vector_with_scores(
self,
embedding: List[float],
embedder_name: Optional[str] = "default",
k: int = 4,
filter: Optional[Dict[str, Any]] = None,
**kwargs: Any,
Expand All @@ -204,6 +226,7 @@ def similarity_search_by_vector_with_scores(
Args:
embedding (List[float]): Embedding to look up similar documents.
embedder_name: Name of the embedder to be used. Defaults to "default".
k (int): Number of documents to return. Defaults to 4.
filter (Optional[Dict[str, str]]): Filter by metadata.
Defaults to None.
Expand All @@ -214,7 +237,13 @@ def similarity_search_by_vector_with_scores(
"""
docs = []
results = self._client.index(str(self._index_name)).search(
"", {"vector": embedding, "limit": k, "filter": filter}
"",
{
"vector": embedding,
"hybrid": {"semanticRatio": 1.0, "embedder": embedder_name},
"limit": k,
"filter": filter,
},
)

for result in results["hits"]:
Expand All @@ -233,12 +262,14 @@ def similarity_search_by_vector(
embedding: List[float],
k: int = 4,
filter: Optional[Dict[str, str]] = None,
embedder_name: Optional[str] = "default",
**kwargs: Any,
) -> List[Document]:
"""Return meilisearch documents most similar to embedding vector.
Args:
embedding (List[float]): Embedding to look up similar documents.
embedder_name: Name of the embedder to be used. Defaults to "default".
k (int): Number of documents to return. Defaults to 4.
filter (Optional[Dict[str, str]]): Filter by metadata.
Defaults to None.
Expand All @@ -249,6 +280,7 @@ def similarity_search_by_vector(
"""
docs = self.similarity_search_by_vector_with_scores(
embedding=embedding,
embedder_name=embedder_name,
k=k,
filter=filter,
kwargs=kwargs,
Expand All @@ -268,6 +300,8 @@ def from_texts(
ids: Optional[List[str]] = None,
text_key: Optional[str] = "text",
metadata_key: Optional[str] = "metadata",
embedders: Dict[str, Any] = {},
embedder_name: Optional[str] = "default",
**kwargs: Any,
) -> Meilisearch:
"""Construct Meilisearch wrapper from raw documents.
Expand All @@ -288,21 +322,25 @@ def from_texts(
# The environment should be the one specified next to the API key
# in your Meilisearch console
client = meilisearch.Client(url='http://127.0.0.1:7700', api_key='***')
embeddings = OpenAIEmbeddings()
embedding = OpenAIEmbeddings()
embedders: Embedders index setting.
embedder_name: Name of the embedder. Defaults to "default".
docsearch = Meilisearch.from_texts(
client=client,
embeddings=embeddings,
embedding=embedding,
)
"""
client = _create_client(client=client, url=url, api_key=api_key)

vectorstore = cls(
embedding=embedding,
embedders=embedders,
client=client,
index_name=index_name,
)
vectorstore.add_texts(
texts=texts,
embedder_name=embedder_name,
metadatas=metadatas,
ids=ids,
text_key=text_key,
Expand Down
Loading

0 comments on commit def0a27

Please sign in to comment.