forked from opea-project/GenAIComps
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
adding lancedb to langchain vectorstores (opea-project#291)
* adding lancedb to langchain vectorstores Signed-off-by: sharanshirodkar7 <[email protected]> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: sharanshirodkar7 <[email protected]> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: lvliang-intel <[email protected]> Signed-off-by: Dong, Bo1 <[email protected]>
- Loading branch information
Showing
1 changed file
with
139 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
# LanceDB | ||
|
||
LanceDB is an embedded vector database for AI applications. It is open source and distributed with an Apache-2.0 license. | ||
|
||
LanceDB datasets are persisted to disk and can be shared between Node.js and Python. | ||
|
||
## Setup | ||
|
||
```bash | ||
# Install the LanceDB Python client (`vectordb` is the Node.js package).
pip install lancedb
``` | ||
|
||
## Usage | ||
|
||
### Create a new index from texts | ||
|
||
```python | ||
import asyncio
import os
import tempfile

import lancedb
from langchain.vectorstores import LanceDB
from langchain.embeddings.openai import OpenAIEmbeddings


async def run():
    """Index three short texts in a temporary LanceDB database and query it."""
    # The Python client is the `lancedb` package (`vectordb` is the Node.js
    # client), and `lancedb.connect` is a plain synchronous call.
    db_dir = tempfile.mkdtemp(prefix="lancedb-")
    db = lancedb.connect(db_dir)

    # Python's `from_texts` / `afrom_texts` take (texts, embedding, metadatas),
    # so the metadata is passed by keyword instead of as the second positional
    # argument. No seed row is needed: the store creates the table on insert.
    # NOTE(review): `connection=` / `table_name=` follow the
    # langchain_community LanceDB store; confirm against the pinned version.
    vector_store = await LanceDB.afrom_texts(
        ["Hello world", "Bye bye", "hello nice world"],
        OpenAIEmbeddings(),
        metadatas=[{"id": 2}, {"id": 1}, {"id": 3}],
        connection=db,
        table_name="vectors",
    )

    # `asimilarity_search` is the awaitable variant of `similarity_search`.
    result_one = await vector_store.asimilarity_search("hello world", k=1)
    print(result_one)
    # Prints a one-element list holding the closest Document, e.g.
    # [ Document(page_content='hello nice world', metadata={'id': 3}) ]
    # (the actual match depends on the embedding model).


# Run the function
asyncio.run(run())
``` | ||
|
||
API Reference: | ||
|
||
- `LanceDB` from `langchain.vectorstores` | ||
- `OpenAIEmbeddings` from `langchain.embeddings.openai` | ||
|
||
### Create a new index from a loader | ||
|
||
```python | ||
import asyncio
import os
import tempfile

import lancedb
from langchain.vectorstores import LanceDB
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.document_loaders import TextLoader

# Create docs with a loader
loader = TextLoader("src/document_loaders/example_data/example.txt")
docs = loader.load()


async def run():
    """Index the loaded documents in a temporary LanceDB database and query it."""
    # The Python client is the `lancedb` package (`vectordb` is the Node.js
    # client), and `lancedb.connect` is a plain synchronous call.
    db_dir = tempfile.mkdtemp(prefix="lancedb-")
    db = lancedb.connect(db_dir)

    # `afrom_documents` is the awaitable variant of `from_documents`. No seed
    # row is needed: the store creates the table on insert.
    # NOTE(review): `connection=` / `table_name=` follow the
    # langchain_community LanceDB store; confirm against the pinned version.
    vector_store = await LanceDB.afrom_documents(
        docs,
        OpenAIEmbeddings(),
        connection=db,
        table_name="vectors",
    )

    # `asimilarity_search` is the awaitable variant of `similarity_search`.
    result_one = await vector_store.asimilarity_search("hello world", k=1)
    print(result_one)
    # [
    #   Document(page_content='Foo\nBar\nBaz\n\n', metadata={'source': 'src/document_loaders/example_data/example.txt'})
    # ]


# Run the function
asyncio.run(run())
``` | ||
|
||
API Reference: | ||
|
||
- `LanceDB` from `langchain.vectorstores` | ||
- `OpenAIEmbeddings` from `langchain.embeddings.openai` | ||
- `TextLoader` from `langchain.document_loaders` | ||
|
||
### Open an existing dataset | ||
|
||
```python | ||
import asyncio
import os
import tempfile

import lancedb
from langchain.vectorstores import LanceDB
from langchain.embeddings.openai import OpenAIEmbeddings


async def run():
    """Re-open a previously written LanceDB dataset and query it."""
    uri = await create_test_db()

    # `lancedb.connect` is synchronous in the Python client; do not await it.
    db = lancedb.connect(uri)

    # Keyword arguments avoid depending on the constructor's positional order.
    # NOTE(review): `connection=` / `table_name=` follow the
    # langchain_community LanceDB store; confirm against the pinned version.
    vector_store = LanceDB(
        connection=db,
        embedding=OpenAIEmbeddings(),
        table_name="vectors",
    )

    # `asimilarity_search` is the awaitable variant of `similarity_search`.
    result_one = await vector_store.asimilarity_search("hello world", k=1)
    print(result_one)
    # [ Document(page_content='Hello world', metadata={'id': 1}) ]


async def create_test_db():
    """Write a small "vectors" table to a fresh temp directory and return its path."""
    uri = tempfile.mkdtemp(prefix="lancedb-")

    # Populate the table through the vector store itself so the rows carry real
    # embeddings and the column layout the store writes, rather than
    # hand-built all-zero vectors.
    await LanceDB.afrom_texts(
        ["Hello world", "Bye bye", "hello nice world"],
        OpenAIEmbeddings(),
        metadatas=[{"id": 1}, {"id": 2}, {"id": 3}],
        connection=lancedb.connect(uri),
        table_name="vectors",
    )
    return uri


# Run the function
asyncio.run(run())
``` | ||
|
||
API Reference: | ||
|
||
- `LanceDB` from `langchain.vectorstores` | ||
- `OpenAIEmbeddings` from `langchain.embeddings.openai` |