Skip to content

Commit

Permalink
Fixes: community: fix LanceDB return no metadata (#27024)
Browse files Browse the repository at this point in the history
- [ x ] Fix the case where LanceDB returns a table without a metadata column
- **Description:** Check the table schema; if it has no metadata column,
initialize the Document with its metadata argument set to an empty dict
    - **Issue:** #27005

- [ x ] **Add tests and docs**

---------

Co-authored-by: ccurme <[email protected]>
  • Loading branch information
binhnd102 and ccurme authored Dec 18, 2024
1 parent 91d28ef commit f723a84
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 2 deletions.
8 changes: 6 additions & 2 deletions libs/community/langchain_community/vectorstores/lancedb.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,12 +151,14 @@ def results_to_docs(self, results: Any, score: bool = False) -> Any:
score_col = "_relevance_score"
else:
score_col = None
# Check if 'metadata' is in the columns
has_metadata = "metadata" in columns

if score_col is None or not score:
return [
Document(
page_content=results[self._text_key][idx].as_py(),
metadata=results["metadata"][idx].as_py(),
metadata=results["metadata"][idx].as_py() if has_metadata else {},
)
for idx in range(len(results))
]
Expand All @@ -165,7 +167,9 @@ def results_to_docs(self, results: Any, score: bool = False) -> Any:
(
Document(
page_content=results[self._text_key][idx].as_py(),
metadata=results["metadata"][idx].as_py(),
metadata=results["metadata"][idx].as_py()
if has_metadata
else {},
),
results[score_col][idx].as_py(),
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,3 +114,41 @@ def test_lancedb_all_searches() -> None:
)
assert len(result_3[0]) == 2 # type: ignore
assert "text 1" in result_3[0][0].page_content # type: ignore


@pytest.mark.requires("lancedb")
def test_lancedb_no_metadata() -> None:
    """Search a table that has no 'metadata' column and expect empty metadata.

    The table is created directly through the LanceDB connection with only
    'vector', 'id' and 'text' fields, then wrapped in a ``LanceDB`` vector
    store. Every Document returned by ``similarity_search`` must carry an
    empty metadata dict.
    """
    # Local import keeps this test self-contained within the module.
    import tempfile

    lancedb = import_lancedb()
    embeddings = FakeEmbeddings()
    texts = ["text 1", "text 2", "item 3"]
    # Note: rows deliberately omit the 'metadata' field.
    data = [
        {
            "vector": embeddings.embed_documents([text])[0],
            "id": str(idx),
            "text": text,
        }
        for idx, text in enumerate(texts)
    ]
    # A fresh temporary directory per run avoids a hard-coded /tmp path and
    # guarantees no table is left over from an earlier (possibly failed) run;
    # the directory, and the table with it, is removed even if an assert fails.
    with tempfile.TemporaryDirectory() as db_dir:
        db = lancedb.connect(db_dir)
        # Create the table without a 'metadata' column
        db.create_table("vectorstore_no_metadata", data=data)
        # Initialize LanceDB with the existing connection and table name
        store = LanceDB(
            connection=db,
            embedding=embeddings,
            table_name="vectorstore_no_metadata",
        )
        result = store.similarity_search("text 1")
        # Guard against a vacuous pass: the loop below checks nothing if the
        # search comes back empty.
        assert result, "Expected at least one search result"
        for doc in result:
            assert (
                doc.metadata == {}
            ), "Expected empty metadata when 'metadata' column is missing"

0 comments on commit f723a84

Please sign in to comment.