Skip to content

Commit

Permalink
fix: Elasticsearch / OpenSearch brownfield function does not incorporate meta (#3572)
Browse files Browse the repository at this point in the history

* fix meta bug

* adjust brownfield test
  • Loading branch information
tstadel authored Nov 15, 2022
1 parent 3098440 commit 6ce2d29
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 6 deletions.
13 changes: 7 additions & 6 deletions haystack/document_stores/es_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,25 +222,26 @@ def elasticsearch_index_to_document_store(
# Get content and metadata of current record
content = record["_source"].pop(original_content_field, "")
if content:
record_doc = Document(content=content, meta={}, id_hash_keys=id_hash_keys)

meta = {}
if original_name_field is not None:
if original_name_field in record["_source"]:
record_doc.meta["name"] = record["_source"].pop(original_name_field)
meta["name"] = record["_source"].pop(original_name_field)
# Only add selected metadata fields
if included_metadata_fields is not None:
for metadata_field in included_metadata_fields:
if metadata_field in record["_source"]:
record_doc.meta[metadata_field] = record["_source"][metadata_field]
meta[metadata_field] = record["_source"][metadata_field]
# Add all metadata fields except for those in excluded_metadata_fields
else:
if excluded_metadata_fields is not None:
for metadata_field in excluded_metadata_fields:
record["_source"].pop(metadata_field, None)
record_doc.meta.update(record["_source"])
meta.update(record["_source"])

if store_original_ids:
record_doc.meta["_original_es_id"] = record["_id"]
meta["_original_es_id"] = record["_id"]

record_doc = Document(content=content, meta=meta, id_hash_keys=id_hash_keys)

# Apply preprocessor if provided
preprocessed_docs = [record_doc]
Expand Down
2 changes: 2 additions & 0 deletions test/document_stores/test_document_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -1275,6 +1275,7 @@ def test_elasticsearch_brownfield_support(document_store_with_docs):
original_name_field="name",
included_metadata_fields=["date_field"],
index="test_brownfield_support",
id_hash_keys=["content", "meta"],
)

original_documents = document_store_with_docs.get_all_documents(index="haystack_test")
Expand All @@ -1284,6 +1285,7 @@ def test_elasticsearch_brownfield_support(document_store_with_docs):
assert all("date_field" in doc.meta for doc in transferred_documents)
assert all("meta_field" not in doc.meta for doc in transferred_documents)
assert all("numeric_field" not in doc.meta for doc in transferred_documents)
assert all(doc.id == doc._get_id(["content", "meta"]) for doc in transferred_documents)

original_content = set([doc.content for doc in original_documents])
transferred_content = set([doc.content for doc in transferred_documents])
Expand Down

0 comments on commit 6ce2d29

Please sign in to comment.