From 6ce2d296f4cd34059905817f0a2717801ba27c61 Mon Sep 17 00:00:00 2001 From: tstadel <60758086+tstadel@users.noreply.github.com> Date: Tue, 15 Nov 2022 12:13:21 +0100 Subject: [PATCH] fix: Elasticsearch / OpenSearch brownfield function does not incorporate meta (#3572) * fix meta bug * adjust brownfield test --- haystack/document_stores/es_converter.py | 13 +++++++------ test/document_stores/test_document_store.py | 2 ++ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/haystack/document_stores/es_converter.py b/haystack/document_stores/es_converter.py index 8b3360345b..0f5edb05b2 100644 --- a/haystack/document_stores/es_converter.py +++ b/haystack/document_stores/es_converter.py @@ -222,25 +222,26 @@ def elasticsearch_index_to_document_store( # Get content and metadata of current record content = record["_source"].pop(original_content_field, "") if content: - record_doc = Document(content=content, meta={}, id_hash_keys=id_hash_keys) - + meta = {} if original_name_field is not None: if original_name_field in record["_source"]: - record_doc.meta["name"] = record["_source"].pop(original_name_field) + meta["name"] = record["_source"].pop(original_name_field) # Only add selected metadata fields if included_metadata_fields is not None: for metadata_field in included_metadata_fields: if metadata_field in record["_source"]: - record_doc.meta[metadata_field] = record["_source"][metadata_field] + meta[metadata_field] = record["_source"][metadata_field] # Add all metadata fields except for those in excluded_metadata_fields else: if excluded_metadata_fields is not None: for metadata_field in excluded_metadata_fields: record["_source"].pop(metadata_field, None) - record_doc.meta.update(record["_source"]) + meta.update(record["_source"]) if store_original_ids: - record_doc.meta["_original_es_id"] = record["_id"] + meta["_original_es_id"] = record["_id"] + + record_doc = Document(content=content, meta=meta, id_hash_keys=id_hash_keys) # Apply preprocessor if provided preprocessed_docs = [record_doc] diff --git a/test/document_stores/test_document_store.py b/test/document_stores/test_document_store.py index 2b51665783..bd8d1d4815 100644 --- a/test/document_stores/test_document_store.py +++ b/test/document_stores/test_document_store.py @@ -1275,6 +1275,7 @@ def test_elasticsearch_brownfield_support(document_store_with_docs): original_name_field="name", included_metadata_fields=["date_field"], index="test_brownfield_support", + id_hash_keys=["content", "meta"], ) original_documents = document_store_with_docs.get_all_documents(index="haystack_test") @@ -1284,6 +1285,7 @@ def test_elasticsearch_brownfield_support(document_store_with_docs): assert all("date_field" in doc.meta for doc in transferred_documents) assert all("meta_field" not in doc.meta for doc in transferred_documents) assert all("numeric_field" not in doc.meta for doc in transferred_documents) + assert all(doc.id == doc._get_id(["content", "meta"]) for doc in transferred_documents) original_content = set([doc.content for doc in original_documents]) transferred_content = set([doc.content for doc in transferred_documents])