Skip to content

Commit

Permalink
Add update_document_meta to InMemoryDocumentStore (deepset-ai#2689)
Browse files Browse the repository at this point in the history
* Add update_document_meta to InMemoryDocumentStore

* Fix typo

* Update Documentation & Code Style

* Add update_document_meta to BaseDocumentStore

* Update Documentation & Code Style

* Fix mypy

* Update Documentation & Code Style

* Add update_document_meta to MockDocumentStore

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
  • Loading branch information
2 people authored and andrch-FS committed Jul 26, 2022
1 parent 776b493 commit 288ddcf
Show file tree
Hide file tree
Showing 7 changed files with 71 additions and 6 deletions.
35 changes: 34 additions & 1 deletion docs/_src/api/api/document_store.md
Original file line number Diff line number Diff line change
Expand Up @@ -669,7 +669,7 @@ Check out https://www.elastic.co/guide/en/elasticsearch/reference/current/http-c
#### BaseElasticsearchDocumentStore.update\_document\_meta

```python
def update_document_meta(id: str, meta: Dict[str, str], headers: Optional[Dict[str, str]] = None, index: str = None)
def update_document_meta(id: str, meta: Dict[str, str], index: str = None, headers: Optional[Dict[str, str]] = None)
```

Update the metadata dictionary of a document by specifying its string id
Expand Down Expand Up @@ -1907,6 +1907,22 @@ def get_document_count(filters: Optional[Dict[str, Any]] = None, index: Optional

Return the number of documents in the document store.

<a id="memory.InMemoryDocumentStore.update_document_meta"></a>

#### InMemoryDocumentStore.update\_document\_meta

```python
def update_document_meta(id: str, meta: Dict[str, Any], index: str = None)
```

Update the metadata dictionary of a document by specifying its string id.

**Arguments**:

- `id`: The ID of the Document whose metadata is being updated.
- `meta`: A dictionary with key-value pairs that should be added / changed for the provided Document ID.
- `index`: Name of the index the Document is located at.

<a id="memory.InMemoryDocumentStore.get_embedding_count"></a>

#### InMemoryDocumentStore.get\_embedding\_count
Expand Down Expand Up @@ -4590,6 +4606,23 @@ exists.

None

<a id="deepsetcloud.DeepsetCloudDocumentStore.update_document_meta"></a>

#### DeepsetCloudDocumentStore.update\_document\_meta

```python
@disable_and_log
def update_document_meta(id: str, meta: Dict[str, Any], index: str = None)
```

Update the metadata dictionary of a document by specifying its string id.

**Arguments**:

- `id`: The ID of the Document whose metadata is being updated.
- `meta`: A dictionary with key-value pairs that should be added / changed for the provided Document ID.
- `index`: Name of the index the Document is located at.

<a id="deepsetcloud.DeepsetCloudDocumentStore.get_evaluation_sets"></a>

#### DeepsetCloudDocumentStore.get\_evaluation\_sets
Expand Down
6 changes: 5 additions & 1 deletion haystack/document_stores/base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# pylint: disable=too-many-public-methods

from typing import Generator, Optional, Dict, List, Set, Union
from typing import Generator, Optional, Dict, List, Set, Union, Any

import logging
import collections
Expand Down Expand Up @@ -607,6 +607,10 @@ def get_documents_by_id(
) -> List[Document]:
pass

@abstractmethod
def update_document_meta(self, id: str, meta: Dict[str, Any], index: Optional[str] = None):
    """
    Update the metadata dictionary of a document by specifying its string id.

    Abstract: this base implementation does nothing; concrete document stores provide the behavior.

    :param id: The ID of the Document whose metadata is being updated.
    :param meta: A dictionary with key-value pairs that should be added / changed for the provided Document ID.
    :param index: Name of the index the Document is located at.
    """
    pass

def _drop_duplicate_documents(self, documents: List[Document], index: Optional[str] = None) -> List[Document]:
"""
Drop duplicates documents based on same hash ID
Expand Down
13 changes: 12 additions & 1 deletion haystack/document_stores/deepsetcloud.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from functools import wraps
from typing import List, Optional, Union, Dict, Generator
from typing import List, Optional, Union, Dict, Generator, Any

import json
import logging
Expand Down Expand Up @@ -602,6 +602,17 @@ def write_documents(
"""
pass

@disable_and_log
def update_document_meta(self, id: str, meta: Dict[str, Any], index: Optional[str] = None):
    """
    Update the metadata dictionary of a document by specifying its string id.

    The method body itself performs no update.
    NOTE(review): behavior is governed by the `disable_and_log` decorator defined elsewhere in this
    module - presumably the call is logged and skipped; confirm against the decorator.

    :param id: The ID of the Document whose metadata is being updated.
    :param meta: A dictionary with key-value pairs that should be added / changed for the provided Document ID.
    :param index: Name of the index the Document is located at.
    """
    pass

def get_evaluation_sets(self) -> List[dict]:
"""
Returns a list of uploaded evaluation sets to deepset cloud.
Expand Down
2 changes: 1 addition & 1 deletion haystack/document_stores/elasticsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,7 @@ def write_labels(
bulk(self.client, labels_to_index, request_timeout=300, refresh=self.refresh_type, headers=headers)

def update_document_meta(
self, id: str, meta: Dict[str, str], headers: Optional[Dict[str, str]] = None, index: str = None
self, id: str, meta: Dict[str, str], index: str = None, headers: Optional[Dict[str, str]] = None
):
"""
Update the metadata dictionary of a document by specifying its string id
Expand Down
13 changes: 13 additions & 0 deletions haystack/document_stores/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,19 @@ def get_document_count(
)
return len(documents)

def update_document_meta(self, id: str, meta: Dict[str, Any], index: Optional[str] = None):
    """
    Update the metadata dictionary of a document by specifying its string id.

    The given key-value pairs are merged into the Document's existing metadata in place: keys that
    already exist are overwritten, all other existing keys are left untouched.

    :param id: The ID of the Document whose metadata is being updated.
    :param meta: A dictionary with key-value pairs that should be added / changed for the provided Document ID.
    :param index: Name of the index the Document is located at. If None, `self.index` is used.
    """
    if index is None:
        index = self.index
    # Resolve the Document once and merge in place instead of re-resolving it for every key.
    # A failed lookup of `index` or `id` propagates to the caller unchanged.
    self.indexes[index][id].meta.update(meta)

def get_embedding_count(self, filters: Optional[Dict[str, List[str]]] = None, index: Optional[str] = None) -> int:
"""
Return the count of embeddings in the document store.
Expand Down
3 changes: 3 additions & 0 deletions test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,9 @@ def write_labels(self, *a, **k):
def delete_index(self, *a, **k):
pass

def update_document_meta(self, *a, **k):
    """No-op stub: accepts any positional and keyword arguments and does nothing."""
    pass


class MockRetriever(BaseRetriever):
outgoing_edges = 1
Expand Down
5 changes: 3 additions & 2 deletions test/document_stores/test_document_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -1185,8 +1185,9 @@ def test_multilabel_meta_aggregations(document_store: BaseDocumentStore):
assert multi_label.filters == l.filters


@pytest.mark.parametrize("document_store", ["elasticsearch", "faiss", "milvus1", "weaviate", "pinecone"], indirect=True)
# Currently update_document_meta() is not implemented for Memory doc store
@pytest.mark.parametrize(
"document_store", ["elasticsearch", "faiss", "milvus1", "weaviate", "pinecone", "memory"], indirect=True
)
def test_update_meta(document_store: BaseDocumentStore):
documents = [
Document(content="Doc1", meta={"meta_key_1": "1", "meta_key_2": "1"}),
Expand Down

0 comments on commit 288ddcf

Please sign in to comment.