# Patch summary (leading text before the first "diff --git" header).
#
# * docs/_src/api/api/retriever.md and the `retrieve` docstring in
#   haystack/nodes/retriever/sparse.py: `query` and `top_k` are now described
#   as having no effect.
# * haystack/nodes/retriever/sparse.py: `retrieve` no longer falls back to
#   `self.top_k` and no longer calls `document_store.query(query=None, ...,
#   custom_query=self.custom_query, ...)`; it now returns
#   `document_store.get_all_documents(filters=filters, index=index, headers=headers)`.
# * test/test_retriever.py: adds `test_es_filter_only`, which writes 12
#   documents (11 with meta f1 == "0", one with f1 == "1"), retrieves with
#   filters={"f1": ["0"]} and expects 11 results.
#
# NOTE(review): the docstring summary kept as context ("return a small number
#   documents that are most relevant to the query") is not touched by this
#   patch and looks stale for the new behaviour - confirm and update separately.
# NOTE(review): the class that owns `retrieve` is not visible in the hunk;
#   presumably the filter-only Elasticsearch retriever, judging by the
#   "es_filter_only" fixture name - confirm.
# NOTE(review): this patch was recovered from a whitespace-collapsed copy. Line
#   breaks agree with the hunk header counts, but leading indentation and blank
#   context lines were reconstructed - verify against the target files before
#   applying.
diff --git a/docs/_src/api/api/retriever.md b/docs/_src/api/api/retriever.md
index bb75023bb9..dc46aec859 100644
--- a/docs/_src/api/api/retriever.md
+++ b/docs/_src/api/api/retriever.md
@@ -229,9 +229,9 @@ that are most relevant to the query.
 
 **Arguments**:
 
-- `query`: The query
+- `query`: Has no effect, can pass in empty string
 - `filters`: A dictionary where the keys specify a metadata field and the value is a list of accepted values for that field
-- `top_k`: How many documents to return per query.
+- `top_k`: Has no effect, pass in any int or None
 - `index`: The name of the index in the DocumentStore from which to retrieve documents
 - `headers`: Custom HTTP headers to pass to elasticsearch client (e.g. {'Authorization': 'Basic YWRtaW46cm9vdA=='})
 Check out https://www.elastic.co/guide/en/elasticsearch/reference/current/http-clients.html for more information.
diff --git a/haystack/nodes/retriever/sparse.py b/haystack/nodes/retriever/sparse.py
index 4271f09131..29bc35a57c 100644
--- a/haystack/nodes/retriever/sparse.py
+++ b/haystack/nodes/retriever/sparse.py
@@ -157,20 +157,16 @@ def retrieve(
         Scan through documents in DocumentStore and return a small number documents
         that are most relevant to the query.
 
-        :param query: The query
+        :param query: Has no effect, can pass in empty string
         :param filters: A dictionary where the keys specify a metadata field and the value is a list of accepted values for that field
-        :param top_k: How many documents to return per query.
+        :param top_k: Has no effect, pass in any int or None
         :param index: The name of the index in the DocumentStore from which to retrieve documents
         :param headers: Custom HTTP headers to pass to elasticsearch client (e.g. {'Authorization': 'Basic YWRtaW46cm9vdA=='})
                 Check out https://www.elastic.co/guide/en/elasticsearch/reference/current/http-clients.html for more information.
         """
-        if top_k is None:
-            top_k = self.top_k
         if index is None:
             index = self.document_store.index
-        documents = self.document_store.query(
-            query=None, filters=filters, top_k=top_k, custom_query=self.custom_query, index=index, headers=headers
-        )
+        documents = self.document_store.get_all_documents(filters=filters, index=index, headers=headers)
         return documents
 
 
diff --git a/test/test_retriever.py b/test/test_retriever.py
index c31faa5353..d0e8509117 100644
--- a/test/test_retriever.py
+++ b/test/test_retriever.py
@@ -563,3 +563,25 @@ def test_embeddings_encoder_of_embedding_retriever_should_warn_about_model_forma
             "You may need to set 'model_format='sentence_transformers' to ensure correct loading of model."
             in caplog.text
         )
+
+
+@pytest.mark.parametrize("retriever", ["es_filter_only"], indirect=True)
+@pytest.mark.parametrize("document_store", ["elasticsearch"], indirect=True)
+def test_es_filter_only(document_store, retriever):
+    docs = [
+        Document(content="Doc1", meta={"f1": "0"}),
+        Document(content="Doc2", meta={"f1": "0"}),
+        Document(content="Doc3", meta={"f1": "0"}),
+        Document(content="Doc4", meta={"f1": "0"}),
+        Document(content="Doc5", meta={"f1": "0"}),
+        Document(content="Doc6", meta={"f1": "0"}),
+        Document(content="Doc7", meta={"f1": "1"}),
+        Document(content="Doc8", meta={"f1": "0"}),
+        Document(content="Doc9", meta={"f1": "0"}),
+        Document(content="Doc10", meta={"f1": "0"}),
+        Document(content="Doc11", meta={"f1": "0"}),
+        Document(content="Doc12", meta={"f1": "0"}),
+    ]
+    document_store.write_documents(docs)
+    retrieved_docs = retriever.retrieve(query="", filters={"f1": ["0"]})
+    assert len(retrieved_docs) == 11