deepset-ai · tholor · Jan 29, 2021 · Jan 25, 2021 · Jan 25, 2021 · Jan 25, 2021
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -17,6 +17,9 @@ jobs:
     - name: Run Elasticsearch
       run: docker run -d -p 9200:9200 -e "discovery.type=single-node" -e "ES_JAVA_OPTS=-Xms128m -Xmx128m" elasticsearch:7.9.2
 
+    - name: Run Milvus
+      run: docker run -d -p 19530:19530 -p 19121:19121 milvusdb/milvus:0.10.5-cpu-d010621-4eda95
+
     - name: Run Apache Tika
       run: docker run -d -p 9998:9998 -e "TIKA_CHILD_JAVA_OPTS=-JXms128m" -e "TIKA_CHILD_JAVA_OPTS=-JXmx128m" apache/tika:1.24.1
 

diff --git a/docs/_src/api/api/document_store.md b/docs/_src/api/api/document_store.md
@@ -346,7 +346,7 @@ None
 #### delete\_all\_documents
 
 ```python
- | delete_all_documents(index: str, filters: Optional[Dict[str, List[str]]] = None)
+ | delete_all_documents(index: Optional[str] = None, filters: Optional[Dict[str, List[str]]] = None)
 ```
 
 Delete documents in an index. All documents are deleted if no filters are passed.
@@ -763,7 +763,7 @@ the vector embeddings are indexed in a FAISS Index.
 #### \_\_init\_\_
 
 ```python
- | __init__(sql_url: str = "sqlite:///", vector_dim: int = 768, faiss_index_factory_str: str = "Flat", faiss_index: Optional[faiss.swigfaiss.Index] = None, return_embedding: bool = False, update_existing_documents: bool = False, index: str = "document", similarity: str = "dot_product", **kwargs, ,)
+ | __init__(sql_url: str = "sqlite:///", vector_dim: int = 768, faiss_index_factory_str: str = "Flat", faiss_index: Optional[faiss.swigfaiss.Index] = None, return_embedding: bool = False, update_existing_documents: bool = False, index: str = "document", similarity: str = "dot_product", embedding_field: str = "embedding", **kwargs)
 ```
 
 **Arguments**:
@@ -796,6 +796,7 @@ added already exists.
 - `index`: Name of index in document store to use.
 - `similarity`: The similarity function used to compare document vectors. 'dot_product' is the default sine it is
 more performant with DPR embeddings. 'cosine' is recommended if you are using a Sentence BERT model.
+- `embedding_field`: Name of field containing an embedding vector.
 
 <a name="faiss.FAISSDocumentStore.write_documents"></a>
 #### write\_documents
@@ -881,7 +882,7 @@ None
 #### delete\_all\_documents
 
 ```python
- | delete_all_documents(index=None)
+ | delete_all_documents(index: Optional[str] = None, filters: Optional[Dict[str, List[str]]] = None)
 ```
 
 Delete all documents from the document store.

diff --git a/haystack/document_store/base.py b/haystack/document_store/base.py
@@ -198,6 +198,6 @@ def add_eval_data(self, filename: str, doc_index: str = "eval_document", label_i
             logger.error("File needs to be in json or jsonl format.")
 
     @abstractmethod
-    def delete_all_documents(self, index: str, filters: Optional[Dict[str, List[str]]] = None):
+    def delete_all_documents(self, index: Optional[str] = None, filters: Optional[Dict[str, List[str]]] = None):
         pass
 
diff --git a/haystack/document_store/elasticsearch.py b/haystack/document_store/elasticsearch.py
@@ -757,14 +757,15 @@ def update_embeddings(self, retriever: BaseRetriever, index: Optional[str] = Non
 
             bulk(self.client, doc_updates, request_timeout=300, refresh=self.refresh_type)
 
-    def delete_all_documents(self, index: str, filters: Optional[Dict[str, List[str]]] = None):
+    def delete_all_documents(self, index: Optional[str] = None, filters: Optional[Dict[str, List[str]]] = None):
         """
         Delete documents in an index. All documents are deleted if no filters are passed.
 
         :param index: Index name to delete the document from.
         :param filters: Optional filters to narrow down the documents to be deleted.
         :return: None
         """
+        index = index or self.index
         query: Dict[str, Any] = {"query": {}}
         if filters:
             filter_clause = []

diff --git a/haystack/document_store/faiss.py b/haystack/document_store/faiss.py
@@ -41,6 +41,7 @@ def __init__(
         update_existing_documents: bool = False,
         index: str = "document",
         similarity: str = "dot_product",
+        embedding_field: str = "embedding",
         **kwargs,
     ):
         """
@@ -72,6 +73,7 @@ def __init__(
         :param index: Name of index in document store to use.
         :param similarity: The similarity function used to compare document vectors. 'dot_product' is the default sine it is
                    more performant with DPR embeddings. 'cosine' is recommended if you are using a Sentence BERT model.
+        :param embedding_field: Name of field containing an embedding vector.
         """
         self.vector_dim = vector_dim
 
@@ -83,6 +85,7 @@ def __init__(
                 self.faiss_index.set_direct_map_type(faiss.DirectMap.Hashtable)
 
         self.return_embedding = return_embedding
+        self.embedding_field = embedding_field
         if similarity == "dot_product":
             self.similarity = similarity
         else:
@@ -154,7 +157,7 @@ def write_documents(
 
     def _create_document_field_map(self) -> Dict:
         return {
-            self.index: "embedding",
+            self.index: self.embedding_field,
         }
 
     def update_embeddings(self, retriever: BaseRetriever, index: Optional[str] = None, batch_size: int = 10_000):
@@ -275,13 +278,13 @@ def train_index(self, documents: Optional[Union[List[dict], List[Document]]], em
             embeddings = np.array(embeddings, dtype="float32")
         self.faiss_index.train(embeddings)
 
-    def delete_all_documents(self, index=None):
+    def delete_all_documents(self, index: Optional[str] = None, filters: Optional[Dict[str, List[str]]] = None):
         """
         Delete all documents from the document store.
         """
         index = index or self.index
         self.faiss_index.reset()
-        super().delete_all_documents(index=index)
+        super().delete_all_documents(index=index, filters=filters)
 
     def query_by_embedding(self,
                            query_emb: np.array,