deepset-ai · tholor · Jul 17, 2020 · Jul 17, 2020
diff --git a/haystack/database/elasticsearch.py b/haystack/database/elasticsearch.py
@@ -4,6 +4,7 @@
 from typing import List, Optional, Union, Dict, Any
 from elasticsearch import Elasticsearch
 from elasticsearch.helpers import bulk, scan
+import numpy as np
 
 from haystack.database.base import BaseDocumentStore, Document
 
@@ -236,7 +237,7 @@ def query(
         return documents
 
     def query_by_embedding(self,
-                           query_emb: List[float],
+                           query_emb: np.ndarray,
                            filters: Optional[dict] = None,
                            top_k: int = 10,
                            index: Optional[str] = None) -> List[Document]:
@@ -255,7 +256,7 @@ def query_by_embedding(self,
                         "script": {
                             "source": f"cosineSimilarity(params.query_vector,doc['{self.embedding_field}']) + 1.0",
                             "params": {
-                                "query_vector": query_emb
+                                "query_vector": query_emb.tolist()
                             }
                         }
                     }

diff --git a/haystack/retriever/dense.py b/haystack/retriever/dense.py
@@ -246,7 +246,7 @@ def retrieve(self, query: str, filters: dict = None, top_k: int = 10, index: str
                                                            top_k=top_k, index=index)
         return documents
 
-    def embed(self, texts: Union[List[str], str]) -> List[List[float]]:
+    def embed(self, texts: Union[List[str], str]) -> List[np.ndarray]:
         """
         Create embeddings for each text in a list of texts using the retrievers model (`self.embedding_model`)
         :param texts: texts to embed
@@ -259,13 +259,14 @@ def embed(self, texts: Union[List[str], str]) -> List[List[float]]:
         assert type(texts) == list, "Expecting a list of texts, i.e. create_embeddings(texts=['text1',...])"
 
         if self.model_format == "farm" or self.model_format == "transformers":
-            res = self.embedding_model.inference_from_dicts(dicts=[{"text": t} for t in texts])  # type: ignore
-            emb = [list(r["vec"]) for r in res] #cast from numpy
+            emb = self.embedding_model.inference_from_dicts(dicts=[{"text": t} for t in texts])  # type: ignore
+            emb = [(r["vec"]) for r in emb]
         elif self.model_format == "sentence_transformers":
             # text is single string, sentence-transformers needs a list of strings
             # get back list of numpy embedding vectors
-            res = self.embedding_model.encode(texts)  # type: ignore
-            emb = [list(r.astype('float64')) for r in res] #cast from numpy
+            emb = self.embedding_model.encode(texts)  # type: ignore
+            # cast to float64 as float32 can cause trouble when serializing for ES
+            emb = [(r.astype('float64')) for r in emb]
         return emb
 
     def embed_queries(self, texts: List[str]) -> List[np.array]: