diff --git a/docs/_src/api/api/document_store.md b/docs/_src/api/api/document_store.md index 85f7895d63..61b69bf7d8 100644 --- a/docs/_src/api/api/document_store.md +++ b/docs/_src/api/api/document_store.md @@ -212,8 +212,6 @@ TODO drop params #### normalize\_embedding ```python -@staticmethod -@njit def normalize_embedding(emb: np.ndarray) -> None ``` diff --git a/haystack/document_stores/base.py b/haystack/document_stores/base.py index 6b6f810e5c..f66e9c3791 100644 --- a/haystack/document_stores/base.py +++ b/haystack/document_stores/base.py @@ -341,9 +341,7 @@ def get_document_count( ) -> int: pass - @staticmethod - @njit # (fastmath=True) - def normalize_embedding(emb: np.ndarray) -> None: + def normalize_embedding(self, emb: np.ndarray) -> None: """ Performs L2 normalization of embeddings vector inplace. Input can be a single vector (1D array) or a matrix (2D array). @@ -352,16 +350,26 @@ def normalize_embedding(emb: np.ndarray) -> None: # Single vec if len(emb.shape) == 1: - norm = np.sqrt(emb.dot(emb)) # faster than np.linalg.norm() - if norm != 0.0: - emb /= norm + self._normalize_embedding_1D(emb) # 2D matrix else: - for vec in emb: - vec = np.ascontiguousarray(vec) - norm = np.sqrt(vec.dot(vec)) - if norm != 0.0: - vec /= norm + self._normalize_embedding_2D(emb) + + @staticmethod + @njit # (fastmath=True) + def _normalize_embedding_1D(emb: np.ndarray) -> None: + norm = np.sqrt(emb.dot(emb)) # faster than np.linalg.norm() + if norm != 0.0: + emb /= norm + + @staticmethod + @njit # (fastmath=True) + def _normalize_embedding_2D(emb: np.ndarray) -> None: + for vec in emb: + vec = np.ascontiguousarray(vec) + norm = np.sqrt(vec.dot(vec)) + if norm != 0.0: + vec /= norm def finalize_raw_score(self, raw_score: float, similarity: Optional[str]) -> float: if similarity == "cosine":