Commit e8bf02c
Showing 4 changed files with 50 additions and 13 deletions.
1 change: 1 addition & 0 deletions python/lib.cpp
@@ -1074,6 +1074,7 @@ PYBIND11_MODULE(compiled, m) {

i.def("__len__", &dense_index_py_t::size);
i.def_property_readonly("size", &dense_index_py_t::size);
i.def_property_readonly("multi", &dense_index_py_t::multi);
i.def_property_readonly("connectivity", &dense_index_py_t::connectivity);
i.def_property_readonly("capacity", &dense_index_py_t::capacity);
i.def_property_readonly("ndim",
21 changes: 20 additions & 1 deletion python/scripts/test_index.py
@@ -4,7 +4,7 @@
import numpy as np

from usearch.io import load_matrix, save_matrix
from usearch.eval import random_vectors
from usearch.eval import random_vectors, self_recall, SearchStats
from usearch.index import search

from usearch.index import (
@@ -96,6 +96,25 @@ def test_index_search(ndim, metric, quantization, dtype, batch_size):
assert np.all(np.sort(index.keys) == np.sort(keys))


@pytest.mark.parametrize("ndim", [3, 97, 256])
@pytest.mark.parametrize("batch_size", [1, 7, 1024])
def test_index_self_recall(ndim: int, batch_size: int):
"""
Test self-recall evaluation scripts.
"""
index = Index(ndim=ndim, multi=False)
keys = np.arange(batch_size)
vectors = random_vectors(count=batch_size, ndim=ndim)
index.add(keys, vectors)

stats_all: SearchStats = self_recall(index, keys=keys)
stats_quarter: SearchStats = self_recall(index, sample=0.25, count=10)

assert stats_all.computed_distances > 0
assert stats_quarter.computed_distances > 0


@pytest.mark.parametrize("batch_size", [1, 7, 1024])
def test_index_duplicates(batch_size):
ndim = 8
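
Outside the test-suite, the same helpers are handy for spot-checking an index right after construction. A hedged sketch, relying only on the `random_vectors`, `self_recall`, and `SearchStats` definitions visible in this diff; the counts and dimensions are arbitrary example values:

import numpy as np

from usearch.index import Index
from usearch.eval import random_vectors, self_recall, SearchStats

index = Index(ndim=256, multi=False)
keys = np.arange(1000)
index.add(keys, random_vectors(count=1000, ndim=256))

stats: SearchStats = self_recall(index, sample=0.25, count=10)
print(stats.mean_recall)         # count_matches / count_queries
print(stats.computed_distances)  # distance computations performed by the search
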
26 changes: 16 additions & 10 deletions python/usearch/eval.py
@@ -89,15 +89,15 @@ def mean_recall(self) -> float:
return self.count_matches / self.count_queries


def self_recall(index: Index, sample: float = 1, **kwargs) -> SearchStats:
def self_recall(index: Index, sample: Union[float, int] = 1.0, **kwargs) -> SearchStats:
"""Simplest benchmark for a quality of search, which queries every
existing member of the index, to make sure approximate search finds
the point itself.
:param index: Non-empty pre-constructed index
:type index: Index
:param sample: Share of vectors to search, defaults to 1
:type sample: float
:param sample: Share (or number) of vectors to search, defaults to 1.0
:type sample: Union[float, int]
:return: Evaluation report with key metrics
:rtype: SearchStats
"""
@@ -107,16 +107,22 @@ def self_recall(index: Index, sample: float = 1, **kwargs) -> SearchStats:
kwargs["count"] = 1

if "keys" in kwargs:
keys = kwargs["keys"]
keys = kwargs.pop("keys")
else:
keys = np.array(index.keys)

if sample != 1:
keys = np.random.choice(keys, int(ceil(len(keys) * sample)))
if sample != 1.0:
if isinstance(sample, float):
sample = int(ceil(len(keys) * sample))
keys = np.random.choice(keys, sample)

queries = index.get(keys, index.dtype)
matches: BatchMatches = index.search(queries, **kwargs)
count_matches: float = matches.count_matches(keys)
matches = index.search(queries, **kwargs)
count_matches: int = (
matches.count_matches(keys)
if isinstance(matches, BatchMatches)
else int(matches.keys[0] == keys[0])
)
return SearchStats(
index_size=len(index),
count_queries=len(keys),
@@ -255,13 +261,13 @@ def build(
if k is not None:
d.neighbors = d.neighbors[:, :k]
else:
assert k is None, "Cant ovveride `k`, will retrieve one neighbor"
assert k is None, "Cant override `k`, will retrieve one neighbor"
d.neighbors = np.reshape(d.keys, (count, 1))

else:
assert ndim is not None
assert count is not None
assert k is None, "Cant ovveride `k`, will retrieve one neighbor"
assert k is None, "Cant override `k`, will retrieve one neighbor"

d.vectors = random_vectors(count=count, ndim=ndim)
d.queries = d.vectors
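
With the widened `sample` type, a float is interpreted as a share of the indexed keys, while an integer is taken as an absolute number of queries to draw. A short sketch of both call styles, reusing the `index` and `keys` from the earlier sketch:

stats_share = self_recall(index, sample=0.1, count=10)  # float: roughly 10% of the members
stats_fixed = self_recall(index, sample=50, count=10)   # int: 50 sampled members

# An explicit subset of keys can be passed too; `keys` is now popped from
# kwargs before the search, so it no longer leaks into `index.search`.
stats_subset = self_recall(index, keys=keys[:100], count=10)
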
15 changes: 13 additions & 2 deletions python/usearch/index.py
@@ -760,7 +760,11 @@ def cast(result):
keys = keys.astype(Key)

results = self._compiled.get_many(keys, dtype)
results = [cast(result) for result in results]
results = (
cast(results)
if isinstance(results, np.ndarray)
else [cast(result) for result in results]
)
return results[0] if is_one else results

def __getitem__(
@@ -1142,6 +1146,10 @@ def max_level(self) -> int:
def nlevels(self) -> int:
return self._compiled.max_level + 1

@property
def multi(self) -> bool:
return self._compiled.multi

@property
def stats(self) -> _CompiledIndexStats:
"""Get the accumulated statistics for the entire multi-level graph.
@@ -1193,6 +1201,7 @@ def specs(self) -> Dict[str, Union[str, int, bool]]:
return {
"type": "usearch.Index",
"ndim": self.ndim,
"multi": self.multi,
"connectivity": self.connectivity,
"expansion_add": self.expansion_add,
"expansion_search": self.expansion_search,
@@ -1210,11 +1219,12 @@ def specs(self) -> Dict[str, Union[str, int, bool]]:
def __repr__(self) -> str:
if not hasattr(self, "_compiled"):
return "usearch.Index(failed)"
f = "usearch.Index({} x {}, {}, connectivity: {}, expansion: {} & {}, {:,} vectors in {} levels, {} hardware acceleration)"
f = "usearch.Index({} x {}, {}, multi: {}, connectivity: {}, expansion: {} & {}, {:,} vectors in {} levels, {} hardware acceleration)"
return f.format(
self.dtype,
self.ndim,
self.metric_kind,
self.multi,
self.connectivity,
self.expansion_add,
self.expansion_search,
@@ -1236,6 +1246,7 @@ def _repr_pretty_(self, printer, cycle) -> str:
f"-- data type: {self.dtype}",
f"-- dimensions: {self.ndim}",
f"-- metric: {self.metric_kind}",
f"-- multi: {self.multi}",
f"-- connectivity: {self.connectivity}",
f"-- expansion on addition:{self.expansion_add} candidates",
f"-- expansion on search: {self.expansion_search} candidates",
