Commit e8bf02c
Showing 4 changed files with 50 additions and 13 deletions.
1 change: 1 addition & 0 deletions python/lib.cpp
@@ -1074,6 +1074,7 @@ PYBIND11_MODULE(compiled, m) {

i.def("__len__", &dense_index_py_t::size);
i.def_property_readonly("size", &dense_index_py_t::size);
i.def_property_readonly("multi", &dense_index_py_t::multi);
i.def_property_readonly("connectivity", &dense_index_py_t::connectivity);
i.def_property_readonly("capacity", &dense_index_py_t::capacity);
i.def_property_readonly("ndim",
21 changes: 20 additions & 1 deletion python/scripts/test_index.py
@@ -4,7 +4,7 @@
import numpy as np

from usearch.io import load_matrix, save_matrix
from usearch.eval import random_vectors
from usearch.eval import random_vectors, self_recall, SearchStats
from usearch.index import search

from usearch.index import (
@@ -96,6 +96,25 @@ def test_index_search(ndim, metric, quantization, dtype, batch_size):
assert np.all(np.sort(index.keys) == np.sort(keys))


@pytest.mark.parametrize("ndim", [3, 97, 256])
@pytest.mark.parametrize("batch_size", [1, 7, 1024])
def test_index_self_recall(ndim: int, batch_size: int):
"""
Test self-recall evaluation scripts.
"""
index = Index(ndim=ndim, multi=False)
keys = np.arange(batch_size)
vectors = random_vectors(count=batch_size, ndim=ndim)
index.add(keys, vectors)

stats_all: SearchStats = self_recall(index, keys=keys)
stats_quarter: SearchStats = self_recall(index, sample=0.25, count=10)

assert stats_all.computed_distances > 0
assert stats_quarter.computed_distances > 0


@pytest.mark.parametrize("batch_size", [1, 7, 1024])
def test_index_duplicates(batch_size):
ndim = 8
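
Outside the test-suite, the same helpers are handy for spot-checking an index right after construction. A hedged sketch, relying only on the `random_vectors`, `self_recall`, and `SearchStats` definitions visible in this diff; the counts and dimensions are arbitrary example values:

import numpy as np

from usearch.index import Index
from usearch.eval import random_vectors, self_recall, SearchStats

index = Index(ndim=256, multi=False)
keys = np.arange(1000)
index.add(keys, random_vectors(count=1000, ndim=256))

stats: SearchStats = self_recall(index, sample=0.25, count=10)
print(stats.mean_recall)         # count_matches / count_queries
print(stats.computed_distances)  # distance computations performed by the search
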
26 changes: 16 additions & 10 deletions python/usearch/eval.py
@@ -89,15 +89,15 @@ def mean_recall(self) -> float:
return self.count_matches / self.count_queries


def self_recall(index: Index, sample: float = 1, **kwargs) -> SearchStats:
def self_recall(index: Index, sample: Union[float, int] = 1.0, **kwargs) -> SearchStats:
"""Simplest benchmark for a quality of search, which queries every
existing member of the index, to make sure approximate search finds
the point itself.
:param index: Non-empty pre-constructed index
:type index: Index
:param sample: Share of vectors to search, defaults to 1
:type sample: float
:param sample: Share (or number) of vectors to search, defaults to 1.0
:type sample: Union[float, int]
:return: Evaluation report with key metrics
:rtype: SearchStats
"""
@@ -107,16 +107,22 @@ def self_recall(index: Index, sample: float = 1, **kwargs) -> SearchStats:
kwargs["count"] = 1

if "keys" in kwargs:
keys = kwargs["keys"]
keys = kwargs.pop("keys")
else:
keys = np.array(index.keys)

if sample != 1:
keys = np.random.choice(keys, int(ceil(len(keys) * sample)))
if sample != 1.0:
if isinstance(sample, float):
sample = int(ceil(len(keys) * sample))
keys = np.random.choice(keys, sample)

queries = index.get(keys, index.dtype)
matches: BatchMatches = index.search(queries, **kwargs)
count_matches: float = matches.count_matches(keys)
matches = index.search(queries, **kwargs)
count_matches: int = (
matches.count_matches(keys)
if isinstance(matches, BatchMatches)
else int(matches.keys[0] == keys[0])
)
return SearchStats(
index_size=len(index),
count_queries=len(keys),
@@ -255,13 +261,13 @@ def build(
if k is not None:
d.neighbors = d.neighbors[:, :k]
else:
assert k is None, "Cant ovveride `k`, will retrieve one neighbor"
assert k is None, "Cant override `k`, will retrieve one neighbor"
d.neighbors = np.reshape(d.keys, (count, 1))

else:
assert ndim is not None
assert count is not None
assert k is None, "Cant ovveride `k`, will retrieve one neighbor"
assert k is None, "Cant override `k`, will retrieve one neighbor"

d.vectors = random_vectors(count=count, ndim=ndim)
d.queries = d.vectors
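
With the widened `sample` type, a float is interpreted as a share of the indexed keys, while an integer is taken as an absolute number of queries to draw. A short sketch of both call styles, reusing the `index` and `keys` from the earlier sketch:

stats_share = self_recall(index, sample=0.1, count=10)  # float: roughly 10% of the members
stats_fixed = self_recall(index, sample=50, count=10)   # int: 50 sampled members

# An explicit subset of keys can be passed too; `keys` is now popped from
# kwargs before the search, so it no longer leaks into `index.search`.
stats_subset = self_recall(index, keys=keys[:100], count=10)
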
15 changes: 13 additions & 2 deletions python/usearch/index.py
@@ -760,7 +760,11 @@ def cast(result):
keys = keys.astype(Key)

results = self._compiled.get_many(keys, dtype)
results = [cast(result) for result in results]
results = (
cast(results)
if isinstance(results, np.ndarray)
else [cast(result) for result in results]
)
return results[0] if is_one else results

def __getitem__(
@@ -1142,6 +1146,10 @@ def max_level(self) -> int:
def nlevels(self) -> int:
return self._compiled.max_level + 1

@property
def multi(self) -> bool:
return self._compiled.multi

@property
def stats(self) -> _CompiledIndexStats:
"""Get the accumulated statistics for the entire multi-level graph.
@@ -1193,6 +1201,7 @@ def specs(self) -> Dict[str, Union[str, int, bool]]:
return {
"type": "usearch.Index",
"ndim": self.ndim,
"multi": self.multi,
"connectivity": self.connectivity,
"expansion_add": self.expansion_add,
"expansion_search": self.expansion_search,
@@ -1210,11 +1219,12 @@ def specs(self) -> Dict[str, Union[str, int, bool]]:
def __repr__(self) -> str:
if not hasattr(self, "_compiled"):
return "usearch.Index(failed)"
f = "usearch.Index({} x {}, {}, connectivity: {}, expansion: {} & {}, {:,} vectors in {} levels, {} hardware acceleration)"
f = "usearch.Index({} x {}, {}, multi: {}, connectivity: {}, expansion: {} & {}, {:,} vectors in {} levels, {} hardware acceleration)"
return f.format(
self.dtype,
self.ndim,
self.metric_kind,
self.multi,
self.connectivity,
self.expansion_add,
self.expansion_search,
@@ -1236,6 +1246,7 @@ def _repr_pretty_(self, printer, cycle) -> str:
f"-- data type: {self.dtype}",
f"-- dimensions: {self.ndim}",
f"-- metric: {self.metric_kind}",
f"-- multi: {self.multi}",
f"-- connectivity: {self.connectivity}",
f"-- expansion on addition:{self.expansion_add} candidates",
f"-- expansion on search: {self.expansion_search} candidates",
