Skip to content

Commit

Permalink
Remove unused code, fix doc
Browse files Browse the repository at this point in the history
  • Loading branch information
tfeher committed Nov 7, 2023
1 parent e9924e4 commit 9439818
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 54 deletions.
6 changes: 3 additions & 3 deletions docs/source/ann_benchmarks_dataset.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,12 @@ If you have a dataset, but no corresponding ground truth file, then you can gene
```bash
# With existing query file
python generate_groundtruth.py --dataset /dataset/base.1B.fbin --output=groundtruth_dir --queries=/dataset/query.public.10K.fbin
python -m raft-ann-bench.generate_groundtruth --dataset /dataset/base.fbin --output=groundtruth_dir --queries=/dataset/query.public.10K.fbin
# With randomly generated queries
python generate_groundtruth.py --dataset /dataset/base.1B.fbin --output=groundtruth_dir --queries=random --n_queries=10000
python -m raft-ann-bench.generate_groundtruth --dataset /dataset/base.fbin --output=groundtruth_dir --queries=random --n_queries=10000
# Using only a subset of the dataset. Define queries by randomly
# selecting vectors from the (subset of the) dataset.
python generate_groundtruth.py --dataset /dataset/base.1B.fbin --nrows=2000000 --output=groundtruth_dir --queries=random-choice --n_queries=10000
python -m raft-ann-bench.generate_groundtruth --dataset /dataset/base.fbin --nrows=2000000 --output=groundtruth_dir --queries=random-choice --n_queries=10000
```
Original file line number Diff line number Diff line change
Expand Up @@ -100,17 +100,17 @@ def main():
"The input and output files are in big-ann-benchmark's binary format.",
epilog="""Example usage
# With existing query file
python -m generate_groundtruth --dataset /dataset/base.1B.fbin \
--output=groundtruth_dir --queries=/dataset/query.public.10K.fbin
python -m raft-ann-bench.generate_groundtruth --dataset /dataset/base.\
fbin --output=groundtruth_dir --queries=/dataset/query.public.10K.fbin
# With randomly generated queries
python -m generate_groundtruth --dataset /dataset/base.1B.fbin \
--output=groundtruth_dir --queries=random --n_queries=10000
python -m raft-ann-bench.generate_groundtruth --dataset /dataset/base.\
fbin --output=groundtruth_dir --queries=random --n_queries=10000
# Using only a subset of the dataset. Define queries by randomly
# selecting vectors from the (subset of the) dataset.
python -m generate_groundtruth --dataset /dataset/base.1B.fbin \
--nrows=2000000 --cols=128 --output=groundtruth_dir \
python -m raft-ann-bench.generate_groundtruth --dataset /dataset/base.\
fbin --nrows=2000000 --cols=128 --output=groundtruth_dir \
--queries=random-choice --n_queries=10000
""",
formatter_class=argparse.RawDescriptionHelpFormatter,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,7 @@
#

import os
import time

import cupy as cp
import numpy as np


Expand Down Expand Up @@ -103,46 +101,3 @@ def write_bin(fname, data):
with open(fname, "wb") as f:
np.asarray(data.shape, dtype=np.uint32).tofile(f)
data.tofile(f)


def calc_recall(ann_idx, true_nn_idx):
    """Return the fraction of ANN result entries present in the ground truth.

    ``ann_idx`` is converted to a NumPy array with ``cp.asnumpy``. For every
    row ``i`` the values common to ``ann_idx[i, :]`` and ``true_nn_idx[i, :]``
    are found with ``np.intersect1d`` (which returns unique values, so an
    index repeated within a row is counted once). The summed count is divided
    by the total number of entries in ``ann_idx``.

    Parameters
    ----------
    ann_idx
        Neighbor indices returned by the search; anything ``cp.asnumpy``
        accepts. Presumably shaped (n_queries, k) -- confirm against callers.
    true_nn_idx
        Ground-truth neighbor indices with the same shape as ``ann_idx``.

    Returns
    -------
    float
        Matched entries divided by ``ann_idx.size``.

    Raises
    ------
    RuntimeError
        If the two index arrays have different shapes.
    """
    found = cp.asnumpy(ann_idx)
    if found.shape != true_nn_idx.shape:
        message = "Incompatible shapes {} vs {}".format(
            found.shape, true_nn_idx.shape
        )
        raise RuntimeError(message)

    n_rows = found.shape[0]
    matches = sum(
        np.intersect1d(found[row, :], true_nn_idx[row, :]).size
        for row in range(n_rows)
    )
    return matches / found.size


class BenchmarkTimer:
    """Time a code block over repeated runs, optionally after warm-up runs.

    Iterate over :meth:`benchmark_runs` and place the code to be measured in
    the loop body. The duration of each measured run is appended to the
    instance variable `timings`. Use like:

    .. code-block:: python

        timer = BenchmarkTimer(reps=5)
        for _ in timer.benchmark_runs():
            ... do something ...
        print(np.min(timer.timings))

    This class is part of the rapids/cuml benchmark suite

    Parameters
    ----------
    reps : int
        Number of measured runs; each one adds one entry to `timings`.
    warmup : int
        Number of initial runs that are executed but not recorded.
    """

    def __init__(self, reps=1, warmup=0):
        self.warmup = warmup
        self.reps = reps
        self.timings = []

    def benchmark_runs(self):
        """Yield the run index for each of the ``warmup + reps`` runs.

        The time spent by the caller between receiving a run index and
        requesting the next one is measured. Only runs after the warm-up
        phase are recorded, and each is recorded exactly once.
        """
        for r in range(self.reps + self.warmup):
            # perf_counter is monotonic, so a wall-clock adjustment during a
            # run cannot produce a negative or skewed duration.
            t0 = time.perf_counter()
            yield r
            # Execution resumes here only when the caller asks for the next
            # run, i.e. after the loop body for run `r` has finished.
            elapsed = time.perf_counter() - t0
            if r >= self.warmup:
                self.timings.append(elapsed)

0 comments on commit 9439818

Please sign in to comment.