Remove unused code, fix doc

rapidsai · Nov 7, 2023 · 9439818
1 parent e9924e4
commit 9439818
Show file tree

Hide file tree

Showing 3 changed files with 9 additions and 54 deletions.
diff --git a/docs/source/ann_benchmarks_dataset.md b/docs/source/ann_benchmarks_dataset.md
@@ -52,12 +52,12 @@ If you have a dataset, but no corresponding ground truth file, then you can gene
 
 ```bash
 # With existing query file
-python generate_groundtruth.py --dataset /dataset/base.1B.fbin --output=groundtruth_dir --queries=/dataset/query.public.10K.fbin
+python -m raft-ann-bench.generate_groundtruth --dataset /dataset/base.fbin --output=groundtruth_dir --queries=/dataset/query.public.10K.fbin
 
 # With randomly generated queries
-python generate_groundtruth.py --dataset /dataset/base.1B.fbin --output=groundtruth_dir --queries=random --n_queries=10000
+python -m raft-ann-bench.generate_groundtruth --dataset /dataset/base.fbin --output=groundtruth_dir --queries=random --n_queries=10000
 
 # Using only a subset of the dataset. Define queries by randomly
 # selecting vectors from the (subset of the) dataset.
-python generate_groundtruth.py --dataset /dataset/base.1B.fbin --nrows=2000000 --output=groundtruth_dir --queries=random-choice --n_queries=10000
+python -m raft-ann-bench.generate_groundtruth --dataset /dataset/base.fbin --nrows=2000000 --output=groundtruth_dir --queries=random-choice --n_queries=10000
 ```
diff --git a/python/raft-ann-bench/src/raft-ann-bench/generate_groundtruth/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/generate_groundtruth/__main__.py
@@ -100,17 +100,17 @@ def main():
         "The input and output files are in big-ann-benchmark's binary format.",
         epilog="""Example usage
     # With existing query file
-    python -m generate_groundtruth --dataset /dataset/base.1B.fbin \
---output=groundtruth_dir --queries=/dataset/query.public.10K.fbin
+    python -m raft-ann-bench.generate_groundtruth --dataset /dataset/base.\
+fbin --output=groundtruth_dir --queries=/dataset/query.public.10K.fbin
 
     # With randomly generated queries
-    python -m generate_groundtruth --dataset /dataset/base.1B.fbin \
---output=groundtruth_dir --queries=random --n_queries=10000
+    python -m raft-ann-bench.generate_groundtruth --dataset /dataset/base.\
+fbin --output=groundtruth_dir --queries=random --n_queries=10000
 
     # Using only a subset of the dataset. Define queries by randomly
     # selecting vectors from the (subset of the) dataset.
-    python -m generate_groundtruth --dataset /dataset/base.1B.fbin \
---nrows=2000000 --cols=128 --output=groundtruth_dir \
+    python -m raft-ann-bench.generate_groundtruth --dataset /dataset/base.\
+fbin --nrows=2000000 --cols=128 --output=groundtruth_dir \
 --queries=random-choice --n_queries=10000
     """,
         formatter_class=argparse.RawDescriptionHelpFormatter,

diff --git a/python/raft-ann-bench/src/raft-ann-bench/generate_groundtruth/utils.py b/python/raft-ann-bench/src/raft-ann-bench/generate_groundtruth/utils.py
@@ -15,9 +15,7 @@
 #
 
 import os
-import time
 
-import cupy as cp
 import numpy as np
 
 
@@ -103,46 +101,3 @@ def write_bin(fname, data):
     with open(fname, "wb") as f:
         np.asarray(data.shape, dtype=np.uint32).tofile(f)
         data.tofile(f)
-
-
-def calc_recall(ann_idx, true_nn_idx):
-    # ann_idx = np.asarray(ann_idx)
-    ann_idx = cp.asnumpy(ann_idx)
-    if ann_idx.shape != true_nn_idx.shape:
-        raise RuntimeError(
-            "Incompatible shapes {} vs {}".format(
-                ann_idx.shape, true_nn_idx.shape
-            )
-        )
-    n = 0
-    for i in range(ann_idx.shape[0]):
-        n += np.intersect1d(ann_idx[i, :], true_nn_idx[i, :]).size
-    recall = n / ann_idx.size
-    return recall
-
-
-class BenchmarkTimer:
-    """Provides a context manager that runs a code block `reps` times
-    and records results to the instance variable `timings`. Use like:
-    .. code-block:: python
-        timer = BenchmarkTimer(rep=5)
-        for _ in timer.benchmark_runs():
-            ... do something ...
-        print(np.min(timer.timings))
-
-        This class is part of the rapids/cuml benchmark suite
-    """
-
-    def __init__(self, reps=1, warmup=0):
-        self.warmup = warmup
-        self.reps = reps
-        self.timings = []
-
-    def benchmark_runs(self):
-        for r in range(self.reps + self.warmup):
-            t0 = time.time()
-            yield r
-            t1 = time.time()
-            self.timings.append(t1 - t0)
-            if r >= self.warmup:
-                self.timings.append(t1 - t0)