Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use sqeuclidean for metric name in ivf_pq python bindings #1160

Merged
merged 1 commit into from
Jan 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 15 additions & 8 deletions python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
# cython: embedsignature = True
# cython: language_level = 3

import warnings

import numpy as np

from cython.operator cimport dereference as deref
Expand Down Expand Up @@ -63,17 +65,22 @@ from pylibraft.neighbors.ivf_pq.cpp.c_ivf_pq cimport (

def _get_metric(metric):
SUPPORTED_DISTANCES = {
"l2_expanded": DistanceType.L2Expanded,
"sqeuclidean": DistanceType.L2Expanded,
"euclidean": DistanceType.L2SqrtExpanded,
"inner_product": DistanceType.InnerProduct
}
if metric not in SUPPORTED_DISTANCES:
if metric == "l2_expanded":
warnings.warn("Using l2_expanded as a metric name is deprecated,"
" use sqeuclidean instead", FutureWarning)
return DistanceType.L2Expanded

raise ValueError("metric %s is not supported" % metric)
return SUPPORTED_DISTANCES[metric]


cdef _get_metric_string(DistanceType metric):
return {DistanceType.L2Expanded : "l2_expanded",
return {DistanceType.L2Expanded : "sqeuclidean",
DistanceType.InnerProduct: "inner_product",
DistanceType.L2SqrtExpanded: "euclidean"}[metric]

Expand Down Expand Up @@ -118,7 +125,7 @@ cdef class IndexParams:

def __init__(self, *,
n_lists=1024,
metric="l2_expanded",
metric="sqeuclidean",
kmeans_n_iters=20,
kmeans_trainset_fraction=0.5,
pq_bits=8,
Expand All @@ -133,10 +140,10 @@ cdef class IndexParams:
----------
n_lists : int, default = 1024
The number of clusters used in the coarse quantizer.
metric : string denoting the metric type, default="l2_expanded"
Valid values for metric: ["l2_expanded", "inner_product",
metric : string denoting the metric type, default="sqeuclidean"
Valid values for metric: ["sqeuclidean", "inner_product",
"euclidean"], where
- l2_expanded is the euclidean distance without the square root
- sqeuclidean is the euclidean distance without the square root
operation, i.e.: distance(a,b) = \\sum_i (a_i - b_i)^2,
- euclidean is the euclidean distance
- inner product distance is defined as
Expand Down Expand Up @@ -251,7 +258,7 @@ cdef class Index:
# We create a placeholder object. The actual parameter values do
# not matter, it will be replaced with a built index object later.
self.index = new c_ivf_pq.index[uint64_t](
deref(handle_), _get_metric("l2_expanded"),
deref(handle_), _get_metric("sqeuclidean"),
c_ivf_pq.codebook_gen.PER_SUBSPACE,
<uint32_t>1,
<uint32_t>4,
Expand Down Expand Up @@ -347,7 +354,7 @@ def build(IndexParams index_params, dataset, handle=None):
>>> handle = Handle()
>>> index_params = ivf_pq.IndexParams(
... n_lists=1024,
... metric="l2_expanded",
... metric="sqeuclidean",
... pq_dim=10)
>>> index = ivf_pq.build(index_params, dataset, handle=handle)

Expand Down
6 changes: 3 additions & 3 deletions python/pylibraft/pylibraft/neighbors/refine.pyx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2022, NVIDIA CORPORATION.
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -215,7 +215,7 @@ cdef host_matrix_view[int8_t, uint64_t, row_major] \
@auto_sync_handle
@auto_convert_output
def refine(dataset, queries, candidates, k=None, indices=None, distances=None,
metric="l2_expanded", handle=None):
metric="sqeuclidean", handle=None):
"""
Refine nearest neighbor search.

Expand Down Expand Up @@ -271,7 +271,7 @@ def refine(dataset, queries, candidates, k=None, indices=None, distances=None,
>>> dataset = cp.random.random_sample((n_samples, n_features),
... dtype=cp.float32)
>>> handle = Handle()
>>> index_params = ivf_pq.IndexParams(n_lists=1024, metric="l2_expanded",
>>> index_params = ivf_pq.IndexParams(n_lists=1024, metric="sqeuclidean",
... pq_dim=10)
>>> index = ivf_pq.build(index_params, dataset, handle=handle)

Expand Down
26 changes: 13 additions & 13 deletions python/pylibraft/pylibraft/test/test_ivf_pq.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def check_distances(dataset, queries, metric, out_idx, out_dist, eps=None):
for i in range(queries.shape[0]):
X = queries[np.newaxis, i, :]
Y = dataset[out_idx[i, :], :]
if metric == "l2_expanded":
if metric == "sqeuclidean":
dist[i, :] = pairwise_distances(X, Y, "sqeuclidean")
elif metric == "euclidean":
dist[i, :] = pairwise_distances(X, Y, "euclidean")
Expand Down Expand Up @@ -177,7 +177,7 @@ def run_ivf_pq_build_search_test(

# Calculate reference values with sklearn
skl_metric = {
"l2_expanded": "sqeuclidean",
"sqeuclidean": "sqeuclidean",
"inner_product": "cosine",
"euclidean": "euclidean",
}[metric]
Expand All @@ -204,14 +204,14 @@ def test_ivf_pq_dtypes(
n_rows, n_cols, n_queries, n_lists, dtype, inplace, array_type
):
# Note that inner_product tests use normalized input which we cannot
# represent in int8, therefore we test only l2_expanded metric here.
# represent in int8, therefore we test only sqeuclidean metric here.
run_ivf_pq_build_search_test(
n_rows=n_rows,
n_cols=n_cols,
n_queries=n_queries,
k=10,
n_lists=n_lists,
metric="l2_expanded",
metric="sqeuclidean",
dtype=dtype,
inplace=inplace,
array_type=array_type,
Expand Down Expand Up @@ -246,14 +246,14 @@ def test_ivf_pq_n(params):
n_queries=params["n_queries"],
k=params["k"],
n_lists=params["n_lists"],
metric="l2_expanded",
metric="sqeuclidean",
dtype=np.float32,
compare=False,
)


@pytest.mark.parametrize(
"metric", ["l2_expanded", "inner_product", "euclidean"]
"metric", ["sqeuclidean", "inner_product", "euclidean"]
)
@pytest.mark.parametrize("dtype", [np.float32])
@pytest.mark.parametrize("codebook_kind", ["subspace", "cluster"])
Expand Down Expand Up @@ -298,7 +298,7 @@ def test_ivf_pq_params(params):
n_queries=1000,
k=10,
n_lists=params["n_lists"],
metric="l2_expanded",
metric="sqeuclidean",
dtype=np.float32,
pq_bits=params["pq_bits"],
pq_dim=params["pq_dims"],
Expand Down Expand Up @@ -344,7 +344,7 @@ def test_ivf_pq_search_params(params):
k=params["k"],
n_lists=100,
n_probes=params["n_probes"],
metric="l2_expanded",
metric="sqeuclidean",
dtype=np.float32,
lut_dtype=params["lut"],
internal_distance_dtype=params["idd"],
Expand All @@ -360,7 +360,7 @@ def test_extend(dtype, array_type):
n_queries=100,
k=10,
n_lists=100,
metric="l2_expanded",
metric="sqeuclidean",
dtype=dtype,
add_data_on_build=False,
array_type=array_type,
Expand All @@ -375,7 +375,7 @@ def test_build_assertions():
n_queries=100,
k=10,
n_lists=100,
metric="l2_expanded",
metric="sqeuclidean",
dtype=np.float64,
)

Expand All @@ -388,7 +388,7 @@ def test_build_assertions():

index_params = ivf_pq.IndexParams(
n_lists=50,
metric="l2_expanded",
metric="sqeuclidean",
kmeans_n_iters=20,
kmeans_trainset_fraction=1,
add_data_on_build=False,
Expand Down Expand Up @@ -482,7 +482,7 @@ def test_search_inputs(params):
out_dist_device = device_ndarray(out_dist)

index_params = ivf_pq.IndexParams(
n_lists=50, metric="l2_expanded", add_data_on_build=True
n_lists=50, metric="sqeuclidean", add_data_on_build=True
)

dataset = generate_data((n_rows, n_cols), dtype)
Expand Down Expand Up @@ -511,7 +511,7 @@ def test_save_load():
dataset = generate_data((n_rows, n_cols), dtype)
dataset_device = device_ndarray(dataset)

build_params = ivf_pq.IndexParams(n_lists=100, metric="l2_expanded")
build_params = ivf_pq.IndexParams(n_lists=100, metric="sqeuclidean")
index = ivf_pq.build(build_params, dataset_device)

assert index.trained
Expand Down
8 changes: 4 additions & 4 deletions python/pylibraft/pylibraft/test/test_refine.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def run_refine(
n_rows=500,
n_cols=50,
n_queries=100,
metric="l2_expanded",
metric="sqeuclidean",
k0=40,
k=10,
inplace=False,
Expand All @@ -49,7 +49,7 @@ def run_refine(
queries_device = device_ndarray(queries)

# Calculate reference values with sklearn
skl_metric = {"l2_expanded": "euclidean", "inner_product": "cosine"}[
skl_metric = {"sqeuclidean": "euclidean", "inner_product": "cosine"}[
metric
]
nn_skl = NearestNeighbors(
Expand Down Expand Up @@ -106,7 +106,7 @@ def run_refine(
if recall <= 0.999:
# We did not find the same neighbor indices.
# We could have found other neighbor with same distance.
if metric == "l2_expanded":
if metric == "sqeuclidean":
skl_dist = np.power(skl_dist[:, :k], 2)
elif metric == "inner_product":
skl_dist = 1 - skl_dist[:, :k]
Expand All @@ -120,7 +120,7 @@ def run_refine(

@pytest.mark.parametrize("n_queries", [100, 1024, 37])
@pytest.mark.parametrize("inplace", [True, False])
@pytest.mark.parametrize("metric", ["l2_expanded", "inner_product"])
@pytest.mark.parametrize("metric", ["sqeuclidean", "inner_product"])
@pytest.mark.parametrize("dtype", [np.float32, np.int8, np.uint8])
@pytest.mark.parametrize("memory_type", ["device", "host"])
def test_refine_dtypes(n_queries, dtype, inplace, metric, memory_type):
Expand Down