Skip to content

Commit

Permalink
remove random_seed parameter
Browse files Browse the repository at this point in the history
  • Loading branch information
tfeher committed Jan 18, 2024
1 parent 61175e3 commit 615c9a6
Show file tree
Hide file tree
Showing 10 changed files with 16 additions and 58 deletions.
4 changes: 1 addition & 3 deletions cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -55,7 +55,6 @@ void parse_build_param(const nlohmann::json& conf,
param.n_lists = conf.at("nlist");
if (conf.contains("niter")) { param.kmeans_n_iters = conf.at("niter"); }
if (conf.contains("ratio")) { param.kmeans_trainset_fraction = 1.0 / (double)conf.at("ratio"); }
if (conf.contains("random_seed")) { param.random_seed = conf.at("random_seed"); }
}

template <typename T, typename IdxT>
Expand Down Expand Up @@ -88,7 +87,6 @@ void parse_build_param(const nlohmann::json& conf,
"', should be either 'cluster' or 'subspace'");
}
}
if (conf.contains("random_seed")) { param.random_seed = conf.at("random_seed"); }
}

template <typename T, typename IdxT>
Expand Down
3 changes: 2 additions & 1 deletion cpp/include/raft/neighbors/detail/ivf_flat_build.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -361,12 +361,13 @@ inline auto build(raft::resources const& handle,

// Train the kmeans clustering
{
int random_seed = 137;
auto trainset_ratio = std::max<size_t>(
1, n_rows / std::max<size_t>(params.kmeans_trainset_fraction * n_rows, index.n_lists()));
auto n_rows_train = n_rows / trainset_ratio;
auto trainset = make_device_matrix<T, IdxT>(handle, n_rows_train, index.dim());
raft::spatial::knn::detail::utils::subsample(
handle, dataset, n_rows, trainset.view(), params.random_seed);
handle, dataset, n_rows, trainset.view(), random_seed);
auto centers_view = raft::make_device_matrix_view<float, IdxT>(
index.centers().data_handle(), index.n_lists(), index.dim());
raft::cluster::kmeans_balanced_params kmeans_params;
Expand Down
5 changes: 3 additions & 2 deletions cpp/include/raft/neighbors/detail/ivf_pq_build.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -1751,6 +1751,7 @@ auto build(raft::resources const& handle,
utils::memzero(index.inds_ptrs().data_handle(), index.inds_ptrs().size(), stream);

{
int random_seed = 137;
auto trainset_ratio = std::max<size_t>(
1,
size_t(n_rows) / std::max<size_t>(params.kmeans_trainset_fraction * n_rows, index.n_lists()));
Expand All @@ -1766,14 +1767,14 @@ auto build(raft::resources const& handle,

if constexpr (std::is_same_v<T, float>) {
raft::spatial::knn::detail::utils::subsample(
handle, dataset, n_rows, trainset.view(), params.random_seed);
handle, dataset, n_rows, trainset.view(), random_seed);
} else {
// TODO(tfeher): Enable codebook generation with any type T, and then remove
// trainset tmp.
auto trainset_tmp =
make_device_mdarray<T>(handle, device_mr, make_extents<IdxT>(n_rows_train, dim));
raft::spatial::knn::detail::utils::subsample(
handle, dataset, n_rows, trainset_tmp.view(), params.random_seed);
handle, dataset, n_rows, trainset_tmp.view(), random_seed);
cudaDeviceSynchronize();
RAFT_LOG_INFO("Subsampling done, converting to float");
raft::linalg::unaryOp(trainset.data_handle(),
Expand Down
8 changes: 1 addition & 7 deletions cpp/include/raft/neighbors/ivf_flat_types.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -76,12 +76,6 @@ struct index_params : ann::index_params {
* flag to `true` if you prefer to use as little GPU memory for the database as possible.
*/
bool conservative_memory_allocation = false;
/**
* Seed used for random sampling if kmeans_trainset_fraction < 1.
*
* Value -1 disables random sampling, and results in sampling with a fixed stride.
*/
int random_seed = 0;
};

struct search_params : ann::search_params {
Expand Down
9 changes: 1 addition & 8 deletions cpp/include/raft/neighbors/ivf_pq_types.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -105,13 +105,6 @@ struct index_params : ann::index_params {
* flag to `true` if you prefer to use as little GPU memory for the database as possible.
*/
bool conservative_memory_allocation = false;

/**
* Seed used for random sampling if kmeans_trainset_fraction < 1.
*
* Value -1 disables random sampling, and results in sampling with a fixed stride.
*/
int random_seed = 0;
};

struct search_params : ann::search_params {
Expand Down
10 changes: 2 additions & 8 deletions cpp/test/neighbors/ann_ivf_flat.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -73,15 +73,14 @@ struct AnnIvfFlatInputs {
raft::distance::DistanceType metric;
bool adaptive_centers;
bool host_dataset;
int seed;
};

template <typename IdxT>
::std::ostream& operator<<(::std::ostream& os, const AnnIvfFlatInputs<IdxT>& p)
{
os << "{ " << p.num_queries << ", " << p.num_db_vecs << ", " << p.dim << ", " << p.k << ", "
<< p.nprobe << ", " << p.nlist << ", " << static_cast<int>(p.metric) << ", "
<< p.adaptive_centers << ", " << p.host_dataset << "," << p.seed << '}' << std::endl;
<< p.adaptive_centers << ", " << p.host_dataset << '}' << std::endl;
return os;
}

Expand Down Expand Up @@ -179,7 +178,6 @@ class AnnIVFFlatTest : public ::testing::TestWithParam<AnnIvfFlatInputs<IdxT>> {
index_params.add_data_on_build = false;
index_params.kmeans_trainset_fraction = 0.5;
index_params.metric_arg = 0;
index_params.random_seed = ps.seed;

ivf_flat::index<DataT, IdxT> idx(handle_, index_params, ps.dim);
ivf_flat::index<DataT, IdxT> index_2(handle_, index_params, ps.dim);
Expand Down Expand Up @@ -329,7 +327,6 @@ class AnnIVFFlatTest : public ::testing::TestWithParam<AnnIvfFlatInputs<IdxT>> {
index_params.add_data_on_build = false;
index_params.kmeans_trainset_fraction = 1.0;
index_params.metric_arg = 0;
index_params.random_seed = ps.seed;

auto database_view = raft::make_device_matrix_view<const DataT, IdxT>(
(const DataT*)database.data(), ps.num_db_vecs, ps.dim);
Expand Down Expand Up @@ -500,7 +497,6 @@ class AnnIVFFlatTest : public ::testing::TestWithParam<AnnIvfFlatInputs<IdxT>> {
index_params.add_data_on_build = true;
index_params.kmeans_trainset_fraction = 0.5;
index_params.metric_arg = 0;
index_params.random_seed = ps.seed;

// Create IVF Flat index
auto database_view = raft::make_device_matrix_view<const DataT, IdxT>(
Expand Down Expand Up @@ -611,8 +607,6 @@ const std::vector<AnnIvfFlatInputs<int64_t>> inputs = {
{20, 100000, 16, 10, 20, 1024, raft::distance::DistanceType::L2Expanded, true},
{1000, 100000, 16, 10, 20, 1024, raft::distance::DistanceType::L2Expanded, true},
{10000, 131072, 8, 10, 20, 1024, raft::distance::DistanceType::L2Expanded, false},
{10000, 1000000, 96, 10, 20, 1024, raft::distance::DistanceType::L2Expanded, false, true, -1},
{10000, 1000000, 96, 10, 20, 1024, raft::distance::DistanceType::L2Expanded, false, false, -1},

// host input data
{1000, 10000, 16, 10, 40, 1024, raft::distance::DistanceType::L2Expanded, false, true},
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Copyright (c) 2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -56,7 +56,6 @@ cdef extern from "raft/neighbors/ivf_flat_types.hpp" \
double kmeans_trainset_fraction
bool adaptive_centers
bool conservative_memory_allocation
int random_seed

cdef cppclass index[T, IdxT](ann_index):
index(const device_resources& handle,
Expand Down
15 changes: 2 additions & 13 deletions python/pylibraft/pylibraft/neighbors/ivf_flat/ivf_flat.pyx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Copyright (c) 2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -113,11 +113,6 @@ cdef class IndexParams:
adding new data (through the classification of the added data);
that is, `index.centers()` "drift" together with the changing
distribution of the newly added data.
random_seed : int, default = 0
Seed used for random sampling if kmeans_trainset_fraction < 1.
Value -1 disables random sampling, and results in sampling with a
fixed stride.
"""
cdef c_ivf_flat.index_params params

Expand All @@ -127,16 +122,14 @@ cdef class IndexParams:
kmeans_n_iters=20,
kmeans_trainset_fraction=0.5,
add_data_on_build=True,
bool adaptive_centers=False,
random_seed=0):
bool adaptive_centers=False):
self.params.n_lists = n_lists
self.params.metric = _get_metric(metric)
self.params.metric_arg = 0
self.params.kmeans_n_iters = kmeans_n_iters
self.params.kmeans_trainset_fraction = kmeans_trainset_fraction
self.params.add_data_on_build = add_data_on_build
self.params.adaptive_centers = adaptive_centers
self.params.random_seed = random_seed

@property
def n_lists(self):
Expand All @@ -162,10 +155,6 @@ cdef class IndexParams:
def adaptive_centers(self):
return self.params.adaptive_centers

@property
def random_seed(self):
return self.params.random_seed


cdef class Index:
cdef readonly bool trained
Expand Down
3 changes: 1 addition & 2 deletions python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -78,7 +78,6 @@ cdef extern from "raft/neighbors/ivf_pq_types.hpp" \
codebook_gen codebook_kind
bool force_random_rotation
bool conservative_memory_allocation
int random_seed

cdef cppclass index[IdxT](ann_index):
index(const device_resources& handle,
Expand Down
14 changes: 2 additions & 12 deletions python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -156,10 +156,6 @@ cdef class IndexParams:
repeated calls to `extend` (extending the database).
To disable this behavior and use as little GPU memory for the
database as possible, set this flag to `True`.
random_seed : int, default = 0
Seed used for random sampling if kmeans_trainset_fraction < 1.
Value -1 disables random sampling, and results in sampling with a
fixed stride.
"""
def __init__(self, *,
n_lists=1024,
Expand All @@ -171,8 +167,7 @@ cdef class IndexParams:
codebook_kind="subspace",
force_random_rotation=False,
add_data_on_build=True,
conservative_memory_allocation=False,
random_seed=0):
conservative_memory_allocation=False):
self.params.n_lists = n_lists
self.params.metric = _get_metric(metric)
self.params.metric_arg = 0
Expand All @@ -190,7 +185,6 @@ cdef class IndexParams:
self.params.add_data_on_build = add_data_on_build
self.params.conservative_memory_allocation = \
conservative_memory_allocation
self.params.random_seed = random_seed

@property
def n_lists(self):
Expand Down Expand Up @@ -232,10 +226,6 @@ cdef class IndexParams:
def conservative_memory_allocation(self):
return self.params.conservative_memory_allocation

@property
def random_seed(self):
return self.params.random_seed


cdef class Index:
# We store a pointer to the index because it does not have a trivial
Expand Down

0 comments on commit 615c9a6

Please sign in to comment.