From 615c9a6b0bd4f86d83913644de804959abc03385 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Thu, 18 Jan 2024 01:16:00 +0100 Subject: [PATCH] remove random_seed parameter --- .../ann/src/raft/raft_ann_bench_param_parser.h | 4 +--- .../raft/neighbors/detail/ivf_flat_build.cuh | 3 ++- .../raft/neighbors/detail/ivf_pq_build.cuh | 5 +++-- cpp/include/raft/neighbors/ivf_flat_types.hpp | 8 +------- cpp/include/raft/neighbors/ivf_pq_types.hpp | 9 +-------- cpp/test/neighbors/ann_ivf_flat.cuh | 10 ++-------- .../neighbors/ivf_flat/cpp/c_ivf_flat.pxd | 3 +-- .../pylibraft/neighbors/ivf_flat/ivf_flat.pyx | 15 ++------------- .../pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd | 3 +-- .../pylibraft/neighbors/ivf_pq/ivf_pq.pyx | 14 ++------------ 10 files changed, 16 insertions(+), 58 deletions(-) diff --git a/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h b/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h index 38e473f9ae..2a021a8a12 100644 --- a/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h +++ b/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -55,7 +55,6 @@ void parse_build_param(const nlohmann::json& conf, param.n_lists = conf.at("nlist"); if (conf.contains("niter")) { param.kmeans_n_iters = conf.at("niter"); } if (conf.contains("ratio")) { param.kmeans_trainset_fraction = 1.0 / (double)conf.at("ratio"); } - if (conf.contains("random_seed")) { param.random_seed = conf.at("random_seed"); } } template @@ -88,7 +87,6 @@ void parse_build_param(const nlohmann::json& conf, "', should be either 'cluster' or 'subspace'"); } } - if (conf.contains("random_seed")) { param.random_seed = conf.at("random_seed"); } } template diff --git a/cpp/include/raft/neighbors/detail/ivf_flat_build.cuh b/cpp/include/raft/neighbors/detail/ivf_flat_build.cuh index 0f710d5b81..ab30b4009d 100644 --- a/cpp/include/raft/neighbors/detail/ivf_flat_build.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_flat_build.cuh @@ -361,12 +361,13 @@ inline auto build(raft::resources const& handle, // Train the kmeans clustering { + int random_seed = 137; auto trainset_ratio = std::max( 1, n_rows / std::max(params.kmeans_trainset_fraction * n_rows, index.n_lists())); auto n_rows_train = n_rows / trainset_ratio; auto trainset = make_device_matrix(handle, n_rows_train, index.dim()); raft::spatial::knn::detail::utils::subsample( - handle, dataset, n_rows, trainset.view(), params.random_seed); + handle, dataset, n_rows, trainset.view(), random_seed); auto centers_view = raft::make_device_matrix_view( index.centers().data_handle(), index.n_lists(), index.dim()); raft::cluster::kmeans_balanced_params kmeans_params; diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh index b2bafd8372..65278d7944 100644 --- a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh @@ -1751,6 +1751,7 @@ auto build(raft::resources const& handle, utils::memzero(index.inds_ptrs().data_handle(), index.inds_ptrs().size(), stream); { + int random_seed = 137; auto trainset_ratio = std::max( 1, size_t(n_rows) / std::max(params.kmeans_trainset_fraction * n_rows, index.n_lists())); @@ -1766,14 +1767,14 @@ auto build(raft::resources const& handle, if constexpr (std::is_same_v) { raft::spatial::knn::detail::utils::subsample( - handle, dataset, n_rows, trainset.view(), params.random_seed); + handle, dataset, n_rows, trainset.view(), random_seed); } else { // TODO(tfeher): Enable codebook generation with any type T, and then remove // trainset tmp. auto trainset_tmp = make_device_mdarray(handle, device_mr, make_extents(n_rows_train, dim)); raft::spatial::knn::detail::utils::subsample( - handle, dataset, n_rows, trainset_tmp.view(), params.random_seed); + handle, dataset, n_rows, trainset_tmp.view(), random_seed); cudaDeviceSynchronize(); RAFT_LOG_INFO("Subsampling done, converting to float"); raft::linalg::unaryOp(trainset.data_handle(), diff --git a/cpp/include/raft/neighbors/ivf_flat_types.hpp b/cpp/include/raft/neighbors/ivf_flat_types.hpp index 317e10cf92..180fe2e21b 100644 --- a/cpp/include/raft/neighbors/ivf_flat_types.hpp +++ b/cpp/include/raft/neighbors/ivf_flat_types.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -76,12 +76,6 @@ struct index_params : ann::index_params { * flag to `true` if you prefer to use as little GPU memory for the database as possible. */ bool conservative_memory_allocation = false; - /** - * Seed used for random sampling if kmeans_trainset_fraction < 1. - * - * Value -1 disables random sampling, and results in sampling with a fixed stride. - */ - int random_seed = 0; }; struct search_params : ann::search_params { diff --git a/cpp/include/raft/neighbors/ivf_pq_types.hpp b/cpp/include/raft/neighbors/ivf_pq_types.hpp index 51536583f8..45ab18c84f 100644 --- a/cpp/include/raft/neighbors/ivf_pq_types.hpp +++ b/cpp/include/raft/neighbors/ivf_pq_types.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -105,13 +105,6 @@ struct index_params : ann::index_params { * flag to `true` if you prefer to use as little GPU memory for the database as possible. */ bool conservative_memory_allocation = false; - - /** - * Seed used for random sampling if kmeans_trainset_fraction < 1. - * - * Value -1 disables random sampling, and results in sampling with a fixed stride. - */ - int random_seed = 0; }; struct search_params : ann::search_params { diff --git a/cpp/test/neighbors/ann_ivf_flat.cuh b/cpp/test/neighbors/ann_ivf_flat.cuh index 014048a068..39439d392d 100644 --- a/cpp/test/neighbors/ann_ivf_flat.cuh +++ b/cpp/test/neighbors/ann_ivf_flat.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -73,7 +73,6 @@ struct AnnIvfFlatInputs { raft::distance::DistanceType metric; bool adaptive_centers; bool host_dataset; - int seed; }; template @@ -81,7 +80,7 @@ template { os << "{ " << p.num_queries << ", " << p.num_db_vecs << ", " << p.dim << ", " << p.k << ", " << p.nprobe << ", " << p.nlist << ", " << static_cast(p.metric) << ", " - << p.adaptive_centers << ", " << p.host_dataset << "," << p.seed << '}' << std::endl; + << p.adaptive_centers << ", " << p.host_dataset << '}' << std::endl; return os; } @@ -179,7 +178,6 @@ class AnnIVFFlatTest : public ::testing::TestWithParam> { index_params.add_data_on_build = false; index_params.kmeans_trainset_fraction = 0.5; index_params.metric_arg = 0; - index_params.random_seed = ps.seed; ivf_flat::index idx(handle_, index_params, ps.dim); ivf_flat::index index_2(handle_, index_params, ps.dim); @@ -329,7 +327,6 @@ class AnnIVFFlatTest : public ::testing::TestWithParam> { index_params.add_data_on_build = false; index_params.kmeans_trainset_fraction = 1.0; index_params.metric_arg = 0; - index_params.random_seed = ps.seed; auto database_view = raft::make_device_matrix_view( (const DataT*)database.data(), ps.num_db_vecs, ps.dim); @@ -500,7 +497,6 @@ class AnnIVFFlatTest : public ::testing::TestWithParam> { index_params.add_data_on_build = true; index_params.kmeans_trainset_fraction = 0.5; index_params.metric_arg = 0; - index_params.random_seed = ps.seed; // Create IVF Flat index auto database_view = raft::make_device_matrix_view( @@ -611,8 +607,6 @@ const std::vector> inputs = { {20, 100000, 16, 10, 20, 1024, raft::distance::DistanceType::L2Expanded, true}, {1000, 100000, 16, 10, 20, 1024, raft::distance::DistanceType::L2Expanded, true}, {10000, 131072, 8, 10, 20, 1024, raft::distance::DistanceType::L2Expanded, false}, - {10000, 1000000, 96, 10, 20, 1024, raft::distance::DistanceType::L2Expanded, false, true, -1}, - {10000, 1000000, 96, 10, 20, 1024, raft::distance::DistanceType::L2Expanded, false, false, -1}, // host input data {1000, 10000, 16, 10, 40, 1024, raft::distance::DistanceType::L2Expanded, false, true}, diff --git a/python/pylibraft/pylibraft/neighbors/ivf_flat/cpp/c_ivf_flat.pxd b/python/pylibraft/pylibraft/neighbors/ivf_flat/cpp/c_ivf_flat.pxd index 035d2814fc..a281d33310 100644 --- a/python/pylibraft/pylibraft/neighbors/ivf_flat/cpp/c_ivf_flat.pxd +++ b/python/pylibraft/pylibraft/neighbors/ivf_flat/cpp/c_ivf_flat.pxd @@ -1,5 +1,5 @@ # -# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# Copyright (c) 2023, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -56,7 +56,6 @@ cdef extern from "raft/neighbors/ivf_flat_types.hpp" \ double kmeans_trainset_fraction bool adaptive_centers bool conservative_memory_allocation - int random_seed cdef cppclass index[T, IdxT](ann_index): index(const device_resources& handle, diff --git a/python/pylibraft/pylibraft/neighbors/ivf_flat/ivf_flat.pyx b/python/pylibraft/pylibraft/neighbors/ivf_flat/ivf_flat.pyx index 64ac1a9ce9..d8fbdc74da 100644 --- a/python/pylibraft/pylibraft/neighbors/ivf_flat/ivf_flat.pyx +++ b/python/pylibraft/pylibraft/neighbors/ivf_flat/ivf_flat.pyx @@ -1,5 +1,5 @@ # -# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# Copyright (c) 2023, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -113,11 +113,6 @@ cdef class IndexParams: adding new data (through the classification of the added data); that is, `index.centers()` "drift" together with the changing distribution of the newly added data. - random_seed : int, default = 0 - Seed used for random sampling if kmeans_trainset_fraction < 1. - Value -1 disables random sampling, and results in sampling with a - fixed stride. - """ cdef c_ivf_flat.index_params params @@ -127,8 +122,7 @@ cdef class IndexParams: kmeans_n_iters=20, kmeans_trainset_fraction=0.5, add_data_on_build=True, - bool adaptive_centers=False, - random_seed=0): + bool adaptive_centers=False): self.params.n_lists = n_lists self.params.metric = _get_metric(metric) self.params.metric_arg = 0 @@ -136,7 +130,6 @@ cdef class IndexParams: self.params.kmeans_trainset_fraction = kmeans_trainset_fraction self.params.add_data_on_build = add_data_on_build self.params.adaptive_centers = adaptive_centers - self.params.random_seed = random_seed @property def n_lists(self): @@ -162,10 +155,6 @@ cdef class IndexParams: def adaptive_centers(self): return self.params.adaptive_centers - @property - def random_seed(self): - return self.params.random_seed - cdef class Index: cdef readonly bool trained diff --git a/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd b/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd index f40e5465b7..531c2428e9 100644 --- a/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd +++ b/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd @@ -1,5 +1,5 @@ # -# Copyright (c) 2022-2024, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -78,7 +78,6 @@ cdef extern from "raft/neighbors/ivf_pq_types.hpp" \ codebook_gen codebook_kind bool force_random_rotation bool conservative_memory_allocation - int random_seed cdef cppclass index[IdxT](ann_index): index(const device_resources& handle, diff --git a/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx b/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx index b8a3cf4887..0c1bbf6b9c 100644 --- a/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx +++ b/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx @@ -1,5 +1,5 @@ # -# Copyright (c) 2022-2024, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -156,10 +156,6 @@ cdef class IndexParams: repeated calls to `extend` (extending the database). To disable this behavior and use as little GPU memory for the database as possible, set this flat to `True`. - random_seed : int, default = 0 - Seed used for random sampling if kmeans_trainset_fraction < 1. - Value -1 disables random sampling, and results in sampling with a - fixed stride. """ def __init__(self, *, n_lists=1024, @@ -171,8 +167,7 @@ cdef class IndexParams: codebook_kind="subspace", force_random_rotation=False, add_data_on_build=True, - conservative_memory_allocation=False, - random_seed=0): + conservative_memory_allocation=False): self.params.n_lists = n_lists self.params.metric = _get_metric(metric) self.params.metric_arg = 0 @@ -190,7 +185,6 @@ cdef class IndexParams: self.params.add_data_on_build = add_data_on_build self.params.conservative_memory_allocation = \ conservative_memory_allocation - self.params.random_seed = random_seed @property def n_lists(self): @@ -232,10 +226,6 @@ cdef class IndexParams: def conservative_memory_allocation(self): return self.params.conservative_memory_allocation - @property - def random_seed(self): - return self.params.random_seed - cdef class Index: # We store a pointer to the index because it dose not have a trivial