From c41fc449e8170cc7e3c78ea818f94ba3fc745b45 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Fri, 21 Oct 2022 17:18:41 +0200 Subject: [PATCH 1/2] Calculate max cluster size correctly --- .../raft/spatial/knn/detail/ivf_pq_build.cuh | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh b/cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh index f13dcd8cc6..eee33dc8af 100644 --- a/cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh +++ b/cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh @@ -41,6 +41,7 @@ #include #include +#include #include #include #include @@ -430,11 +431,7 @@ auto calculate_offsets_and_indices(IdxT n_rows, IdxT* data_indices, rmm::cuda_stream_view stream) -> uint32_t { - auto exec_policy = rmm::exec_policy(stream); - uint32_t max_cluster_size = 0; - rmm::device_scalar max_cluster_size_dev_buf(stream); - auto max_cluster_size_dev = max_cluster_size_dev_buf.data(); - update_device(max_cluster_size_dev, &max_cluster_size, 1, stream); + auto exec_policy = rmm::exec_policy(stream); // Calculate the offsets IdxT cumsum = 0; update_device(cluster_offsets, &cumsum, 1, stream); @@ -442,14 +439,13 @@ auto calculate_offsets_and_indices(IdxT n_rows, cluster_sizes, cluster_sizes + n_lists, cluster_offsets + 1, - [max_cluster_size_dev] __device__(IdxT s, uint32_t l) { - atomicMax(max_cluster_size_dev, l); - return s + l; - }); + [] __device__(IdxT s, uint32_t l) { return s + l; }); update_host(&cumsum, cluster_offsets + n_lists, 1, stream); - update_host(&max_cluster_size, max_cluster_size_dev, 1, stream); + uint32_t max_cluster_size = + *thrust::max_element(exec_policy, cluster_sizes, cluster_sizes + n_lists); stream.synchronize(); RAFT_EXPECTS(cumsum == n_rows, "cluster sizes do not add up."); + RAFT_LOG_DEBUG("Max cluster size %d", max_cluster_size); rmm::device_uvector data_offsets_buf(n_lists, stream); auto data_offsets = data_offsets_buf.data(); copy(data_offsets, cluster_offsets, n_lists, stream); @@ -554,7 +550,7 @@ void train_per_cluster(const handle_t& handle, auto cluster_offsets = offsets_buf.data(); auto indices = indices_buf.data(); uint32_t max_cluster_size = calculate_offsets_and_indices( - n_rows, index.n_lists(), labels, cluster_sizes.data(), cluster_offsets, indices, stream); + IdxT(n_rows), index.n_lists(), labels, cluster_sizes.data(), cluster_offsets, indices, stream); rmm::device_uvector pq_labels(max_cluster_size * index.pq_dim(), stream, device_memory); rmm::device_uvector pq_cluster_sizes(index.pq_book_size(), stream, device_memory); From 0999a8ee5d24164435cbbf2b30c494b02c0488ff Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Mon, 24 Oct 2022 12:26:12 +0200 Subject: [PATCH 2/2] Use available functor for exclusive scan --- cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh b/cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh index eee33dc8af..0577d24349 100644 --- a/cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh +++ b/cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh @@ -435,11 +435,8 @@ auto calculate_offsets_and_indices(IdxT n_rows, // Calculate the offsets IdxT cumsum = 0; update_device(cluster_offsets, &cumsum, 1, stream); - thrust::inclusive_scan(exec_policy, - cluster_sizes, - cluster_sizes + n_lists, - cluster_offsets + 1, - [] __device__(IdxT s, uint32_t l) { return s + l; }); + thrust::inclusive_scan( + exec_policy, cluster_sizes, cluster_sizes + n_lists, cluster_offsets + 1, thrust::plus{}); update_host(&cumsum, cluster_offsets + n_lists, 1, stream); uint32_t max_cluster_size = *thrust::max_element(exec_policy, cluster_sizes, cluster_sizes + n_lists); @@ -550,7 +547,7 @@ void train_per_cluster(const handle_t& handle, auto cluster_offsets = offsets_buf.data(); auto indices = indices_buf.data(); uint32_t max_cluster_size = calculate_offsets_and_indices( - IdxT(n_rows), index.n_lists(), labels, cluster_sizes.data(), cluster_offsets, indices, stream); + n_rows, index.n_lists(), labels, cluster_sizes.data(), cluster_offsets, indices, stream); rmm::device_uvector pq_labels(max_cluster_size * index.pq_dim(), stream, device_memory); rmm::device_uvector pq_cluster_sizes(index.pq_book_size(), stream, device_memory);