Skip to content

Commit

Permalink
Merge pull request #1701 from rapidsai/branch-23.08
Browse files Browse the repository at this point in the history
Forward-merge branch-23.08 to branch-23.10
  • Loading branch information
GPUtester authored Aug 1, 2023
2 parents 9720bf8 + f42fa40 commit affd29b
Show file tree
Hide file tree
Showing 8 changed files with 1,083 additions and 5 deletions.
74 changes: 74 additions & 0 deletions cpp/include/raft/neighbors/detail/ivf_flat_build.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <raft/linalg/add.cuh>
#include <raft/linalg/map.cuh>
#include <raft/linalg/norm.cuh>
#include <raft/neighbors/ivf_flat_codepacker.hpp>
#include <raft/neighbors/ivf_flat_types.hpp>
#include <raft/neighbors/ivf_list.hpp>
#include <raft/neighbors/ivf_list_types.hpp>
Expand Down Expand Up @@ -416,4 +417,77 @@ inline void fill_refinement_index(raft::resources const& handle,
refinement_index->veclen());
RAFT_CUDA_TRY(cudaPeekAtLastError());
}

template <typename T>
__global__ void pack_interleaved_list_kernel(
const T* codes,
T* list_data,
uint32_t n_rows,
uint32_t dim,
uint32_t veclen,
std::variant<uint32_t, const uint32_t*> offset_or_indices)
{
uint32_t tid = blockIdx.x * blockDim.x + threadIdx.x;
const uint32_t dst_ix = std::holds_alternative<uint32_t>(offset_or_indices)
? std::get<uint32_t>(offset_or_indices) + tid
: std::get<const uint32_t*>(offset_or_indices)[tid];
if (tid < n_rows) { codepacker::pack_1(codes + tid * dim, list_data, dim, veclen, dst_ix); }
}

template <typename T>
__global__ void unpack_interleaved_list_kernel(
const T* list_data,
T* codes,
uint32_t n_rows,
uint32_t dim,
uint32_t veclen,
std::variant<uint32_t, const uint32_t*> offset_or_indices)
{
uint32_t tid = blockIdx.x * blockDim.x + threadIdx.x;
const uint32_t src_ix = std::holds_alternative<uint32_t>(offset_or_indices)
? std::get<uint32_t>(offset_or_indices) + tid
: std::get<const uint32_t*>(offset_or_indices)[tid];
if (tid < n_rows) { codepacker::unpack_1(list_data, codes + tid * dim, dim, veclen, src_ix); }
}

template <typename T, typename IdxT>
void pack_list_data(
raft::resources const& res,
device_matrix_view<const T, uint32_t, row_major> codes,
uint32_t veclen,
std::variant<uint32_t, const uint32_t*> offset_or_indices,
device_mdspan<T, typename list_spec<uint32_t, T, IdxT>::list_extents, row_major> list_data)
{
uint32_t n_rows = codes.extent(0);
uint32_t dim = codes.extent(1);
if (n_rows == 0 || dim == 0) return;
static constexpr uint32_t kBlockSize = 256;
dim3 blocks(div_rounding_up_safe<uint32_t>(n_rows, kBlockSize), 1, 1);
dim3 threads(kBlockSize, 1, 1);
auto stream = resource::get_cuda_stream(res);
pack_interleaved_list_kernel<<<blocks, threads, 0, stream>>>(
codes.data_handle(), list_data.data_handle(), n_rows, dim, veclen, offset_or_indices);
RAFT_CUDA_TRY(cudaPeekAtLastError());
}

template <typename T, typename IdxT>
void unpack_list_data(
raft::resources const& res,
device_mdspan<const T, typename list_spec<uint32_t, T, IdxT>::list_extents, row_major> list_data,
uint32_t veclen,
std::variant<uint32_t, const uint32_t*> offset_or_indices,
device_matrix_view<T, uint32_t, row_major> codes)
{
uint32_t n_rows = codes.extent(0);
uint32_t dim = codes.extent(1);
if (n_rows == 0 || dim == 0) return;
static constexpr uint32_t kBlockSize = 256;
dim3 blocks(div_rounding_up_safe<uint32_t>(n_rows, kBlockSize), 1, 1);
dim3 threads(kBlockSize, 1, 1);
auto stream = resource::get_cuda_stream(res);
unpack_interleaved_list_kernel<<<blocks, threads, 0, stream>>>(
list_data.data_handle(), codes.data_handle(), n_rows, dim, veclen, offset_or_indices);
RAFT_CUDA_TRY(cudaPeekAtLastError());
}

} // namespace raft::neighbors::ivf_flat::detail
115 changes: 115 additions & 0 deletions cpp/include/raft/neighbors/ivf_flat_codepacker.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <raft/core/device_mdspan.hpp>
#include <raft/core/resource/cuda_stream.hpp>
#include <raft/core/resources.hpp>
#include <raft/neighbors/ivf_flat_types.hpp>

#ifdef _RAFT_HAS_CUDA
#include <raft/util/pow2_utils.cuh>
#else
#include <raft/util/integer_utils.hpp>
#endif

namespace raft::neighbors::ivf_flat::codepacker {

template <typename T>
_RAFT_HOST_DEVICE inline auto roundDown(T x)
{
#if defined(_RAFT_HAS_CUDA)
return Pow2<kIndexGroupSize>::roundDown(x);
#else
return raft::round_down_safe(x, kIndexGroupSize);
#endif
}

template <typename T>
_RAFT_HOST_DEVICE inline auto mod(T x)
{
#if defined(_RAFT_HAS_CUDA)
return Pow2<kIndexGroupSize>::mod(x);
#else
return x % kIndexGroupSize;
#endif
}

/**
* Write one flat code into a block by the given offset. The offset indicates the id of the record
* in the list. This function interleaves the code and is intended to later copy the interleaved
* codes over to the IVF list on device. NB: no memory allocation happens here; the block must fit
* the record (offset + 1).
*
* @tparam T
*
* @param[in] flat_code input flat code
* @param[out] block block of memory to write interleaved codes to
* @param[in] dim dimension of the flat code
* @param[in] veclen size of interleaved data chunks
* @param[in] offset how many records to skip before writing the data into the list
*/
template <typename T>
_RAFT_HOST_DEVICE void pack_1(
const T* flat_code, T* block, uint32_t dim, uint32_t veclen, uint32_t offset)
{
// The data is written in interleaved groups of `index::kGroupSize` vectors
// using interleaved_group = Pow2<kIndexGroupSize>;

// Interleave dimensions of the source vector while recording it.
// NB: such `veclen` is selected, that `dim % veclen == 0`
auto group_offset = roundDown(offset);
auto ingroup_id = mod(offset) * veclen;

for (uint32_t l = 0; l < dim; l += veclen) {
for (uint32_t j = 0; j < veclen; j++) {
block[group_offset * dim + l * kIndexGroupSize + ingroup_id + j] = flat_code[l + j];
}
}
}

/**
* Unpack 1 record of a single list (cluster) in the index to fetch the flat code. The offset
* indicates the id of the record. This function fetches one flat code from an interleaved code.
*
* @tparam T
*
* @param[in] block interleaved block. The block can be thought of as the whole inverted list in
* interleaved format.
* @param[out] flat_code output flat code
* @param[in] dim dimension of the flat code
* @param[in] veclen size of interleaved data chunks
* @param[in] offset fetch the flat code by the given offset
*/
template <typename T>
_RAFT_HOST_DEVICE void unpack_1(
const T* block, T* flat_code, uint32_t dim, uint32_t veclen, uint32_t offset)
{
// The data is written in interleaved groups of `index::kGroupSize` vectors
// using interleaved_group = Pow2<kIndexGroupSize>;

// NB: such `veclen` is selected, that `dim % veclen == 0`
auto group_offset = roundDown(offset);
auto ingroup_id = mod(offset) * veclen;

for (uint32_t l = 0; l < dim; l += veclen) {
for (uint32_t j = 0; j < veclen; j++) {
flat_code[l + j] = block[group_offset * dim + l * kIndexGroupSize + ingroup_id + j];
}
}
}
} // namespace raft::neighbors::ivf_flat::codepacker
110 changes: 110 additions & 0 deletions cpp/include/raft/neighbors/ivf_flat_helpers.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <raft/neighbors/detail/ivf_flat_build.cuh>
#include <raft/neighbors/ivf_flat_types.hpp>

#include <raft/core/device_mdspan.hpp>
#include <raft/core/resources.hpp>

namespace raft::neighbors::ivf_flat::helpers {
/**
* @defgroup ivf_flat_helpers Helper functions for manipulationg IVF Flat Index
* @{
*/

namespace codepacker {

/**
* Write flat codes into an existing list by the given offset.
*
* NB: no memory allocation happens here; the list must fit the data (offset + n_vec).
*
* Usage example:
* @code{.cpp}
* auto list_data = index.lists()[label]->data.view();
* // allocate the buffer for the input codes
* auto codes = raft::make_device_matrix<T>(res, n_vec, index.dim());
* ... prepare n_vecs to pack into the list in codes ...
* // write codes into the list starting from the 42nd position
* ivf_pq::helpers::codepacker::pack(
* res, make_const_mdspan(codes.view()), index.veclen(), 42, list_data);
* @endcode
*
* @tparam T
* @tparam IdxT
*
* @param[in] res
* @param[in] codes flat codes [n_vec, dim]
* @param[in] veclen size of interleaved data chunks
* @param[in] offset how many records to skip before writing the data into the list
* @param[inout] list_data block to write into
*/
template <typename T, typename IdxT>
void pack(
raft::resources const& res,
device_matrix_view<const T, uint32_t, row_major> codes,
uint32_t veclen,
uint32_t offset,
device_mdspan<T, typename list_spec<uint32_t, T, IdxT>::list_extents, row_major> list_data)
{
raft::neighbors::ivf_flat::detail::pack_list_data<T, IdxT>(res, codes, veclen, offset, list_data);
}

/**
* @brief Unpack `n_take` consecutive records of a single list (cluster) in the compressed index
* starting at given `offset`.
*
* Usage example:
* @code{.cpp}
* auto list_data = index.lists()[label]->data.view();
* // allocate the buffer for the output
* uint32_t n_take = 4;
* auto codes = raft::make_device_matrix<T>(res, n_take, index.dim());
* uint32_t offset = 0;
* // unpack n_take elements from the list
* ivf_pq::helpers::codepacker::unpack(res, list_data, index.veclen(), offset, codes.view());
* @endcode
*
* @tparam T
* @tparam IdxT
*
* @param[in] res raft resource
* @param[in] list_data block to read from
* @param[in] veclen size of interleaved data chunks
* @param[in] offset
* How many records in the list to skip.
* @param[inout] codes
* the destination buffer [n_take, index.dim()].
* The length `n_take` defines how many records to unpack,
* it must be <= the list size.
*/
template <typename T, typename IdxT>
void unpack(
raft::resources const& res,
device_mdspan<const T, typename list_spec<uint32_t, T, IdxT>::list_extents, row_major> list_data,
uint32_t veclen,
uint32_t offset,
device_matrix_view<T, uint32_t, row_major> codes)
{
raft::neighbors::ivf_flat::detail::unpack_list_data<T, IdxT>(
res, list_data, veclen, offset, codes);
}
} // namespace codepacker
/** @} */
} // namespace raft::neighbors::ivf_flat::helpers
Loading

0 comments on commit affd29b

Please sign in to comment.