Skip to content

Commit

Permalink
Split memory_pool, fused_l2_knn, coalesced_reduction, selection_faiss
Browse files Browse the repository at this point in the history
  • Loading branch information
ahendriksen committed Apr 14, 2023
1 parent 95638fd commit 8a4c4a8
Show file tree
Hide file tree
Showing 19 changed files with 620 additions and 80 deletions.
40 changes: 29 additions & 11 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,17 @@
set(RAPIDS_VERSION "23.04")
set(RAFT_VERSION "23.04.00")

# cmake_minimum_required() has to run before any other command that depends on
# policy settings (FetchContent, file(DOWNLOAD), the included RAPIDS.cmake), so
# it is issued first instead of after the bootstrap below.
cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR)

# Declare rapids-cmake ahead of RAPIDS.cmake.
# NOTE(review): this points at a personal fork/branch, presumably so that the
# RAPIDS.cmake bootstrap below resolves rapids-cmake to it -- confirm, and
# switch back to the upstream repository before merging.
include(FetchContent)
FetchContent_Declare(
  rapids-cmake
  GIT_REPOSITORY https://github.com/ahendriksen/rapids-cmake.git
  GIT_TAG different-rmm
)

# NOTE(review): RAPIDS.cmake is downloaded from branch-23.02 while RAPIDS_VERSION
# above is 23.04 -- confirm that the mismatch is intentional.
file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.02/RAPIDS.cmake
     ${CMAKE_CURRENT_BINARY_DIR}/RAPIDS.cmake
)
include(${CMAKE_CURRENT_BINARY_DIR}/RAPIDS.cmake)
include(../fetch_rapids.cmake)
include(rapids-cmake)
Expand Down Expand Up @@ -276,12 +287,14 @@ if(RAFT_COMPILE_LIBRARY)
src/cluster/cluster_cost_float.cu
src/cluster/cluster_cost_double.cu
src/core/logger.cpp
src/linalg/detail/coalesced_reduction.cu
src/neighbors/refine_d_int64_t_float.cu
src/neighbors/refine_d_int64_t_int8_t.cu
src/neighbors/refine_d_int64_t_uint8_t.cu
src/neighbors/refine_h_int64_t_float.cu
src/neighbors/refine_h_int64_t_int8_t.cu
src/neighbors/refine_h_int64_t_uint8_t.cu
src/neighbors/detail/selection_faiss.cu
src/neighbors/specializations/refine_d_int64_t_float.cu
src/neighbors/specializations/refine_d_int64_t_int8_t.cu
src/neighbors/specializations/refine_d_int64_t_uint8_t.cu
Expand Down Expand Up @@ -382,17 +395,17 @@ if(RAFT_COMPILE_LIBRARY)
src/random/rmat_rectangular_generator_int64_double.cu
src/random/rmat_rectangular_generator_int_float.cu
src/random/rmat_rectangular_generator_int64_float.cu
src/neighbors/specializations/detail/ball_cover_lowdim_pass_one_2d.cu
src/neighbors/specializations/detail/ball_cover_lowdim_pass_two_2d.cu
src/neighbors/specializations/detail/ball_cover_lowdim_pass_one_3d.cu
src/neighbors/specializations/detail/ball_cover_lowdim_pass_two_3d.cu
# src/neighbors/specializations/detail/ball_cover_lowdim_pass_one_2d.cu
# src/neighbors/specializations/detail/ball_cover_lowdim_pass_two_2d.cu
# src/neighbors/specializations/detail/ball_cover_lowdim_pass_one_3d.cu
# src/neighbors/specializations/detail/ball_cover_lowdim_pass_two_3d.cu
src/neighbors/specializations/ball_cover_all_knn_query.cu
src/neighbors/specializations/ball_cover_build_index.cu
src/neighbors/specializations/ball_cover_knn_query.cu
src/neighbors/specializations/fused_l2_knn_long_float_true.cu
src/neighbors/specializations/fused_l2_knn_long_float_false.cu
src/neighbors/specializations/fused_l2_knn_int_float_true.cu
src/neighbors/specializations/fused_l2_knn_int_float_false.cu
# src/neighbors/specializations/fused_l2_knn_long_float_true.cu
# src/neighbors/specializations/fused_l2_knn_long_float_false.cu
# src/neighbors/specializations/fused_l2_knn_int_float_true.cu
# src/neighbors/specializations/fused_l2_knn_int_float_false.cu
src/neighbors/ivf_flat_search.cu
src/neighbors/ivf_flat_build.cu
src/neighbors/specializations/ivfflat_build_float_int64_t.cu
Expand Down Expand Up @@ -445,6 +458,7 @@ if(RAFT_COMPILE_LIBRARY)
src/random/rmat_rectangular_generator_int_float.cu
src/random/rmat_rectangular_generator_int64_float.cu
src/spatial/knn/detail/ball_cover/registers.cu
src/util/memory_pool.cpp
)
set_target_properties(
raft_lib
Expand Down Expand Up @@ -618,7 +632,9 @@ rapids_export(
COMPONENTS ${raft_components}
COMPONENTS_EXPORT_SET ${raft_export_sets}
GLOBAL_TARGETS raft compiled distributed
NAMESPACE raft:: DOCUMENTATION doc_string FINAL_CODE_BLOCK code_string
NAMESPACE raft::
DOCUMENTATION doc_string
FINAL_CODE_BLOCK code_string
)

# ##################################################################################################
Expand All @@ -628,8 +644,10 @@ rapids_export(
EXPORT_SET raft-exports
COMPONENTS ${raft_components}
COMPONENTS_EXPORT_SET ${raft_export_sets}
GLOBAL_TARGETS raft
compiled distributed DOCUMENTATION doc_string NAMESPACE raft:: FINAL_CODE_BLOCK code_string
GLOBAL_TARGETS raft compiled distributed
DOCUMENTATION doc_string
NAMESPACE raft::
FINAL_CODE_BLOCK code_string
)

# ##################################################################################################
Expand Down
3 changes: 2 additions & 1 deletion cpp/include/raft/core/resource/device_memory_resource.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <raft/core/resource/resource_types.hpp>
#include <raft/core/resources.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>
#include <rmm/mr/device/per_device_resource.hpp>

namespace raft::resource {
class device_memory_resource : public resource {
Expand Down Expand Up @@ -72,4 +73,4 @@ inline void set_workspace_resource(resources const& res, rmm::mr::device_memory_
{
res.add_resource_factory(std::make_shared<workspace_resource_factory>(mr));
};
} // namespace raft::resource
} // namespace raft::resource
74 changes: 74 additions & 0 deletions cpp/include/raft/linalg/detail/coalesced_reduction-ext.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include "coalesced_reduction-types.cuh"
#include <raft/core/operators.hpp>

// Include inline definition as well. We cannot possibly cover all
// instantiations in this file.
#include "coalesced_reduction-inl.cuh"

// Helper macro, local to this header (it is #undef'd at the end of the file).
//
// Despite its name it does not instantiate anything: it expands to an
// `extern template` declaration of raft::linalg::detail::coalescedReduction for
// one combination of types, i.e. it tells the compiler not to implicitly
// instantiate that combination in the including translation unit.
// The matching explicit instantiation definitions are expected to be compiled
// elsewhere (presumably src/linalg/detail/coalesced_reduction.cu -- TODO confirm).
//
//   InType       element type of `data`
//   OutType      element type of `dots` and type of `init`
//   IdxType      type of the extents `D` and `N`
//   MainLambda   type of `main_op`
//   ReduceLambda type of `reduce_op`
//   FinalLambda  type of `final_op`
#define instantiate_raft_linalg_detail_coalescedReduction( \
InType, OutType, IdxType, MainLambda, ReduceLambda, FinalLambda) \
extern template void raft::linalg::detail::coalescedReduction(OutType* dots, \
const InType* data, \
IdxType D, \
IdxType N, \
OutType init, \
cudaStream_t stream, \
bool inplace, \
MainLambda main_op, \
ReduceLambda reduce_op, \
FinalLambda final_op)

// One line per (InType, OutType, IdxType, main_op, reduce_op, final_op)
// combination. Any combination not listed here is still usable: it is
// instantiated implicitly from coalesced_reduction-inl.cuh, included above.
instantiate_raft_linalg_detail_coalescedReduction(
double, double, int, raft::identity_op, raft::min_op, raft::identity_op);
instantiate_raft_linalg_detail_coalescedReduction(
double, double, int, raft::sq_op, raft::add_op, raft::identity_op);
instantiate_raft_linalg_detail_coalescedReduction(
double, double, int, raft::sq_op, raft::add_op, raft::sqrt_op);
instantiate_raft_linalg_detail_coalescedReduction(
double, double, int, raft::abs_op, raft::add_op, raft::identity_op);
instantiate_raft_linalg_detail_coalescedReduction(
double, double, int, raft::abs_op, raft::max_op, raft::identity_op);
instantiate_raft_linalg_detail_coalescedReduction(
float, float, size_t, raft::abs_op, raft::add_op, raft::sqrt_op);
instantiate_raft_linalg_detail_coalescedReduction(
float, float, int, raft::abs_op, raft::add_op, raft::identity_op);
instantiate_raft_linalg_detail_coalescedReduction(
float, float, int, raft::identity_op, raft::add_op, raft::identity_op);
instantiate_raft_linalg_detail_coalescedReduction(
float, float, int, raft::identity_op, raft::min_op, raft::identity_op);
instantiate_raft_linalg_detail_coalescedReduction(
float, float, int, raft::sq_op, raft::add_op, raft::identity_op);
instantiate_raft_linalg_detail_coalescedReduction(
float, float, int, raft::sq_op, raft::add_op, raft::sqrt_op);
instantiate_raft_linalg_detail_coalescedReduction(
float, float, long, raft::sq_op, raft::add_op, raft::identity_op);
instantiate_raft_linalg_detail_coalescedReduction(
float, float, size_t, raft::identity_op, raft::add_op, raft::identity_op);
instantiate_raft_linalg_detail_coalescedReduction(
float, float, size_t, raft::sq_op, raft::add_op, raft::identity_op);
instantiate_raft_linalg_detail_coalescedReduction(
float, float, size_t, raft::abs_op, raft::max_op, raft::sqrt_op);
instantiate_raft_linalg_detail_coalescedReduction(
float, float, size_t, raft::sq_op, raft::add_op, raft::sqrt_op);
instantiate_raft_linalg_detail_coalescedReduction(
float, float, unsigned int, raft::sq_op, raft::add_op, raft::identity_op);

// Keep the helper macro from leaking into files that include this header.
#undef instantiate_raft_linalg_detail_coalescedReduction
21 changes: 4 additions & 17 deletions cpp/include/raft/linalg/detail/coalesced_reduction-inl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -16,23 +16,17 @@

#pragma once

#include "coalesced_reduction-types.cuh" // policy structs
#include <cub/cub.cuh>
#include <raft/common/nvtx.hpp>
#include <raft/core/nvtx.hpp>
#include <raft/core/operators.hpp>
#include <raft/util/cuda_utils.cuh>
#include <rmm/device_uvector.hpp>
#include <rmm/device_uvector.hpp> // device_uvector

namespace raft {
namespace linalg {
namespace detail {

/**
 * @brief Compile-time launch constants for a reduction policy.
 *
 * @tparam logical_warp_size exposed as LogicalWarpSize
 * @tparam rows_per_block    exposed as RowsPerBlock
 *
 * ThreadsPerBlock is the product of the two template arguments.
 */
template <int logical_warp_size, int rows_per_block>
struct ReductionThinPolicy {
  static constexpr int LogicalWarpSize = logical_warp_size;
  static constexpr int RowsPerBlock    = rows_per_block;
  static constexpr int ThreadsPerBlock = logical_warp_size * rows_per_block;
};

template <typename Policy,
typename InType,
typename OutType,
Expand Down Expand Up @@ -212,13 +206,6 @@ void coalescedReductionMediumDispatcher(OutType* dots,
dots, data, D, N, init, stream, inplace, main_op, reduce_op, final_op);
}

/**
 * @brief Compile-time launch constants for a reduction policy.
 *
 * @tparam threads_per_block exposed as ThreadsPerBlock
 * @tparam blocks_per_row    exposed as BlocksPerRow
 *
 * BlockStride is the product of the two template arguments.
 */
template <int threads_per_block, int blocks_per_row>
struct ReductionThickPolicy {
  static constexpr int ThreadsPerBlock = threads_per_block;
  static constexpr int BlocksPerRow    = blocks_per_row;
  static constexpr int BlockStride     = threads_per_block * blocks_per_row;
};

template <typename Policy,
typename InType,
typename OutType,
Expand Down Expand Up @@ -365,4 +352,4 @@ void coalescedReduction(OutType* dots,

} // namespace detail
} // namespace linalg
} // namespace raft
} // namespace raft
34 changes: 34 additions & 0 deletions cpp/include/raft/linalg/detail/coalesced_reduction-types.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

namespace raft::linalg::detail {

/**
 * @brief Compile-time launch constants for a reduction policy.
 *
 * @tparam logical_warp_size exposed as LogicalWarpSize
 * @tparam rows_per_block    exposed as RowsPerBlock
 *
 * ThreadsPerBlock is the product of the two template arguments.
 */
template <int logical_warp_size, int rows_per_block>
struct ReductionThinPolicy {
  static constexpr int LogicalWarpSize = logical_warp_size;
  static constexpr int RowsPerBlock    = rows_per_block;
  static constexpr int ThreadsPerBlock = logical_warp_size * rows_per_block;
};

/**
 * @brief Compile-time launch constants for a reduction policy.
 *
 * @tparam threads_per_block exposed as ThreadsPerBlock
 * @tparam blocks_per_row    exposed as BlocksPerRow
 *
 * BlockStride is the product of the two template arguments.
 */
template <int threads_per_block, int blocks_per_row>
struct ReductionThickPolicy {
  static constexpr int ThreadsPerBlock = threads_per_block;
  static constexpr int BlocksPerRow    = blocks_per_row;
  static constexpr int BlockStride     = threads_per_block * blocks_per_row;
};

} // namespace raft::linalg::detail
25 changes: 25 additions & 0 deletions cpp/include/raft/linalg/detail/coalesced_reduction.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

// The template definitions (-inl.cuh) are made available in every
// configuration: there are too many lambdas and complicated types to
// instantiate everything ahead of time.
#if defined(RAFT_COMPILED) && defined(RAFT_EXPLICIT_INSTANTIATE)
// Additionally pull in the `extern template` declarations for the
// combinations listed in -ext.cuh.
#include "coalesced_reduction-ext.cuh"
#endif
#include "coalesced_reduction-inl.cuh"
1 change: 1 addition & 0 deletions cpp/include/raft/neighbors/detail/knn_brute_force.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <raft/neighbors/detail/selection_faiss.cuh>
#include <raft/spatial/knn/detail/fused_l2_knn.cuh>
#include <raft/spatial/knn/detail/haversine_distance.cuh>
#include <raft/spatial/knn/detail/processing.cuh>
#include <set>
#include <thrust/iterator/transform_iterator.h>

Expand Down
70 changes: 70 additions & 0 deletions cpp/include/raft/neighbors/detail/selection_faiss-ext.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cstddef> // size_t
#include <cstdint> // uint32_t
#include <raft/util/raft_explicit.hpp> // RAFT_EXPLICIT

#if defined(RAFT_EXPLICIT_INSTANTIATE)

namespace raft::neighbors::detail {
/**
 * @brief Select the k-nearest neighbors from dense
 * distance and index matrices.
 *
 * Declaration only: the body is replaced by RAFT_EXPLICIT (see
 * raft/util/raft_explicit.hpp for what that expands to).
 *
 * @tparam payload_t element type of the index matrices (inV / outV), defaults to int
 * @tparam key_t     element type of the distance matrices (inK / outK), defaults to float
 *
 * @param[in] inK partitioned knn distance matrix
 * @param[in] inV partitioned knn index matrix
 * @param[in] n_rows number of rows in distance and index matrices
 * @param[in] n_cols number of columns in distance and index matrices
 * @param[out] outK merged knn distance matrix
 * @param[out] outV merged knn index matrix
 * @param[in] select_min whether to select the min or the max distances
 * @param[in] k number of neighbors per partition (also number of merged neighbors)
 * @param[in] stream CUDA stream to use
 */
template <typename payload_t = int, typename key_t = float>
void select_k(const key_t* inK,
              const payload_t* inV,
              size_t n_rows,
              size_t n_cols,
              key_t* outK,
              payload_t* outV,
              bool select_min,
              int k,
              cudaStream_t stream) RAFT_EXPLICIT;
}  // namespace raft::neighbors::detail

#endif // RAFT_EXPLICIT_INSTANTIATE

// Helper macro, local to this header (it is #undef'd at the end of the file).
//
// Despite its name it expands to an `extern template` declaration of
// raft::neighbors::detail::select_k, i.e. it suppresses implicit instantiation
// of that combination in the including translation unit. The matching
// explicit instantiation definitions are expected to be compiled elsewhere
// (presumably src/neighbors/detail/selection_faiss.cu -- TODO confirm).
//
//   payload_t element type of inV / outV
//   key_t     element type of inK / outK
#define instantiate_raft_neighbors_detail_select_k(payload_t, key_t) \
extern template void raft::neighbors::detail::select_k(const key_t* inK, \
const payload_t* inV, \
size_t n_rows, \
size_t n_cols, \
key_t* outK, \
payload_t* outV, \
bool select_min, \
int k, \
cudaStream_t stream)

// NOTE(@benfred): please confirm the argument order below. The macro takes
// (payload_t, key_t), so these lines use float as the key type (inK / outK)
// and uint32_t / long as the payload type (inV / outV). It looked odd to the
// author that float is the key and uint32_t the payload -- should they be flipped?
instantiate_raft_neighbors_detail_select_k(uint32_t, float);
instantiate_raft_neighbors_detail_select_k(long, float);

// Keep the helper macro from leaking into files that include this header.
#undef instantiate_raft_neighbors_detail_select_k
25 changes: 25 additions & 0 deletions cpp/include/raft/neighbors/detail/selection_faiss.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

// Order matters: -ext.cuh only declares select_k itself when
// RAFT_EXPLICIT_INSTANTIATE is defined, yet it always emits
// `extern template ... select_k(...)` lines, which require the template to be
// declared already. Including -inl.cuh first (presumably it provides the
// select_k template -- TODO confirm) keeps the configuration
// "RAFT_COMPILED without RAFT_EXPLICIT_INSTANTIATE" from naming an undeclared
// template.
#if !defined(RAFT_EXPLICIT_INSTANTIATE)
#include "selection_faiss-inl.cuh"
#endif

// extern template declarations for the precompiled instantiations.
#if defined(RAFT_COMPILED)
#include "selection_faiss-ext.cuh"
#endif
Loading

0 comments on commit 8a4c4a8

Please sign in to comment.