From 15bd1f5b2485a15a82a13cb0c5d5f9e2bd194baa Mon Sep 17 00:00:00 2001 From: Allard Hendriksen Date: Wed, 19 Apr 2023 18:56:30 +0200 Subject: [PATCH 1/8] Move raft_runtime sources (only select_k) This is necessary because the raft_runtime source files can sometimes be in the same place as where the template instantiation files will be. --- cpp/CMakeLists.txt | 2 +- cpp/src/{ => raft_runtime}/matrix/select_k_float_int64_t.cu | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename cpp/src/{ => raft_runtime}/matrix/select_k_float_int64_t.cu (100%) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 62f9ac604e..efc80c960b 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -327,7 +327,6 @@ if(RAFT_COMPILE_LIBRARY) src/distance/specializations/fused_l2_nn_double_int64.cu src/distance/specializations/fused_l2_nn_float_int.cu src/distance/specializations/fused_l2_nn_float_int64.cu - src/matrix/select_k_float_int64_t.cu src/matrix/specializations/detail/select_k_float_uint32_t.cu src/matrix/specializations/detail/select_k_float_int64_t.cu src/matrix/specializations/detail/select_k_half_uint32_t.cu @@ -434,6 +433,7 @@ if(RAFT_COMPILE_LIBRARY) src/neighbors/specializations/detail/compute_similarity_half_half_fast.cu src/neighbors/specializations/detail/compute_similarity_half_half_no_basediff.cu src/neighbors/specializations/detail/compute_similarity_half_half_no_smem_lut.cu + src/raft_runtime/matrix/select_k_float_int64_t.cu src/random/rmat_rectangular_generator_int_double.cu src/random/rmat_rectangular_generator_int64_double.cu src/random/rmat_rectangular_generator_int_float.cu diff --git a/cpp/src/matrix/select_k_float_int64_t.cu b/cpp/src/raft_runtime/matrix/select_k_float_int64_t.cu similarity index 100% rename from cpp/src/matrix/select_k_float_int64_t.cu rename to cpp/src/raft_runtime/matrix/select_k_float_int64_t.cu From 73555a72ec4eadf9690433e54c258b697b496d08 Mon Sep 17 00:00:00 2001 From: Allard Hendriksen Date: Wed, 19 Apr 2023 19:01:22 +0200 Subject: [PATCH 2/8] Change RAFT_COMPILED from INTERNAL to PUBLIC --- cpp/CMakeLists.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index efc80c960b..9d4d94be86 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -463,7 +463,10 @@ if(RAFT_COMPILE_LIBRARY) raft_lib PRIVATE "$<$:${RAFT_CXX_FLAGS}>" "$<$:${RAFT_CUDA_FLAGS}>" ) - target_compile_definitions(raft_lib INTERFACE "RAFT_COMPILED") + + # RAFT_COMPILED is set during compilation of libraft.so as well as downstream + # libraries (due to "PUBLIC") + target_compile_definitions(raft_lib PUBLIC "RAFT_COMPILED") # ensure CUDA symbols aren't relocated to the middle of the debug build binaries target_link_options(raft_lib PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld") From a63b99a5a3927c7ee1f9e7219543adcc3390867a Mon Sep 17 00:00:00 2001 From: Allard Hendriksen Date: Wed, 19 Apr 2023 19:01:51 +0200 Subject: [PATCH 3/8] Define RAFT_EXPLICIT_INSTANTIATE_ONLY --- cpp/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 9d4d94be86..e579101692 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -467,6 +467,9 @@ if(RAFT_COMPILE_LIBRARY) # RAFT_COMPILED is set during compilation of libraft.so as well as downstream # libraries (due to "PUBLIC") target_compile_definitions(raft_lib PUBLIC "RAFT_COMPILED") + # RAFT_EXPLICIT_INSTANTIATE_ONLY is set during compilation of libraft.so (due + # to "PRIVATE") + target_compile_definitions(raft_lib PRIVATE "RAFT_EXPLICIT_INSTANTIATE_ONLY") # ensure CUDA symbols aren't relocated to the middle of the debug build binaries target_link_options(raft_lib PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld") From 0f9d230f4ac3f0e102f23d5dbdf15f5457860479 Mon Sep 17 00:00:00 2001 From: Allard Hendriksen Date: Wed, 19 Apr 2023 19:26:01 +0200 Subject: [PATCH 4/8] Add RAFT_EXPLICIT macro --- cpp/include/raft/util/raft_explicit.hpp | 89 +++++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 cpp/include/raft/util/raft_explicit.hpp diff --git a/cpp/include/raft/util/raft_explicit.hpp b/cpp/include/raft/util/raft_explicit.hpp new file mode 100644 index 0000000000..7edb2f0b42 --- /dev/null +++ b/cpp/include/raft/util/raft_explicit.hpp @@ -0,0 +1,89 @@ +/* Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +/** + * @brief Prevents a function template from being implicitly instantiated + * + * This macro defines a function body that can be used for function template + * definitions of functions that should not be implicitly instantiated. + * + * When the template is erroneously implicitly instantiated, it provides a + * useful error message that tells the user how to avoid the implicit + * instantiation. + * + * The error message is generated using a static assert. It is generally tricky + * to have a static assert fire only when you want it, as documented in + * P2593: https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2022/p2593r0.html + * + * We use the strategy from paragraph 1.3 here. We define a struct + * `not_allowed`, whose type is dependent on the template parameters of the + * enclosing function instance. We use this struct type to instantiate the + * `implicit_instantiation` template class, whose value is always false. We pass + * this value to static_assert. This way, the static assert only fires when the + * template is instantiated, since `implicit_instantiation` cannot be + * instantiated without all the types in the enclosing function template. + */ +#define RAFT_EXPLICIT \ + { \ + /* Type of `not_allowed` depends on template parameters of enclosing function. */ \ + struct not_allowed { \ + }; \ + static_assert( \ + raft::util::raft_explicit::implicit_instantiation::value, \ + "ACCIDENTAL_IMPLICIT_INSTANTIATION\n\n" \ + \ + "If you see this error, then you have implicitly instantiated a function\n" \ + "template. To keep compile times in check, libraft has the policy of\n" \ + "explicitly instantiating templates. To fix the compilation error, follow\n" \ + "these steps.\n\n" \ + \ + "If you scroll up or down a bit, you probably saw a line like the following:\n\n" \ + \ + "detected during instantiation of \"void raft::foo(T) [with T=float]\" at line [..]\n\n" \ + \ + "Simplest temporary solution:\n\n" \ + \ + " Add '#undef RAFT_EXPLICIT_INSTANTIATE_ONLY' at the top of your .cpp/.cu file.\n\n" \ + \ + "Best solution:\n\n" \ + \ + " 1. Add the following line to the file include/raft/foo.hpp:\n\n" \ + \ + " extern template void raft::foo(double);\n\n" \ + \ + " 2. Add the following line to the file src/raft/foo.cpp:\n\n" \ + \ + " template void raft::foo(double)\n"); \ + \ + /* Function may have non-void return type. */ \ + /* To prevent warnings/errors about missing returns, throw an exception. */ \ + throw "raft_explicit_error"; \ + } + +namespace raft::util::raft_explicit { +/** + * @brief Template that is always false + * + * This template is from paragraph 1.3 of P2593: + * https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2022/p2593r0.html + * + * The value of `value` is always false, but it depends on a template parameter. + */ +template +struct implicit_instantiation { + static constexpr bool value = false; +}; +} // namespace raft::util::raft_explicit From d0063ff6fa8ed8a205b5c2032d2a46debb085a22 Mon Sep 17 00:00:00 2001 From: Allard Hendriksen Date: Wed, 19 Apr 2023 18:59:39 +0200 Subject: [PATCH 5/8] Move select_k.cuh => select_k-inl.cuh This commit does not compile, but it preserves the history for select_k.cuh --- cpp/include/raft/matrix/detail/{select_k.cuh => select_k-inl.cuh} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename cpp/include/raft/matrix/detail/{select_k.cuh => select_k-inl.cuh} (100%) diff --git a/cpp/include/raft/matrix/detail/select_k.cuh b/cpp/include/raft/matrix/detail/select_k-inl.cuh similarity index 100% rename from cpp/include/raft/matrix/detail/select_k.cuh rename to cpp/include/raft/matrix/detail/select_k-inl.cuh From ee8d78b844e3e1d3fe50662c37a8e7182f1925c9 Mon Sep 17 00:00:00 2001 From: Allard Hendriksen Date: Wed, 19 Apr 2023 19:05:03 +0200 Subject: [PATCH 6/8] select_k: Replace specialization by split header --- cpp/CMakeLists.txt | 10 +-- .../raft/matrix/detail/select_k-ext.cuh | 65 +++++++++++++++++++ cpp/include/raft/matrix/detail/select_k.cuh | 25 +++++++ .../specializations/detail/select_k.cuh | 34 ++-------- .../matrix/detail/select_k_double_int64_t.cu | 33 ++++++++++ .../matrix/detail/select_k_double_uint32_t.cu | 34 ++++++++++ .../matrix/detail/select_k_float_int64_t.cu | 33 ++++++++++ .../matrix/detail/select_k_float_uint32_t.cu | 33 ++++++++++ .../matrix/detail/select_k_half_int64_t.cu | 33 ++++++++++ .../matrix/detail/select_k_half_uint32_t.cu | 33 ++++++++++ .../detail/select_k_float_int64_t.cu | 36 ---------- .../detail/select_k_float_uint32_t.cu | 36 ---------- .../detail/select_k_half_int64_t.cu | 36 ---------- .../detail/select_k_half_uint32_t.cu | 36 ---------- 14 files changed, 300 insertions(+), 177 deletions(-) create mode 100644 cpp/include/raft/matrix/detail/select_k-ext.cuh create mode 100644 cpp/include/raft/matrix/detail/select_k.cuh create mode 100644 cpp/src/matrix/detail/select_k_double_int64_t.cu create mode 100644 cpp/src/matrix/detail/select_k_double_uint32_t.cu create mode 100644 cpp/src/matrix/detail/select_k_float_int64_t.cu create mode 100644 cpp/src/matrix/detail/select_k_float_uint32_t.cu create mode 100644 cpp/src/matrix/detail/select_k_half_int64_t.cu create mode 100644 cpp/src/matrix/detail/select_k_half_uint32_t.cu delete mode 100644 cpp/src/matrix/specializations/detail/select_k_float_int64_t.cu delete mode 100644 cpp/src/matrix/specializations/detail/select_k_float_uint32_t.cu delete mode 100644 cpp/src/matrix/specializations/detail/select_k_half_int64_t.cu delete mode 100644 cpp/src/matrix/specializations/detail/select_k_half_uint32_t.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index e579101692..d317c8e784 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -269,6 +269,12 @@ if(RAFT_COMPILE_LIBRARY) src/cluster/update_centroids_double.cu src/cluster/cluster_cost_float.cu src/cluster/cluster_cost_double.cu + src/matrix/detail/select_k_double_int64_t.cu + src/matrix/detail/select_k_double_uint32_t.cu + src/matrix/detail/select_k_float_int64_t.cu + src/matrix/detail/select_k_float_uint32_t.cu + src/matrix/detail/select_k_half_int64_t.cu + src/matrix/detail/select_k_half_uint32_t.cu src/neighbors/refine_d_int64_t_float.cu src/neighbors/refine_d_int64_t_int8_t.cu src/neighbors/refine_d_int64_t_uint8_t.cu @@ -327,10 +333,6 @@ if(RAFT_COMPILE_LIBRARY) src/distance/specializations/fused_l2_nn_double_int64.cu src/distance/specializations/fused_l2_nn_float_int.cu src/distance/specializations/fused_l2_nn_float_int64.cu - src/matrix/specializations/detail/select_k_float_uint32_t.cu - src/matrix/specializations/detail/select_k_float_int64_t.cu - src/matrix/specializations/detail/select_k_half_uint32_t.cu - src/matrix/specializations/detail/select_k_half_int64_t.cu src/neighbors/ivfpq_build.cu src/neighbors/ivfpq_deserialize.cu src/neighbors/ivfpq_serialize.cu diff --git a/cpp/include/raft/matrix/detail/select_k-ext.cuh b/cpp/include/raft/matrix/detail/select_k-ext.cuh new file mode 100644 index 0000000000..2b233c156d --- /dev/null +++ b/cpp/include/raft/matrix/detail/select_k-ext.cuh @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include // uint32_t +#include // __half +#include // RAFT_EXPLICIT +#include // rmm:cuda_stream_view +#include // rmm::mr::device_memory_resource + +#ifdef RAFT_EXPLICIT_INSTANTIATE_ONLY + +namespace raft::matrix::detail { + +template +void select_k(const T* in_val, + const IdxT* in_idx, + size_t batch_size, + size_t len, + int k, + T* out_val, + IdxT* out_idx, + bool select_min, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = nullptr) RAFT_EXPLICIT; +} // namespace raft::matrix::detail + +#endif // RAFT_EXPLICIT_INSTANTIATE_ONLY + +#define instantiate_raft_matrix_detail_select_k(T, IdxT) \ + extern template void raft::matrix::detail::select_k(const T* in_val, \ + const IdxT* in_idx, \ + size_t batch_size, \ + size_t len, \ + int k, \ + T* out_val, \ + IdxT* out_idx, \ + bool select_min, \ + rmm::cuda_stream_view stream, \ + rmm::mr::device_memory_resource* mr) + +instantiate_raft_matrix_detail_select_k(__half, uint32_t); +instantiate_raft_matrix_detail_select_k(__half, int64_t); +instantiate_raft_matrix_detail_select_k(float, int64_t); +instantiate_raft_matrix_detail_select_k(float, uint32_t); +// We did not have these two for double before, but there are tests for them. We +// therefore include them here. +instantiate_raft_matrix_detail_select_k(double, int64_t); +instantiate_raft_matrix_detail_select_k(double, uint32_t); + +#undef instantiate_raft_matrix_detail_select_k diff --git a/cpp/include/raft/matrix/detail/select_k.cuh b/cpp/include/raft/matrix/detail/select_k.cuh new file mode 100644 index 0000000000..d011f23534 --- /dev/null +++ b/cpp/include/raft/matrix/detail/select_k.cuh @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#if !defined(RAFT_EXPLICIT_INSTANTIATE_ONLY) +#include "select_k-inl.cuh" +#endif + +#ifdef RAFT_COMPILED +#include "select_k-ext.cuh" +#endif diff --git a/cpp/include/raft/matrix/specializations/detail/select_k.cuh b/cpp/include/raft/matrix/specializations/detail/select_k.cuh index 3cb1a2d8dc..64d5b332b1 100644 --- a/cpp/include/raft/matrix/specializations/detail/select_k.cuh +++ b/cpp/include/raft/matrix/specializations/detail/select_k.cuh @@ -16,32 +16,8 @@ #pragma once -#include - -#include - -namespace raft::matrix::detail { - -#define RAFT_INST(T, IdxT) \ - extern template void select_k(const T*, \ - const IdxT*, \ - size_t, \ - size_t, \ - int, \ - T*, \ - IdxT*, \ - bool, \ - rmm::cuda_stream_view, \ - rmm::mr::device_memory_resource*); - -// Commonly used types -RAFT_INST(float, int64_t); -RAFT_INST(half, int64_t); - -// These instances are used in the ivf_pq::search parameterized by the internal_distance_dtype -RAFT_INST(float, uint32_t); -RAFT_INST(half, uint32_t); - -#undef RAFT_INST - -} // namespace raft::matrix::detail +#pragma message( \ + __FILE__ \ + " is deprecated and will be removed." \ + " Including specializations is not necessary any more." \ + " For more information, see: https://docs.rapids.ai/api/raft/nightly/using_libraft.html") diff --git a/cpp/src/matrix/detail/select_k_double_int64_t.cu b/cpp/src/matrix/detail/select_k_double_int64_t.cu new file mode 100644 index 0000000000..022627283a --- /dev/null +++ b/cpp/src/matrix/detail/select_k_double_int64_t.cu @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#define instantiate_raft_matrix_detail_select_k(T, IdxT) \ + template void raft::matrix::detail::select_k(const T* in_val, \ + const IdxT* in_idx, \ + size_t batch_size, \ + size_t len, \ + int k, \ + T* out_val, \ + IdxT* out_idx, \ + bool select_min, \ + rmm::cuda_stream_view stream, \ + rmm::mr::device_memory_resource* mr) + +instantiate_raft_matrix_detail_select_k(double, int64_t); + +#undef instantiate_raft_matrix_detail_select_k diff --git a/cpp/src/matrix/detail/select_k_double_uint32_t.cu b/cpp/src/matrix/detail/select_k_double_uint32_t.cu new file mode 100644 index 0000000000..22c6989337 --- /dev/null +++ b/cpp/src/matrix/detail/select_k_double_uint32_t.cu @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include // uint32_t +#include + +#define instantiate_raft_matrix_detail_select_k(T, IdxT) \ + template void raft::matrix::detail::select_k(const T* in_val, \ + const IdxT* in_idx, \ + size_t batch_size, \ + size_t len, \ + int k, \ + T* out_val, \ + IdxT* out_idx, \ + bool select_min, \ + rmm::cuda_stream_view stream, \ + rmm::mr::device_memory_resource* mr) + +instantiate_raft_matrix_detail_select_k(double, uint32_t); + +#undef instantiate_raft_matrix_detail_select_k diff --git a/cpp/src/matrix/detail/select_k_float_int64_t.cu b/cpp/src/matrix/detail/select_k_float_int64_t.cu new file mode 100644 index 0000000000..1f1d686048 --- /dev/null +++ b/cpp/src/matrix/detail/select_k_float_int64_t.cu @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#define instantiate_raft_matrix_detail_select_k(T, IdxT) \ + template void raft::matrix::detail::select_k(const T* in_val, \ + const IdxT* in_idx, \ + size_t batch_size, \ + size_t len, \ + int k, \ + T* out_val, \ + IdxT* out_idx, \ + bool select_min, \ + rmm::cuda_stream_view stream, \ + rmm::mr::device_memory_resource* mr) + +instantiate_raft_matrix_detail_select_k(float, int64_t); + +#undef instantiate_raft_matrix_detail_select_k diff --git a/cpp/src/matrix/detail/select_k_float_uint32_t.cu b/cpp/src/matrix/detail/select_k_float_uint32_t.cu new file mode 100644 index 0000000000..3bb47acbf2 --- /dev/null +++ b/cpp/src/matrix/detail/select_k_float_uint32_t.cu @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#define instantiate_raft_matrix_detail_select_k(T, IdxT) \ + template void raft::matrix::detail::select_k(const T* in_val, \ + const IdxT* in_idx, \ + size_t batch_size, \ + size_t len, \ + int k, \ + T* out_val, \ + IdxT* out_idx, \ + bool select_min, \ + rmm::cuda_stream_view stream, \ + rmm::mr::device_memory_resource* mr) + +instantiate_raft_matrix_detail_select_k(float, uint32_t); + +#undef instantiate_raft_matrix_detail_select_k diff --git a/cpp/src/matrix/detail/select_k_half_int64_t.cu b/cpp/src/matrix/detail/select_k_half_int64_t.cu new file mode 100644 index 0000000000..cf4e15959d --- /dev/null +++ b/cpp/src/matrix/detail/select_k_half_int64_t.cu @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#define instantiate_raft_matrix_detail_select_k(T, IdxT) \ + template void raft::matrix::detail::select_k(const T* in_val, \ + const IdxT* in_idx, \ + size_t batch_size, \ + size_t len, \ + int k, \ + T* out_val, \ + IdxT* out_idx, \ + bool select_min, \ + rmm::cuda_stream_view stream, \ + rmm::mr::device_memory_resource* mr) + +instantiate_raft_matrix_detail_select_k(__half, int64_t); + +#undef instantiate_raft_matrix_detail_select_k diff --git a/cpp/src/matrix/detail/select_k_half_uint32_t.cu b/cpp/src/matrix/detail/select_k_half_uint32_t.cu new file mode 100644 index 0000000000..b18887bfc0 --- /dev/null +++ b/cpp/src/matrix/detail/select_k_half_uint32_t.cu @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#define instantiate_raft_matrix_detail_select_k(T, IdxT) \ + template void raft::matrix::detail::select_k(const T* in_val, \ + const IdxT* in_idx, \ + size_t batch_size, \ + size_t len, \ + int k, \ + T* out_val, \ + IdxT* out_idx, \ + bool select_min, \ + rmm::cuda_stream_view stream, \ + rmm::mr::device_memory_resource* mr) + +instantiate_raft_matrix_detail_select_k(__half, uint32_t); + +#undef instantiate_raft_matrix_detail_select_k diff --git a/cpp/src/matrix/specializations/detail/select_k_float_int64_t.cu b/cpp/src/matrix/specializations/detail/select_k_float_int64_t.cu deleted file mode 100644 index 370ab1ba50..0000000000 --- a/cpp/src/matrix/specializations/detail/select_k_float_int64_t.cu +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include - -namespace raft::matrix::detail { - -#define RAFT_INST(T, IdxT) \ - template void select_k(const T*, \ - const IdxT*, \ - size_t, \ - size_t, \ - int, \ - T*, \ - IdxT*, \ - bool, \ - rmm::cuda_stream_view, \ - rmm::mr::device_memory_resource*); - -RAFT_INST(float, int64_t); - -} // namespace raft::matrix::detail diff --git a/cpp/src/matrix/specializations/detail/select_k_float_uint32_t.cu b/cpp/src/matrix/specializations/detail/select_k_float_uint32_t.cu deleted file mode 100644 index c6733c2a46..0000000000 --- a/cpp/src/matrix/specializations/detail/select_k_float_uint32_t.cu +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include - -namespace raft::matrix::detail { - -#define RAFT_INST(T, IdxT) \ - template void select_k(const T*, \ - const IdxT*, \ - size_t, \ - size_t, \ - int, \ - T*, \ - IdxT*, \ - bool, \ - rmm::cuda_stream_view, \ - rmm::mr::device_memory_resource*); - -RAFT_INST(float, uint32_t); - -} // namespace raft::matrix::detail diff --git a/cpp/src/matrix/specializations/detail/select_k_half_int64_t.cu b/cpp/src/matrix/specializations/detail/select_k_half_int64_t.cu deleted file mode 100644 index 38e28ac54d..0000000000 --- a/cpp/src/matrix/specializations/detail/select_k_half_int64_t.cu +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include - -namespace raft::matrix::detail { - -#define RAFT_INST(T, IdxT) \ - template void select_k(const T*, \ - const IdxT*, \ - size_t, \ - size_t, \ - int, \ - T*, \ - IdxT*, \ - bool, \ - rmm::cuda_stream_view, \ - rmm::mr::device_memory_resource*); - -RAFT_INST(half, int64_t); - -} // namespace raft::matrix::detail diff --git a/cpp/src/matrix/specializations/detail/select_k_half_uint32_t.cu b/cpp/src/matrix/specializations/detail/select_k_half_uint32_t.cu deleted file mode 100644 index 108bd30b49..0000000000 --- a/cpp/src/matrix/specializations/detail/select_k_half_uint32_t.cu +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include - -namespace raft::matrix::detail { - -#define RAFT_INST(T, IdxT) \ - template void select_k(const T*, \ - const IdxT*, \ - size_t, \ - size_t, \ - int, \ - T*, \ - IdxT*, \ - bool, \ - rmm::cuda_stream_view, \ - rmm::mr::device_memory_resource*); - -RAFT_INST(half, uint32_t); - -} // namespace raft::matrix::detail From 4b3c4b68a977de6a12a7e990b8fc1b8f15907d26 Mon Sep 17 00:00:00 2001 From: Allard Hendriksen Date: Wed, 19 Apr 2023 19:40:09 +0200 Subject: [PATCH 7/8] Force explicit instantiation in tests and benchmarks --- cpp/bench/prims/CMakeLists.txt | 1 + cpp/test/CMakeLists.txt | 2 ++ 2 files changed, 3 insertions(+) diff --git a/cpp/bench/prims/CMakeLists.txt b/cpp/bench/prims/CMakeLists.txt index f6499623dd..c69ff116fd 100644 --- a/cpp/bench/prims/CMakeLists.txt +++ b/cpp/bench/prims/CMakeLists.txt @@ -54,6 +54,7 @@ function(ConfigureBench) ${BENCH_NAME} PRIVATE "$<$:${RAFT_CXX_FLAGS}>" "$<$:${RAFT_CUDA_FLAGS}>" ) + target_compile_definitions(${BENCH_NAME} PRIVATE "RAFT_EXPLICIT_INSTANTIATE_ONLY") target_include_directories( ${BENCH_NAME} PUBLIC "$" diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 22e8a9d73c..2c0c603bee 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -59,6 +59,8 @@ function(ConfigureTest) "$<$:${RAFT_CUDA_FLAGS}>" ) + target_compile_definitions(${TEST_NAME} PRIVATE "RAFT_EXPLICIT_INSTANTIATE_ONLY") + target_include_directories(${TEST_NAME} PUBLIC "$") install( From 3a1de4cf6282fc6a555f6c732edd34d8be352c6c Mon Sep 17 00:00:00 2001 From: Allard Hendriksen Date: Wed, 19 Apr 2023 19:46:25 +0200 Subject: [PATCH 8/8] Fix select_k test to use int64_t --- cpp/test/matrix/select_k.cu | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cpp/test/matrix/select_k.cu b/cpp/test/matrix/select_k.cu index 2a40d70abc..e92f6c05cc 100644 --- a/cpp/test/matrix/select_k.cu +++ b/cpp/test/matrix/select_k.cu @@ -232,9 +232,9 @@ struct SelectK // NOLINT auto& in_dists = ref.get_in_dists(); auto compare_ids = [&in_ids, &in_dists](const IdxT& i, const IdxT& j) { if (i == j) return true; - auto ix_i = uint64_t(std::find(in_ids.begin(), in_ids.end(), i) - in_ids.begin()); - auto ix_j = uint64_t(std::find(in_ids.begin(), in_ids.end(), j) - in_ids.begin()); - if (ix_i >= in_ids.size() || ix_j >= in_ids.size()) return false; + auto ix_i = int64_t(std::find(in_ids.begin(), in_ids.end(), i) - in_ids.begin()); + auto ix_j = int64_t(std::find(in_ids.begin(), in_ids.end(), j) - in_ids.begin()); + if (size_t(ix_i) >= in_ids.size() || size_t(ix_j) >= in_ids.size()) return false; auto dist_i = in_dists[ix_i]; auto dist_j = in_dists[ix_j]; if (dist_i == dist_j) return true; @@ -434,7 +434,7 @@ INSTANTIATE_TEST_CASE_P( // NOLINT select::Algo::kWarpDistributedShm))); using ReferencedRandomDoubleSizeT = - SelectK::params_random>; + SelectK::params_random>; TEST_P(ReferencedRandomDoubleSizeT, Run) { run(); } // NOLINT INSTANTIATE_TEST_CASE_P( // NOLINT SelectK, @@ -461,7 +461,7 @@ INSTANTIATE_TEST_CASE_P( // NOLINT select::Algo::kRadix11bitsExtraPass))); using ReferencedRandomFloatSizeT = - SelectK::params_random>; + SelectK::params_random>; TEST_P(ReferencedRandomFloatSizeT, LargeK) { run(); } // NOLINT INSTANTIATE_TEST_CASE_P(SelectK, // NOLINT ReferencedRandomFloatSizeT,