diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh index e32697a68a..9c487be156 100755 --- a/ci/test_cpp.sh +++ b/ci/test_cpp.sh @@ -36,12 +36,7 @@ trap "EXITCODE=1" ERR set +e # Run libraft gtests from libraft-tests package -rapids-logger "Run gtests" -for gt in "$CONDA_PREFIX"/bin/gtests/libraft/* ; do - test_name=$(basename ${gt}) - echo "Running gtest $test_name" - ${gt} --gtest_output=xml:${RAPIDS_TESTS_DIR} -done +ctest -j8 --output-on-failure rapids-logger "Test script exiting with value: $EXITCODE" exit ${EXITCODE} diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 77f571f705..87fc7ed782 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -13,27 +13,38 @@ # ============================================================================= # ################################################################################################## -# * compiler function ----------------------------------------------------------------------------- +# enable testing ################################################################################ +# ################################################################################################## +enable_testing() +include(rapids-test) +rapids_test_init() function(ConfigureTest) set(options OPTIONAL LIB EXPLICIT_INSTANTIATE_ONLY) - set(oneValueArgs NAME) + set(oneValueArgs NAME GPUS PERCENT) set(multiValueArgs PATH TARGETS CONFIGURATIONS) - cmake_parse_arguments(ConfigureTest "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - - set(TEST_NAME ${ConfigureTest_NAME}) - - add_executable(${TEST_NAME} ${ConfigureTest_PATH}) + cmake_parse_arguments(_RAFT_TEST "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + if(NOT DEFINED _RAFT_TEST_GPUS AND NOT DEFINED _RAFT_TEST_PERCENT) + set(_RAFT_TEST_GPUS 1) + set(_RAFT_TEST_PERCENT 30) + endif() + if(NOT DEFINED _RAFT_TEST_GPUS) + set(_RAFT_TEST_GPUS 1) + endif() + if(NOT DEFINED _RAFT_TEST_PERCENT) + set(_RAFT_TEST_PERCENT 100) + endif() - message("TEST PATH: ${ConfigureTest_PATH}") + set(TEST_NAME ${_RAFT_TEST_NAME}) + add_executable(${TEST_NAME} ${_RAFT_TEST_PATH}) target_link_libraries( ${TEST_NAME} PRIVATE raft raft_internal - $<$:raft::compiled> + $<$:raft::compiled> GTest::gtest GTest::gtest_main Threads::Threads @@ -41,35 +52,32 @@ function(ConfigureTest) $ $ ) - - add_test(NAME ${TEST_NAME} COMMAND ${TEST_NAME}) - set_target_properties( ${TEST_NAME} - PROPERTIES # set target compile options + PROPERTIES + RUNTIME_OUTPUT_DIRECTORY "$" INSTALL_RPATH "\$ORIGIN/../../../lib" CXX_STANDARD 17 CXX_STANDARD_REQUIRED ON CUDA_STANDARD 17 CUDA_STANDARD_REQUIRED ON ) - target_compile_options( ${TEST_NAME} PRIVATE "$<$:${RAFT_CXX_FLAGS}>" "$<$:${RAFT_CUDA_FLAGS}>" ) - - if(ConfigureTest_EXPLICIT_INSTANTIATE_ONLY) + if(_RAFT_TEST_EXPLICIT_INSTANTIATE_ONLY) target_compile_definitions(${TEST_NAME} PRIVATE "RAFT_EXPLICIT_INSTANTIATE_ONLY") endif() target_include_directories(${TEST_NAME} PUBLIC "$") - install( - TARGETS ${TEST_NAME} - COMPONENT testing - DESTINATION bin/gtests/libraft - EXCLUDE_FROM_ALL + rapids_test_add( + NAME ${TEST_NAME} + COMMAND ${TEST_NAME} + GPUS ${_RAFT_TEST_GPUS} + PERCENT ${_RAFT_TEST_PERCENT} + INSTALL_COMPONENT_SET testing ) endfunction() @@ -90,7 +98,6 @@ if(BUILD_TESTS) test/cluster/cluster_solvers.cu test/cluster/linkage.cu test/cluster/kmeans_find_k.cu - OPTIONAL LIB EXPLICIT_INSTANTIATE_ONLY ) @@ -117,7 +124,6 @@ if(BUILD_TESTS) test/core/span.cu test/core/temporary_device_buffer.cu test/test.cpp - OPTIONAL LIB EXPLICIT_INSTANTIATE_ONLY ) @@ -147,7 +153,6 @@ if(BUILD_TESTS) test/distance/masked_nn_compress_to_bits.cu test/distance/fused_l2_nn.cu test/distance/gram.cu - OPTIONAL LIB EXPLICIT_INSTANTIATE_ONLY ) @@ -181,11 +186,11 @@ if(BUILD_TESTS) # * EXT_HEADERS_TEST_COMPILED_IMPLICIT: RAFT_COMPILED defined # * EXT_HEADERS_TEST_IMPLICIT: no macros defined. ConfigureTest( - NAME EXT_HEADERS_TEST_COMPILED_EXPLICIT PATH ${EXT_HEADER_TEST_SOURCES} OPTIONAL LIB + NAME EXT_HEADERS_TEST_COMPILED_EXPLICIT PATH ${EXT_HEADER_TEST_SOURCES} LIB EXPLICIT_INSTANTIATE_ONLY ) ConfigureTest( - NAME EXT_HEADERS_TEST_COMPILED_IMPLICIT PATH ${EXT_HEADER_TEST_SOURCES} OPTIONAL LIB + NAME EXT_HEADERS_TEST_COMPILED_IMPLICIT PATH ${EXT_HEADER_TEST_SOURCES} LIB ) ConfigureTest(NAME EXT_HEADERS_TEST_IMPLICIT PATH ${EXT_HEADER_TEST_SOURCES}) @@ -244,15 +249,33 @@ if(BUILD_TESTS) test/matrix/matrix.cu test/matrix/norm.cu test/matrix/reverse.cu - test/matrix/select_k.cu test/matrix/slice.cu test/matrix/triangular.cu test/sparse/spectral_matrix.cu - OPTIONAL LIB EXPLICIT_INSTANTIATE_ONLY ) + + ConfigureTest( + NAME + MATRIX_SELECT_TEST + PATH + test/matrix/select_k.cu + LIB + EXPLICIT_INSTANTIATE_ONLY + ) + + ConfigureTest( + NAME + MATRIX_SELECT_LARGE_TEST + PATH + test/matrix/select_large_k.cu + LIB + EXPLICIT_INSTANTIATE_ONLY + ) + + ConfigureTest( NAME RANDOM_TEST @@ -270,7 +293,7 @@ if(BUILD_TESTS) ConfigureTest( NAME SOLVERS_TEST PATH test/cluster/cluster_solvers_deprecated.cu test/linalg/eigen_solvers.cu - test/lap/lap.cu test/sparse/mst.cu OPTIONAL LIB EXPLICIT_INSTANTIATE_ONLY + test/lap/lap.cu test/sparse/mst.cu LIB EXPLICIT_INSTANTIATE_ONLY ) ConfigureTest( @@ -296,7 +319,7 @@ if(BUILD_TESTS) ConfigureTest( NAME SPARSE_DIST_TEST PATH test/sparse/dist_coo_spmv.cu test/sparse/distance.cu - test/sparse/gram.cu OPTIONAL LIB EXPLICIT_INSTANTIATE_ONLY + test/sparse/gram.cu LIB EXPLICIT_INSTANTIATE_ONLY ) ConfigureTest( @@ -306,7 +329,6 @@ if(BUILD_TESTS) test/sparse/neighbors/connect_components.cu test/sparse/neighbors/brute_force.cu test/sparse/neighbors/knn_graph.cu - OPTIONAL LIB EXPLICIT_INSTANTIATE_ONLY ) @@ -315,6 +337,21 @@ if(BUILD_TESTS) NAME NEIGHBORS_TEST PATH + test/neighbors/knn.cu + test/neighbors/fused_l2_knn.cu + test/neighbors/tiled_knn.cu + test/neighbors/haversine.cu + test/neighbors/ball_cover.cu + test/neighbors/epsilon_neighborhood.cu + test/neighbors/refine.cu + LIB + EXPLICIT_INSTANTIATE_ONLY + ) + + ConfigureTest( + NAME + NEIGHBORS_ANN_CAGRA_TEST + PATH test/neighbors/ann_cagra/test_float_uint32_t.cu test/neighbors/ann_cagra/test_int8_t_uint32_t.cu test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu @@ -327,6 +364,15 @@ if(BUILD_TESTS) src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu + LIB + EXPLICIT_INSTANTIATE_ONLY + GPUS 1 PERCENT 100 + ) + + ConfigureTest( + NAME + NEIGHBORS_ANN_IVF_TEST + PATH test/neighbors/ann_ivf_flat/test_float_int64_t.cu test/neighbors/ann_ivf_flat/test_int8_t_int64_t.cu test/neighbors/ann_ivf_flat/test_uint8_t_int64_t.cu @@ -335,17 +381,19 @@ if(BUILD_TESTS) test/neighbors/ann_ivf_pq/test_float_int64_t.cu test/neighbors/ann_ivf_pq/test_int8_t_int64_t.cu test/neighbors/ann_ivf_pq/test_uint8_t_int64_t.cu - test/neighbors/knn.cu - test/neighbors/fused_l2_knn.cu - test/neighbors/tiled_knn.cu - test/neighbors/haversine.cu - test/neighbors/ball_cover.cu - test/neighbors/epsilon_neighborhood.cu - test/neighbors/refine.cu + LIB + EXPLICIT_INSTANTIATE_ONLY + GPUS 1 PERCENT 100 + ) + + ConfigureTest( + NAME + NEIGHBORS_SELECTION_TEST + PATH test/neighbors/selection.cu - OPTIONAL LIB EXPLICIT_INSTANTIATE_ONLY + GPUS 1 PERCENT 50 ) ConfigureTest( @@ -377,7 +425,6 @@ if(BUILD_TESTS) test/stats/trustworthiness.cu test/stats/weighted_mean.cu test/stats/v_measure.cu - OPTIONAL LIB EXPLICIT_INSTANTIATE_ONLY ) @@ -395,3 +442,8 @@ if(BUILD_TESTS) test/util/reduction.cu ) endif() + +# ################################################################################################## +# Install tests #################################################################################### +# ################################################################################################## +rapids_test_install_relocatable(INSTALL_COMPONENT_SET testing DESTINATION bin/gtests/libraft) diff --git a/cpp/test/distance/gram.cu b/cpp/test/distance/gram.cu index b3640a888a..d5fecd93c6 100644 --- a/cpp/test/distance/gram.cu +++ b/cpp/test/distance/gram.cu @@ -75,9 +75,14 @@ template class GramMatrixTest : public ::testing::TestWithParam { protected: GramMatrixTest() - : params(GetParam()), stream(0), x1(0, stream), x2(0, stream), gram(0, stream), gram_host(0) + : params(GetParam()), + handle(), + x1(0, resource::get_cuda_stream(handle)), + x2(0, resource::get_cuda_stream(handle)), + gram(0, resource::get_cuda_stream(handle)), + gram_host(0) { - RAFT_CUDA_TRY(cudaStreamCreate(&stream)); + auto stream = resource::get_cuda_stream(handle); if (params.ld1 == 0) { params.ld1 = params.is_row_major ? params.n_cols : params.n1; } if (params.ld2 == 0) { params.ld2 = params.is_row_major ? params.n_cols : params.n2; } @@ -99,7 +104,7 @@ class GramMatrixTest : public ::testing::TestWithParam { r.uniform(x2.data(), x2.size(), math_t(0), math_t(1), stream); } - ~GramMatrixTest() override { RAFT_CUDA_TRY_NO_THROW(cudaStreamDestroy(stream)); } + ~GramMatrixTest() override {} void runTest() { @@ -127,6 +132,7 @@ class GramMatrixTest : public ::testing::TestWithParam { (*kernel)(handle, x1_span, x2_span, out_span); + auto stream = resource::get_cuda_stream(handle); naiveGramMatrixKernel(params.n1, params.n2, params.n_cols, @@ -142,16 +148,16 @@ class GramMatrixTest : public ::testing::TestWithParam { handle); ASSERT_TRUE(raft::devArrMatchHost( - gram_host.data(), gram.data(), gram.size(), raft::CompareApprox(1e-6f))); + gram_host.data(), gram.data(), gram.size(), raft::CompareApprox(1e-6f), stream)); } - raft::resources handle; - cudaStream_t stream = 0; GramMatrixInputs params; + raft::resources handle; rmm::device_uvector x1; rmm::device_uvector x2; rmm::device_uvector gram; + std::vector gram_host; }; diff --git a/cpp/test/matrix/select_k.cu b/cpp/test/matrix/select_k.cu index 487b6d0bfd..63f020b420 100644 --- a/cpp/test/matrix/select_k.cu +++ b/cpp/test/matrix/select_k.cu @@ -13,357 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - -#include "../test_utils.cuh" -#include - -#include - -#include -#include -#include -#include - -#include - -#include -#include - -#include -#include +#include "select_k.cuh" namespace raft::matrix { -template -auto gen_simple_ids(uint32_t batch_size, uint32_t len) -> std::vector -{ - std::vector out(batch_size * len); - auto s = rmm::cuda_stream_default; - rmm::device_uvector out_d(out.size(), s); - sparse::iota_fill(out_d.data(), IdxT(batch_size), IdxT(len), s); - update_host(out.data(), out_d.data(), out.size(), s); - s.synchronize(); - return out; -} - -template -struct io_simple { - public: - bool not_supported = false; - - io_simple(const select::params& spec, - const std::vector& in_dists, - const std::vector& out_dists, - const std::vector& out_ids) - : in_dists_(in_dists), - in_ids_(gen_simple_ids(spec.batch_size, spec.len)), - out_dists_(out_dists), - out_ids_(out_ids) - { - } - - auto get_in_dists() -> std::vector& { return in_dists_; } - auto get_in_ids() -> std::vector& { return in_ids_; } - auto get_out_dists() -> std::vector& { return out_dists_; } - auto get_out_ids() -> std::vector& { return out_ids_; } - - private: - std::vector in_dists_; - std::vector in_ids_; - std::vector out_dists_; - std::vector out_ids_; -}; - -template -struct io_computed { - public: - bool not_supported = false; - - io_computed(const select::params& spec, - const select::Algo& algo, - const std::vector& in_dists, - const std::optional>& in_ids = std::nullopt) - : in_dists_(in_dists), - in_ids_(in_ids.value_or(gen_simple_ids(spec.batch_size, spec.len))), - out_dists_(spec.batch_size * spec.k), - out_ids_(spec.batch_size * spec.k) - { - // check if the size is supported by the algorithm - switch (algo) { - case select::Algo::kWarpAuto: - case select::Algo::kWarpImmediate: - case select::Algo::kWarpFiltered: - case select::Algo::kWarpDistributed: - case select::Algo::kWarpDistributedShm: { - if (spec.k > raft::matrix::detail::select::warpsort::kMaxCapacity) { - not_supported = true; - return; - } - } break; - default: break; - } - - resources handle{}; - auto stream = resource::get_cuda_stream(handle); - - rmm::device_uvector in_dists_d(in_dists_.size(), stream); - rmm::device_uvector in_ids_d(in_ids_.size(), stream); - rmm::device_uvector out_dists_d(out_dists_.size(), stream); - rmm::device_uvector out_ids_d(out_ids_.size(), stream); - - update_device(in_dists_d.data(), in_dists_.data(), in_dists_.size(), stream); - update_device(in_ids_d.data(), in_ids_.data(), in_ids_.size(), stream); - - select::select_k_impl(handle, - algo, - in_dists_d.data(), - spec.use_index_input ? in_ids_d.data() : nullptr, - spec.batch_size, - spec.len, - spec.k, - out_dists_d.data(), - out_ids_d.data(), - spec.select_min); - - update_host(out_dists_.data(), out_dists_d.data(), out_dists_.size(), stream); - update_host(out_ids_.data(), out_ids_d.data(), out_ids_.size(), stream); - - interruptible::synchronize(stream); - - auto p = topk_sort_permutation(out_dists_, out_ids_, spec.k, spec.select_min); - apply_permutation(out_dists_, p); - apply_permutation(out_ids_, p); - } - - auto get_in_dists() -> std::vector& { return in_dists_; } - auto get_in_ids() -> std::vector& { return in_ids_; } - auto get_out_dists() -> std::vector& { return out_dists_; } - auto get_out_ids() -> std::vector& { return out_ids_; } - - private: - std::vector in_dists_; - std::vector in_ids_; - std::vector out_dists_; - std::vector out_ids_; - - auto topk_sort_permutation(const std::vector& vec, - const std::vector& inds, - uint32_t k, - bool select_min) -> std::vector - { - std::vector p(vec.size()); - std::iota(p.begin(), p.end(), 0); - if (select_min) { - std::sort(p.begin(), p.end(), [&vec, &inds, k](IdxT i, IdxT j) { - const IdxT ik = i / k; - const IdxT jk = j / k; - if (ik == jk) { - if (vec[i] == vec[j]) { return inds[i] < inds[j]; } - return vec[i] < vec[j]; - } - return ik < jk; - }); - } else { - std::sort(p.begin(), p.end(), [&vec, &inds, k](IdxT i, IdxT j) { - const IdxT ik = i / k; - const IdxT jk = j / k; - if (ik == jk) { - if (vec[i] == vec[j]) { return inds[i] < inds[j]; } - return vec[i] > vec[j]; - } - return ik < jk; - }); - } - return p; - } - - template - void apply_permutation(std::vector& vec, const std::vector& p) // NOLINT - { - for (auto i = IdxT(vec.size()) - 1; i > 0; i--) { - auto j = p[i]; - while (j > i) - j = p[j]; - std::swap(vec[j], vec[i]); - } - } -}; - -template - -using Params = std::tuple; - -template typename ParamsReader> -struct SelectK // NOLINT - : public testing::TestWithParam::params_t> { - const select::params spec; - const select::Algo algo; - typename ParamsReader::io_t ref; - io_computed res; - - explicit SelectK(Params::io_t> ps) - : spec(std::get<0>(ps)), - algo(std::get<1>(ps)), // NOLINT - ref(std::get<2>(ps)), // NOLINT - res(spec, algo, ref.get_in_dists(), ref.get_in_ids()) // NOLINT - { - } - - explicit SelectK(typename ParamsReader::params_t ps) - : SelectK(ParamsReader::read(ps)) - { - } - - SelectK() - : SelectK(testing::TestWithParam::params_t>::GetParam()) - { - } - - void run() - { - if (ref.not_supported || res.not_supported) { GTEST_SKIP(); } - ASSERT_TRUE(hostVecMatch(ref.get_out_dists(), res.get_out_dists(), Compare())); - - // If the dists (keys) are the same, different corresponding ids may end up in the selection due - // to non-deterministic nature of some implementations. - auto& in_ids = ref.get_in_ids(); - auto& in_dists = ref.get_in_dists(); - auto compare_ids = [&in_ids, &in_dists](const IdxT& i, const IdxT& j) { - if (i == j) return true; - auto ix_i = static_cast(std::find(in_ids.begin(), in_ids.end(), i) - in_ids.begin()); - auto ix_j = static_cast(std::find(in_ids.begin(), in_ids.end(), j) - in_ids.begin()); - if (static_cast(ix_i) >= in_ids.size() || static_cast(ix_j) >= in_ids.size()) - return false; - auto dist_i = in_dists[ix_i]; - auto dist_j = in_dists[ix_j]; - if (dist_i == dist_j) return true; - std::cout << "ERROR: ref[" << ix_i << "] = " << dist_i << " != " - << "res[" << ix_j << "] = " << dist_j << std::endl; - return false; - }; - ASSERT_TRUE(hostVecMatch(ref.get_out_ids(), res.get_out_ids(), compare_ids)); - } -}; - -template -struct params_simple { - using io_t = io_simple; - using input_t = - std::tuple, std::vector, std::vector>; - using params_t = std::tuple; - - static auto read(params_t ps) -> Params - { - auto ins = std::get<0>(ps); - auto algo = std::get<1>(ps); - return std::make_tuple( - std::get<0>(ins), - algo, - io_simple( - std::get<0>(ins), std::get<1>(ins), std::get<2>(ins), std::get<3>(ins))); - } -}; - -auto inputs_simple_f = testing::Values( - params_simple::input_t( - {5, 5, 5, true, true}, - {5.0, 4.0, 3.0, 2.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 2.0, 3.0, 5.0, - 1.0, 4.0, 5.0, 3.0, 2.0, 4.0, 1.0, 1.0, 3.0, 2.0, 5.0, 4.0}, - {1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, - 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0}, - {4, 3, 2, 1, 0, 0, 1, 2, 3, 4, 3, 0, 1, 4, 2, 4, 2, 1, 3, 0, 0, 2, 1, 4, 3}), - params_simple::input_t( - {5, 5, 3, true, true}, - {5.0, 4.0, 3.0, 2.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 2.0, 3.0, 5.0, - 1.0, 4.0, 5.0, 3.0, 2.0, 4.0, 1.0, 1.0, 3.0, 2.0, 5.0, 4.0}, - {1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0}, - {4, 3, 2, 0, 1, 2, 3, 0, 1, 4, 2, 1, 0, 2, 1}), - params_simple::input_t( - {5, 5, 5, true, false}, - {5.0, 4.0, 3.0, 2.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 2.0, 3.0, 5.0, - 1.0, 4.0, 5.0, 3.0, 2.0, 4.0, 1.0, 1.0, 3.0, 2.0, 5.0, 4.0}, - {1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, - 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0}, - {4, 3, 2, 1, 0, 0, 1, 2, 3, 4, 3, 0, 1, 4, 2, 4, 2, 1, 3, 0, 0, 2, 1, 4, 3}), - params_simple::input_t( - {5, 5, 3, true, false}, - {5.0, 4.0, 3.0, 2.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 2.0, 3.0, 5.0, - 1.0, 4.0, 5.0, 3.0, 2.0, 4.0, 1.0, 1.0, 3.0, 2.0, 5.0, 4.0}, - {1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0}, - {4, 3, 2, 0, 1, 2, 3, 0, 1, 4, 2, 1, 0, 2, 1}), - params_simple::input_t( - {5, 7, 3, true, true}, - {5.0, 4.0, 3.0, 2.0, 1.3, 7.5, 19.0, 9.0, 2.0, 3.0, 3.0, 5.0, 6.0, 4.0, 2.0, 3.0, 5.0, 1.0, - 4.0, 1.0, 1.0, 5.0, 7.0, 2.5, 4.0, 7.0, 8.0, 8.0, 1.0, 3.0, 2.0, 5.0, 4.0, 1.1, 1.2}, - {1.3, 2.0, 3.0, 2.0, 3.0, 3.0, 1.0, 1.0, 1.0, 2.5, 4.0, 5.0, 1.0, 1.1, 1.2}, - {4, 3, 2, 1, 2, 3, 3, 5, 6, 2, 3, 0, 0, 5, 6}), - params_simple::input_t( - {1, 7, 3, true, true}, {2.0, 3.0, 5.0, 1.0, 4.0, 1.0, 1.0}, {1.0, 1.0, 1.0}, {3, 5, 6}), - params_simple::input_t( - {1, 7, 3, false, false}, {2.0, 3.0, 5.0, 1.0, 4.0, 1.0, 1.0}, {5.0, 4.0, 3.0}, {2, 4, 1}), - params_simple::input_t( - {1, 7, 3, false, true}, {2.0, 3.0, 5.0, 9.0, 4.0, 9.0, 9.0}, {9.0, 9.0, 9.0}, {3, 5, 6}), - params_simple::input_t( - {1, 130, 5, false, true}, - {19, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, - 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, - 0, 1, 0, 1, 0, 1, 0, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, - 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 3, 4, - 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 4, 4, 2, 3, 2, 3, 2, 3, 2, 3, 2, 20}, - {20, 19, 18, 17, 16}, - {129, 0, 117, 116, 115}), - params_simple::input_t( - {1, 130, 15, false, true}, - {19, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, - 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, - 0, 1, 0, 1, 0, 1, 0, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, - 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 3, 4, - 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 4, 4, 2, 3, 2, 3, 2, 3, 2, 3, 2, 20}, - {20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6}, - {129, 0, 117, 116, 115, 114, 113, 112, 111, 110, 109, 108, 107, 106, 105})); - -using SimpleFloatInt = SelectK; -TEST_P(SimpleFloatInt, Run) { run(); } // NOLINT -INSTANTIATE_TEST_CASE_P( // NOLINT - SelectK, - SimpleFloatInt, - testing::Combine(inputs_simple_f, - testing::Values(select::Algo::kPublicApi, - select::Algo::kRadix8bits, - select::Algo::kRadix11bits, - select::Algo::kRadix11bitsExtraPass, - select::Algo::kWarpImmediate, - select::Algo::kWarpFiltered, - select::Algo::kWarpDistributed))); - -template -struct with_ref { - template - struct params_random { - using io_t = io_computed; - using params_t = std::tuple; - - static auto read(params_t ps) -> Params - { - auto spec = std::get<0>(ps); - auto algo = std::get<1>(ps); - std::vector dists(spec.len * spec.batch_size); - - raft::resources handle; - { - auto s = resource::get_cuda_stream(handle); - rmm::device_uvector dists_d(spec.len * spec.batch_size, s); - raft::random::RngState r(42); - normal(handle, r, dists_d.data(), dists_d.size(), KeyT(10.0), KeyT(100.0)); - update_host(dists.data(), dists_d.data(), dists_d.size(), s); - s.synchronize(); - } - - return std::make_tuple(spec, algo, io_computed(spec, RefAlgo, dists)); - } - }; -}; - auto inputs_random_longlist = testing::Values(select::params{1, 130, 15, false}, select::params{1, 128, 15, false}, select::params{20, 700, 1, true}, @@ -412,7 +65,7 @@ auto inputs_random_largesize = testing::Values(select::params{100, 100000, 1, tr select::params{1, 1000000000, 256, false, false}); auto inputs_random_largek = testing::Values(select::params{100, 100000, 1000, true}, - select::params{100, 100000, 2000, true}, + select::params{100, 100000, 2000, false}, select::params{100, 100000, 100000, true, false}, select::params{100, 100000, 2048, false}, select::params{100, 100000, 1237, true}); @@ -458,14 +111,4 @@ INSTANTIATE_TEST_CASE_P( // NOLINT select::Algo::kRadix8bits, select::Algo::kRadix11bits, select::Algo::kRadix11bitsExtraPass))); - -using ReferencedRandomFloatSizeT = - SelectK::params_random>; -TEST_P(ReferencedRandomFloatSizeT, LargeK) { run(); } // NOLINT -INSTANTIATE_TEST_CASE_P(SelectK, // NOLINT - ReferencedRandomFloatSizeT, - testing::Combine(inputs_random_largek, - testing::Values(select::Algo::kRadix11bits, - select::Algo::kRadix11bitsExtraPass))); - } // namespace raft::matrix diff --git a/cpp/test/matrix/select_k.cuh b/cpp/test/matrix/select_k.cuh new file mode 100644 index 0000000000..e0e0cad225 --- /dev/null +++ b/cpp/test/matrix/select_k.cuh @@ -0,0 +1,366 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../test_utils.cuh" +#include + +#include + +#include +#include +#include +#include + +#include + +#include +#include + +#include +#include + +namespace raft::matrix { + +template +auto gen_simple_ids(uint32_t batch_size, uint32_t len) -> std::vector +{ + std::vector out(batch_size * len); + auto s = rmm::cuda_stream_default; + rmm::device_uvector out_d(out.size(), s); + sparse::iota_fill(out_d.data(), IdxT(batch_size), IdxT(len), s); + update_host(out.data(), out_d.data(), out.size(), s); + s.synchronize(); + return out; +} + +template +struct io_simple { + public: + bool not_supported = false; + + io_simple(const select::params& spec, + const std::vector& in_dists, + const std::vector& out_dists, + const std::vector& out_ids) + : in_dists_(in_dists), + in_ids_(gen_simple_ids(spec.batch_size, spec.len)), + out_dists_(out_dists), + out_ids_(out_ids) + { + } + + auto get_in_dists() -> std::vector& { return in_dists_; } + auto get_in_ids() -> std::vector& { return in_ids_; } + auto get_out_dists() -> std::vector& { return out_dists_; } + auto get_out_ids() -> std::vector& { return out_ids_; } + + private: + std::vector in_dists_; + std::vector in_ids_; + std::vector out_dists_; + std::vector out_ids_; +}; + +template +struct io_computed { + public: + bool not_supported = false; + + io_computed(const select::params& spec, + const select::Algo& algo, + const std::vector& in_dists, + const std::optional>& in_ids = std::nullopt) + : in_dists_(in_dists), + in_ids_(in_ids.value_or(gen_simple_ids(spec.batch_size, spec.len))), + out_dists_(spec.batch_size * spec.k), + out_ids_(spec.batch_size * spec.k) + { + // check if the size is supported by the algorithm + switch (algo) { + case select::Algo::kWarpAuto: + case select::Algo::kWarpImmediate: + case select::Algo::kWarpFiltered: + case select::Algo::kWarpDistributed: + case select::Algo::kWarpDistributedShm: { + if (spec.k > raft::matrix::detail::select::warpsort::kMaxCapacity) { + not_supported = true; + return; + } + } break; + default: break; + } + + resources handle{}; + auto stream = resource::get_cuda_stream(handle); + + rmm::device_uvector in_dists_d(in_dists_.size(), stream); + rmm::device_uvector in_ids_d(in_ids_.size(), stream); + rmm::device_uvector out_dists_d(out_dists_.size(), stream); + rmm::device_uvector out_ids_d(out_ids_.size(), stream); + + update_device(in_dists_d.data(), in_dists_.data(), in_dists_.size(), stream); + update_device(in_ids_d.data(), in_ids_.data(), in_ids_.size(), stream); + + select::select_k_impl(handle, + algo, + in_dists_d.data(), + spec.use_index_input ? in_ids_d.data() : nullptr, + spec.batch_size, + spec.len, + spec.k, + out_dists_d.data(), + out_ids_d.data(), + spec.select_min); + + update_host(out_dists_.data(), out_dists_d.data(), out_dists_.size(), stream); + update_host(out_ids_.data(), out_ids_d.data(), out_ids_.size(), stream); + + interruptible::synchronize(stream); + + auto p = topk_sort_permutation(out_dists_, out_ids_, spec.k, spec.select_min); + apply_permutation(out_dists_, p); + apply_permutation(out_ids_, p); + } + + auto get_in_dists() -> std::vector& { return in_dists_; } + auto get_in_ids() -> std::vector& { return in_ids_; } + auto get_out_dists() -> std::vector& { return out_dists_; } + auto get_out_ids() -> std::vector& { return out_ids_; } + + private: + std::vector in_dists_; + std::vector in_ids_; + std::vector out_dists_; + std::vector out_ids_; + + auto topk_sort_permutation(const std::vector& vec, + const std::vector& inds, + uint32_t k, + bool select_min) -> std::vector + { + std::vector p(vec.size()); + std::iota(p.begin(), p.end(), 0); + if (select_min) { + std::sort(p.begin(), p.end(), [&vec, &inds, k](IdxT i, IdxT j) { + const IdxT ik = i / k; + const IdxT jk = j / k; + if (ik == jk) { + if (vec[i] == vec[j]) { return inds[i] < inds[j]; } + return vec[i] < vec[j]; + } + return ik < jk; + }); + } else { + std::sort(p.begin(), p.end(), [&vec, &inds, k](IdxT i, IdxT j) { + const IdxT ik = i / k; + const IdxT jk = j / k; + if (ik == jk) { + if (vec[i] == vec[j]) { return inds[i] < inds[j]; } + return vec[i] > vec[j]; + } + return ik < jk; + }); + } + return p; + } + + template + void apply_permutation(std::vector& vec, const std::vector& p) // NOLINT + { + for (auto i = IdxT(vec.size()) - 1; i > 0; i--) { + auto j = p[i]; + while (j > i) + j = p[j]; + std::swap(vec[j], vec[i]); + } + } +}; + +template +using Params = std::tuple; + +template typename ParamsReader> +struct SelectK // NOLINT + : public testing::TestWithParam::params_t> { + const select::params spec; + const select::Algo algo; + typename ParamsReader::io_t ref; + io_computed res; + + explicit SelectK(Params::io_t> ps) + : spec(std::get<0>(ps)), + algo(std::get<1>(ps)), // NOLINT + ref(std::get<2>(ps)), // NOLINT + res(spec, algo, ref.get_in_dists(), ref.get_in_ids()) // NOLINT + { + } + + explicit SelectK(typename ParamsReader::params_t ps) + : SelectK(ParamsReader::read(ps)) + { + } + + SelectK() + : SelectK(testing::TestWithParam::params_t>::GetParam()) + { + } + + void run() + { + if (ref.not_supported || res.not_supported) { GTEST_SKIP(); } + ASSERT_TRUE(hostVecMatch(ref.get_out_dists(), res.get_out_dists(), Compare())); + + // If the dists (keys) are the same, different corresponding ids may end up in the selection due + // to non-deterministic nature of some implementations. + auto& in_ids = ref.get_in_ids(); + auto& in_dists = ref.get_in_dists(); + auto compare_ids = [&in_ids, &in_dists](const IdxT& i, const IdxT& j) { + if (i == j) return true; + auto ix_i = static_cast(std::find(in_ids.begin(), in_ids.end(), i) - in_ids.begin()); + auto ix_j = static_cast(std::find(in_ids.begin(), in_ids.end(), j) - in_ids.begin()); + if (static_cast(ix_i) >= in_ids.size() || static_cast(ix_j) >= in_ids.size()) + return false; + auto dist_i = in_dists[ix_i]; + auto dist_j = in_dists[ix_j]; + if (dist_i == dist_j) return true; + std::cout << "ERROR: ref[" << ix_i << "] = " << dist_i << " != " + << "res[" << ix_j << "] = " << dist_j << std::endl; + return false; + }; + ASSERT_TRUE(hostVecMatch(ref.get_out_ids(), res.get_out_ids(), compare_ids)); + } +}; + +template +struct params_simple { + using io_t = io_simple; + using input_t = + std::tuple, std::vector, std::vector>; + using params_t = std::tuple; + + static auto read(params_t ps) -> Params + { + auto ins = std::get<0>(ps); + auto algo = std::get<1>(ps); + return std::make_tuple( + std::get<0>(ins), + algo, + io_simple( + std::get<0>(ins), std::get<1>(ins), std::get<2>(ins), std::get<3>(ins))); + } +}; + +auto inputs_simple_f = testing::Values( + params_simple::input_t( + {5, 5, 5, true, true}, + {5.0, 4.0, 3.0, 2.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 2.0, 3.0, 5.0, + 1.0, 4.0, 5.0, 3.0, 2.0, 4.0, 1.0, 1.0, 3.0, 2.0, 5.0, 4.0}, + {1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, + 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0}, + {4, 3, 2, 1, 0, 0, 1, 2, 3, 4, 3, 0, 1, 4, 2, 4, 2, 1, 3, 0, 0, 2, 1, 4, 3}), + params_simple::input_t( + {5, 5, 3, true, true}, + {5.0, 4.0, 3.0, 2.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 2.0, 3.0, 5.0, + 1.0, 4.0, 5.0, 3.0, 2.0, 4.0, 1.0, 1.0, 3.0, 2.0, 5.0, 4.0}, + {1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0}, + {4, 3, 2, 0, 1, 2, 3, 0, 1, 4, 2, 1, 0, 2, 1}), + params_simple::input_t( + {5, 5, 5, true, false}, + {5.0, 4.0, 3.0, 2.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 2.0, 3.0, 5.0, + 1.0, 4.0, 5.0, 3.0, 2.0, 4.0, 1.0, 1.0, 3.0, 2.0, 5.0, 4.0}, + {1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, + 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0}, + {4, 3, 2, 1, 0, 0, 1, 2, 3, 4, 3, 0, 1, 4, 2, 4, 2, 1, 3, 0, 0, 2, 1, 4, 3}), + params_simple::input_t( + {5, 5, 3, true, false}, + {5.0, 4.0, 3.0, 2.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 2.0, 3.0, 5.0, + 1.0, 4.0, 5.0, 3.0, 2.0, 4.0, 1.0, 1.0, 3.0, 2.0, 5.0, 4.0}, + {1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0}, + {4, 3, 2, 0, 1, 2, 3, 0, 1, 4, 2, 1, 0, 2, 1}), + params_simple::input_t( + {5, 7, 3, true, true}, + {5.0, 4.0, 3.0, 2.0, 1.3, 7.5, 19.0, 9.0, 2.0, 3.0, 3.0, 5.0, 6.0, 4.0, 2.0, 3.0, 5.0, 1.0, + 4.0, 1.0, 1.0, 5.0, 7.0, 2.5, 4.0, 7.0, 8.0, 8.0, 1.0, 3.0, 2.0, 5.0, 4.0, 1.1, 1.2}, + {1.3, 2.0, 3.0, 2.0, 3.0, 3.0, 1.0, 1.0, 1.0, 2.5, 4.0, 5.0, 1.0, 1.1, 1.2}, + {4, 3, 2, 1, 2, 3, 3, 5, 6, 2, 3, 0, 0, 5, 6}), + params_simple::input_t( + {1, 7, 3, true, true}, {2.0, 3.0, 5.0, 1.0, 4.0, 1.0, 1.0}, {1.0, 1.0, 1.0}, {3, 5, 6}), + params_simple::input_t( + {1, 7, 3, false, false}, {2.0, 3.0, 5.0, 1.0, 4.0, 1.0, 1.0}, {5.0, 4.0, 3.0}, {2, 4, 1}), + params_simple::input_t( + {1, 7, 3, false, true}, {2.0, 3.0, 5.0, 9.0, 4.0, 9.0, 9.0}, {9.0, 9.0, 9.0}, {3, 5, 6}), + params_simple::input_t( + {1, 130, 5, false, true}, + {19, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, + 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 4, 4, 2, 3, 2, 3, 2, 3, 2, 3, 2, 20}, + {20, 19, 18, 17, 16}, + {129, 0, 117, 116, 115}), + params_simple::input_t( + {1, 130, 15, false, true}, + {19, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, + 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 4, 4, 2, 3, 2, 3, 2, 3, 2, 3, 2, 20}, + {20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6}, + {129, 0, 117, 116, 115, 114, 113, 112, 111, 110, 109, 108, 107, 106, 105})); + +using SimpleFloatInt = SelectK; +TEST_P(SimpleFloatInt, Run) { run(); } // NOLINT +INSTANTIATE_TEST_CASE_P( // NOLINT + SelectK, + SimpleFloatInt, + testing::Combine(inputs_simple_f, + testing::Values(select::Algo::kPublicApi, + select::Algo::kRadix8bits, + select::Algo::kRadix11bits, + select::Algo::kRadix11bitsExtraPass, + select::Algo::kWarpImmediate, + select::Algo::kWarpFiltered, + select::Algo::kWarpDistributed))); + +template +struct with_ref { + template + struct params_random { + using io_t = io_computed; + using params_t = std::tuple; + + static auto read(params_t ps) -> Params + { + auto spec = std::get<0>(ps); + auto algo = std::get<1>(ps); + std::vector dists(spec.len * spec.batch_size); + + raft::resources handle; + { + auto s = resource::get_cuda_stream(handle); + rmm::device_uvector dists_d(spec.len * spec.batch_size, s); + raft::random::RngState r(42); + normal(handle, r, dists_d.data(), dists_d.size(), KeyT(10.0), KeyT(100.0)); + update_host(dists.data(), dists_d.data(), dists_d.size(), s); + s.synchronize(); + } + + return std::make_tuple(spec, algo, io_computed(spec, RefAlgo, dists)); + } + }; +}; + +} // namespace raft::matrix diff --git a/cpp/test/matrix/select_large_k.cu b/cpp/test/matrix/select_large_k.cu new file mode 100644 index 0000000000..2772e84eb3 --- /dev/null +++ b/cpp/test/matrix/select_large_k.cu @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "select_k.cuh" + +namespace raft::matrix { + +auto inputs_random_largek = testing::Values(select::params{100, 100000, 1000, true}, + select::params{100, 100000, 2000, false}, + select::params{100, 100000, 100000, true, false}, + select::params{100, 100000, 2048, false}, + select::params{100, 100000, 1237, true}); + +using ReferencedRandomFloatSizeT = + SelectK::params_random>; +TEST_P(ReferencedRandomFloatSizeT, LargeK) { run(); } // NOLINT +INSTANTIATE_TEST_CASE_P(SelectK, // NOLINT + ReferencedRandomFloatSizeT, + testing::Combine(inputs_random_largek, + testing::Values(select::Algo::kRadix11bits, + select::Algo::kRadix11bitsExtraPass))); + +} // namespace raft::matrix diff --git a/cpp/test/neighbors/selection.cu b/cpp/test/neighbors/selection.cu index 5d63338b45..6030e2a1a6 100644 --- a/cpp/test/neighbors/selection.cu +++ b/cpp/test/neighbors/selection.cu @@ -441,7 +441,7 @@ auto inputs_random_largesize = testing::Values(SelectTestSpec{100, 100000, 1, tr SelectTestSpec{1, 100000000, 256, false, false}); auto inputs_random_largek = testing::Values(SelectTestSpec{100, 100000, 1000, true}, - SelectTestSpec{100, 100000, 2000, true}, + SelectTestSpec{100, 100000, 2000, false}, SelectTestSpec{100, 100000, 100000, true, false}, SelectTestSpec{100, 100000, 2048, false}, SelectTestSpec{100, 100000, 1237, true}); @@ -482,6 +482,11 @@ INSTANTIATE_TEST_CASE_P(SelectionTest, * SelectionTest/ReferencedRandomFloatSizeT.LargeK/0 * Indicices do not match! ref[91628] = 131.359 != res[36504] = 158.438 * Actual: false (actual=36504 != expected=91628 @38999; + * + * SelectionTest/ReferencedRandomFloatSizeT.LargeK/1 + * ERROR: ref[57977] = 58.9079 != res[21973] = 54.9354 + * Actual: false (actual=21973 != expected=57977 @107999; + * */ typedef SelectionTest::params_random> ReferencedRandomFloatSizeT; diff --git a/cpp/test/random/rng_discrete.cu b/cpp/test/random/rng_discrete.cu index 799f44735e..d1293f34ea 100644 --- a/cpp/test/random/rng_discrete.cu +++ b/cpp/test/random/rng_discrete.cu @@ -193,15 +193,16 @@ const std::vector> inputs_i64 = { {1, 10000, 5, 5, GenPhilox, 1234ULL}, }; -#define RNG_DISCRETE_TEST(test_type, test_name, test_inputs) \ - typedef RAFT_DEPAREN(test_type) test_name; \ - TEST_P(test_name, Result) \ - { \ - ASSERT_TRUE(devArrMatchHost(exp_histogram.data(), \ - histogram.data(), \ - exp_histogram.size(), \ - CompareApprox(tolerance))); \ - } \ +#define RNG_DISCRETE_TEST(test_type, test_name, test_inputs) \ + typedef RAFT_DEPAREN(test_type) test_name; \ + TEST_P(test_name, Result) \ + { \ + ASSERT_TRUE(devArrMatchHost(exp_histogram.data(), \ + histogram.data(), \ + exp_histogram.size(), \ + CompareApprox(tolerance), \ + stream)); \ + } \ INSTANTIATE_TEST_CASE_P(ReduceTests, test_name, ::testing::ValuesIn(test_inputs)) RNG_DISCRETE_TEST((RngDiscreteTest), RngDiscreteTestI32FI32, inputs_i32); diff --git a/cpp/test/sparse/gram.cu b/cpp/test/sparse/gram.cu index 87cebd3519..7b4736a08c 100644 --- a/cpp/test/sparse/gram.cu +++ b/cpp/test/sparse/gram.cu @@ -157,6 +157,8 @@ class GramMatrixTest : public ::testing::TestWithParam { raft::random::Rng r(42137ULL); r.uniform(x1.data(), x1.size(), math_t(0), math_t(1), stream); r.uniform(x2.data(), x2.size(), math_t(0), math_t(1), stream); + + RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); } ~GramMatrixTest() override { RAFT_CUDA_TRY_NO_THROW(cudaStreamDestroy(stream)); } @@ -204,7 +206,6 @@ class GramMatrixTest : public ::testing::TestWithParam { raft::update_device(indices, indices_host.data(), nnz, stream); raft::update_device(data, data_host.data(), nnz, stream); resource::sync_stream(handle, stream); - return nnz; } @@ -273,7 +274,9 @@ class GramMatrixTest : public ::testing::TestWithParam { (*kernel)(handle, x1_csr, x2_csr, out_span); } } - + // Something in gram is executing not on the 'stream' and therefore + // a full device sync is required + RAFT_CUDA_TRY(cudaDeviceSynchronize()); naiveGramMatrixKernel(params.n1, params.n2, params.n_cols, @@ -287,11 +290,10 @@ class GramMatrixTest : public ::testing::TestWithParam { params.kernel, stream, handle); - resource::sync_stream(handle, stream); ASSERT_TRUE(raft::devArrMatchHost( - gram_host.data(), gram.data(), gram.size(), raft::CompareApprox(1e-6f))); + gram_host.data(), gram.data(), gram.size(), raft::CompareApprox(1e-6f), stream)); } raft::resources handle; diff --git a/cpp/test/util/device_atomics.cu b/cpp/test/util/device_atomics.cu index 5e8a67c8f6..355cb0d4dd 100644 --- a/cpp/test/util/device_atomics.cu +++ b/cpp/test/util/device_atomics.cu @@ -51,12 +51,12 @@ TEST(Raft, AtomicIncWarp) // Write all 1M thread indices to a unique location in `out_device` test_atomic_inc_warp_kernel<<>>(counter.data(), out_device.data()); - // Copy data to host - RAFT_CUDA_TRY(cudaMemcpy(out_host.data(), - (const void*)out_device.data(), - num_elts * sizeof(int), - cudaMemcpyDeviceToHost)); + RAFT_CUDA_TRY(cudaMemcpyAsync(out_host.data(), + (const void*)out_device.data(), + num_elts * sizeof(int), + cudaMemcpyDeviceToHost, + s)); // Check that count is correct and that each thread index is contained in the // array exactly once. diff --git a/dependencies.yaml b/dependencies.yaml index 200aa52596..7466947ce6 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -109,7 +109,7 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - cmake>=3.23.1,!=3.25.0 + - &cmake_ver cmake>=3.23.1,!=3.25.0 - cython>=0.29,<0.30 - ninja - scikit-build>=0.13.1 @@ -246,6 +246,7 @@ dependencies: common: - output_types: [conda] packages: + - *cmake_ver - gtest>=1.13.0 - gmock>=1.13.0 docs: