From 4c797bfa251d36f57870cc9ca8636d3098be964c Mon Sep 17 00:00:00 2001 From: Paul Taylor <178183+trxcllnt@users.noreply.github.com> Date: Wed, 29 May 2024 11:07:57 -0700 Subject: [PATCH] Fix building cugraph with CCCL main (#4404) Similar to https://github.com/rapidsai/cudf/pull/15552, we are testing [building RAPIDS with CCCL's main branch](https://github.com/NVIDIA/cccl/pull/1667) to get ahead of any breaking changes. Authors: - Paul Taylor (https://github.com/trxcllnt) - Ralph Liu (https://github.com/nv-rliu) - Seunghwa Kang (https://github.com/seunghwak) - Ray Bell (https://github.com/raybellwaves) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Seunghwa Kang (https://github.com/seunghwak) - Jake Awe (https://github.com/AyodeAwe) URL: https://github.com/rapidsai/cugraph/pull/4404 --- .devcontainer/Dockerfile | 5 + .../cuda11.8-conda/devcontainer.json | 2 +- .devcontainer/cuda11.8-pip/devcontainer.json | 8 +- .../cuda12.2-conda/devcontainer.json | 2 +- .devcontainer/cuda12.2-pip/devcontainer.json | 8 +- .github/workflows/pr.yaml | 2 +- cpp/CMakeLists.txt | 4 +- .../cugraph/utilities/device_functors.cuh | 9 +- cpp/include/cugraph/utilities/mask_utils.cuh | 5 +- cpp/src/community/detail/common_methods.cuh | 3 +- cpp/src/community/legacy/louvain.cuh | 15 ++- .../weakly_connected_components_impl.cuh | 15 ++- cpp/src/detail/utility_wrappers.cu | 4 +- cpp/src/prims/kv_store.cuh | 1 + ...m_reduce_dst_key_aggregated_outgoing_e.cuh | 2 +- cpp/src/structure/graph_view_impl.cuh | 36 +++--- cpp/tests/CMakeLists.txt | 24 +++- cpp/tests/prims/mg_extract_transform_e.cu | 109 +++++------------- .../sampling/sampling_post_processing_test.cu | 38 +++--- 19 files changed, 138 insertions(+), 154 deletions(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 3d0ac075be3..190003dd7af 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -7,6 +7,11 @@ FROM ${BASE} as pip-base ENV DEFAULT_VIRTUAL_ENV=rapids +RUN apt update -y \ + && DEBIAN_FRONTEND=noninteractive apt install -y \ + libblas-dev liblapack-dev \ + && rm -rf /tmp/* /var/tmp/* /var/cache/apt/* /var/lib/apt/lists/*; + FROM ${BASE} as conda-base ENV DEFAULT_CONDA_ENV=rapids diff --git a/.devcontainer/cuda11.8-conda/devcontainer.json b/.devcontainer/cuda11.8-conda/devcontainer.json index 7c9cd0258a4..d878f2d6584 100644 --- a/.devcontainer/cuda11.8-conda/devcontainer.json +++ b/.devcontainer/cuda11.8-conda/devcontainer.json @@ -11,7 +11,7 @@ "runArgs": [ "--rm", "--name", - "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda11.8-conda" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.06-cuda11.8-conda" ], "hostRequirements": {"gpu": "optional"}, "features": { diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json index a4dc168505b..a0edcb27df8 100644 --- a/.devcontainer/cuda11.8-pip/devcontainer.json +++ b/.devcontainer/cuda11.8-pip/devcontainer.json @@ -5,19 +5,16 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.06-cpp-cuda11.8-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.06-cpp-cuda11.8-ucx1.15.0-openmpi-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda11.8-pip" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.06-cuda11.8-pip" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/ucx:24.6": { - "version": "1.15.0" - }, "ghcr.io/rapidsai/devcontainers/features/cuda:24.6": { "version": "11.8", "installcuBLAS": true, @@ -28,7 +25,6 @@ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.6": {} }, "overrideFeatureInstallOrder": [ - "ghcr.io/rapidsai/devcontainers/features/ucx", "ghcr.io/rapidsai/devcontainers/features/cuda", "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" ], diff --git a/.devcontainer/cuda12.2-conda/devcontainer.json b/.devcontainer/cuda12.2-conda/devcontainer.json index eae4967f3b2..8a095d9b934 100644 --- a/.devcontainer/cuda12.2-conda/devcontainer.json +++ b/.devcontainer/cuda12.2-conda/devcontainer.json @@ -11,7 +11,7 @@ "runArgs": [ "--rm", "--name", - "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda12.2-conda" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.06-cuda12.2-conda" ], "hostRequirements": {"gpu": "optional"}, "features": { diff --git a/.devcontainer/cuda12.2-pip/devcontainer.json b/.devcontainer/cuda12.2-pip/devcontainer.json index 393a5c63d23..10436f8b28d 100644 --- a/.devcontainer/cuda12.2-pip/devcontainer.json +++ b/.devcontainer/cuda12.2-pip/devcontainer.json @@ -5,19 +5,16 @@ "args": { "CUDA": "12.2", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.06-cpp-cuda12.2-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.06-cpp-cuda12.2-ucx1.15.0-openmpi-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda12.2-pip" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.06-cuda12.2-pip" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/ucx:24.6": { - "version": "1.15.0" - }, "ghcr.io/rapidsai/devcontainers/features/cuda:24.6": { "version": "12.2", "installcuBLAS": true, @@ -28,7 +25,6 @@ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.6": {} }, "overrideFeatureInstallOrder": [ - "ghcr.io/rapidsai/devcontainers/features/ucx", "ghcr.io/rapidsai/devcontainers/features/cuda", "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" ], diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index c04e0e879d2..5733646a8b9 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -196,5 +196,5 @@ jobs: extra-repo-deploy-key: CUGRAPH_OPS_SSH_PRIVATE_DEPLOY_KEY build_command: | sccache -z; - build-all --verbose -j$(nproc --ignore=1); + build-all --verbose -j$(nproc --ignore=1) -DBUILD_CUGRAPH_MG_TESTS=ON; sccache -s; diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 2527599fece..7dca3d983a5 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -92,14 +92,14 @@ set(CUGRAPH_CXX_FLAGS "") set(CUGRAPH_CUDA_FLAGS "") if(CMAKE_COMPILER_IS_GNUCXX) - list(APPEND CUGRAPH_CXX_FLAGS -Werror -Wno-error=deprecated-declarations) + list(APPEND CUGRAPH_CXX_FLAGS -Werror -Wno-error=deprecated-declarations -Wno-deprecated-declarations -DRAFT_HIDE_DEPRECATION_WARNINGS) endif(CMAKE_COMPILER_IS_GNUCXX) message("-- Building for GPU_ARCHS = ${CMAKE_CUDA_ARCHITECTURES}") list(APPEND CUGRAPH_CUDA_FLAGS --expt-extended-lambda --expt-relaxed-constexpr) -list(APPEND CUGRAPH_CUDA_FLAGS -Werror=cross-execution-space-call -Wno-deprecated-declarations -Xptxas=--disable-warnings) +list(APPEND CUGRAPH_CUDA_FLAGS -Werror=cross-execution-space-call -Wno-deprecated-declarations -DRAFT_HIDE_DEPRECATION_WARNINGS -Xptxas=--disable-warnings) list(APPEND CUGRAPH_CUDA_FLAGS -Xcompiler=-Wall,-Wno-error=sign-compare,-Wno-error=unused-but-set-variable) list(APPEND CUGRAPH_CUDA_FLAGS -Xfatbin=-compress-all) diff --git a/cpp/include/cugraph/utilities/device_functors.cuh b/cpp/include/cugraph/utilities/device_functors.cuh index 3af8ed1dd19..20cf98f7e6d 100644 --- a/cpp/include/cugraph/utilities/device_functors.cuh +++ b/cpp/include/cugraph/utilities/device_functors.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -78,13 +78,14 @@ struct indirection_t { template struct indirection_if_idx_valid_t { + using value_type = typename thrust::iterator_traits::value_type; Iterator first{}; index_t invalid_idx{}; - typename thrust::iterator_traits::value_type invalid_value{}; + value_type invalid_value{}; - __device__ typename thrust::iterator_traits::value_type operator()(index_t i) const + __device__ value_type operator()(index_t i) const { - return (i != invalid_idx) ? *(first + i) : invalid_value; + return (i != invalid_idx) ? static_cast(*(first + i)) : invalid_value; } }; diff --git a/cpp/include/cugraph/utilities/mask_utils.cuh b/cpp/include/cugraph/utilities/mask_utils.cuh index 7b69ea3fe3a..1d86eef0ed1 100644 --- a/cpp/include/cugraph/utilities/mask_utils.cuh +++ b/cpp/include/cugraph/utilities/mask_utils.cuh @@ -20,6 +20,7 @@ #include +#include #include #include #include @@ -160,13 +161,13 @@ size_t count_set_bits(raft::handle_t const& handle, MaskIterator mask_first, siz handle.get_thrust_policy(), thrust::make_counting_iterator(size_t{0}), thrust::make_counting_iterator(packed_bool_size(num_bits)), - [mask_first, num_bits] __device__(size_t i) { + cuda::proclaim_return_type([mask_first, num_bits] __device__(size_t i) -> size_t { auto word = *(mask_first + i); if ((i + 1) * packed_bools_per_word() > num_bits) { word &= packed_bool_partial_mask(num_bits % packed_bools_per_word()); } return static_cast(__popc(word)); - }, + }), size_t{0}, thrust::plus{}); } diff --git a/cpp/src/community/detail/common_methods.cuh b/cpp/src/community/detail/common_methods.cuh index fe0a415db30..dcad4e92b95 100644 --- a/cpp/src/community/detail/common_methods.cuh +++ b/cpp/src/community/detail/common_methods.cuh @@ -29,6 +29,7 @@ #include #include +#include #include #include #include @@ -178,7 +179,7 @@ weight_t compute_modularity( handle.get_thrust_policy(), cluster_weights.begin(), cluster_weights.end(), - [] __device__(weight_t p) { return p * p; }, + cuda::proclaim_return_type([] __device__(weight_t p) -> weight_t { return p * p; }), weight_t{0}, thrust::plus()); diff --git a/cpp/src/community/legacy/louvain.cuh b/cpp/src/community/legacy/louvain.cuh index 6cf5bbdc3c6..53d0b231c03 100644 --- a/cpp/src/community/legacy/louvain.cuh +++ b/cpp/src/community/legacy/louvain.cuh @@ -22,6 +22,7 @@ #include #include + #ifdef TIMING #include #endif @@ -29,6 +30,7 @@ #include #include +#include #include #include #include @@ -141,12 +143,13 @@ class Louvain { handle_.get_thrust_policy(), thrust::make_counting_iterator(0), thrust::make_counting_iterator(graph.number_of_vertices), - [d_deg = deg.data(), d_inc = inc.data(), total_edge_weight, resolution] __device__( - vertex_t community) { - return ((d_inc[community] / total_edge_weight) - resolution * - (d_deg[community] * d_deg[community]) / - (total_edge_weight * total_edge_weight)); - }, + cuda::proclaim_return_type( + [d_deg = deg.data(), d_inc = inc.data(), total_edge_weight, resolution] __device__( + vertex_t community) -> weight_t { + return ((d_inc[community] / total_edge_weight) - + resolution * (d_deg[community] * d_deg[community]) / + (total_edge_weight * total_edge_weight)); + }), weight_t{0.0}, thrust::plus()); diff --git a/cpp/src/components/weakly_connected_components_impl.cuh b/cpp/src/components/weakly_connected_components_impl.cuh index d4d6d842951..f63f28210d8 100644 --- a/cpp/src/components/weakly_connected_components_impl.cuh +++ b/cpp/src/components/weakly_connected_components_impl.cuh @@ -34,6 +34,7 @@ #include +#include #include #include #include @@ -400,9 +401,10 @@ void weakly_connected_components_impl(raft::handle_t const& handle, handle.get_thrust_policy(), new_root_candidates.begin(), new_root_candidates.begin() + (new_root_candidates.size() > 0 ? 1 : 0), - [vertex_partition, degrees = degrees.data()] __device__(auto v) { - return degrees[vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v)]; - }, + cuda::proclaim_return_type( + [vertex_partition, degrees = degrees.data()] __device__(auto v) -> edge_t { + return degrees[vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v)]; + }), edge_t{0}, thrust::plus{}); @@ -642,9 +644,10 @@ void weakly_connected_components_impl(raft::handle_t const& handle, handle.get_thrust_policy(), thrust::get<0>(vertex_frontier.bucket(bucket_idx_cur).begin().get_iterator_tuple()), thrust::get<0>(vertex_frontier.bucket(bucket_idx_cur).end().get_iterator_tuple()), - [vertex_partition, degrees = degrees.data()] __device__(auto v) { - return degrees[vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v)]; - }, + cuda::proclaim_return_type( + [vertex_partition, degrees = degrees.data()] __device__(auto v) -> edge_t { + return degrees[vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v)]; + }), edge_t{0}, thrust::plus()); diff --git a/cpp/src/detail/utility_wrappers.cu b/cpp/src/detail/utility_wrappers.cu index 9100ecbd5e1..6d6158a16e7 100644 --- a/cpp/src/detail/utility_wrappers.cu +++ b/cpp/src/detail/utility_wrappers.cu @@ -21,6 +21,7 @@ #include +#include #include #include #include @@ -139,7 +140,8 @@ vertex_t compute_maximum_vertex_id(rmm::cuda_stream_view const& stream_view, rmm::exec_policy(stream_view), edge_first, edge_first + num_edges, - [] __device__(auto e) { return std::max(thrust::get<0>(e), thrust::get<1>(e)); }, + cuda::proclaim_return_type( + [] __device__(auto e) -> vertex_t { return std::max(thrust::get<0>(e), thrust::get<1>(e)); }), vertex_t{0}, thrust::maximum()); } diff --git a/cpp/src/prims/kv_store.cuh b/cpp/src/prims/kv_store.cuh index 5001a20bb83..de233fd583b 100644 --- a/cpp/src/prims/kv_store.cuh +++ b/cpp/src/prims/kv_store.cuh @@ -17,6 +17,7 @@ #include "prims/detail/optional_dataframe_buffer.hpp" +#include #include #include diff --git a/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh b/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh index 006d7760666..7be30b0a5f0 100644 --- a/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh +++ b/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh @@ -754,7 +754,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( std::make_unique>( std::move(majors), std::move(edge_major_values), - invalid_vertex_id::value, + edge_src_value_t{}, true, handle.get_stream()); } diff --git a/cpp/src/structure/graph_view_impl.cuh b/cpp/src/structure/graph_view_impl.cuh index 29dca6ef409..7097349dce5 100644 --- a/cpp/src/structure/graph_view_impl.cuh +++ b/cpp/src/structure/graph_view_impl.cuh @@ -353,7 +353,7 @@ edge_t count_edge_partition_multi_edges( execution_policy, thrust::make_counting_iterator(edge_partition.major_range_first()) + (*segment_offsets)[2], thrust::make_counting_iterator(edge_partition.major_range_first()) + (*segment_offsets)[3], - [edge_partition] __device__(auto major) { + cuda::proclaim_return_type([edge_partition] __device__(auto major) -> edge_t { auto major_offset = edge_partition.major_offset_from_major_nocheck(major); vertex_t const* indices{nullptr}; [[maybe_unused]] edge_t edge_offset{}; @@ -365,7 +365,7 @@ edge_t count_edge_partition_multi_edges( if (indices[i - 1] == indices[i]) { ++count; } } return count; - }, + }), edge_t{0}, thrust::plus{}); } @@ -374,19 +374,21 @@ edge_t count_edge_partition_multi_edges( execution_policy, thrust::make_counting_iterator(vertex_t{0}), thrust::make_counting_iterator(*(edge_partition.dcs_nzd_vertex_count())), - [edge_partition, major_start_offset = (*segment_offsets)[3]] __device__(auto idx) { - auto major_idx = - major_start_offset + idx; // major_offset != major_idx in the hypersparse region - vertex_t const* indices{nullptr}; - [[maybe_unused]] edge_t edge_offset{}; - edge_t local_degree{}; - thrust::tie(indices, edge_offset, local_degree) = edge_partition.local_edges(major_idx); - edge_t count{0}; - for (edge_t i = 1; i < local_degree; ++i) { // assumes neighbors are sorted - if (indices[i - 1] == indices[i]) { ++count; } - } - return count; - }, + cuda::proclaim_return_type( + [edge_partition, + major_start_offset = (*segment_offsets)[3]] __device__(auto idx) -> edge_t { + auto major_idx = + major_start_offset + idx; // major_offset != major_idx in the hypersparse region + vertex_t const* indices{nullptr}; + [[maybe_unused]] edge_t edge_offset{}; + edge_t local_degree{}; + thrust::tie(indices, edge_offset, local_degree) = edge_partition.local_edges(major_idx); + edge_t count{0}; + for (edge_t i = 1; i < local_degree; ++i) { // assumes neighbors are sorted + if (indices[i - 1] == indices[i]) { ++count; } + } + return count; + }), edge_t{0}, thrust::plus{}); } @@ -398,7 +400,7 @@ edge_t count_edge_partition_multi_edges( thrust::make_counting_iterator(edge_partition.major_range_first()), thrust::make_counting_iterator(edge_partition.major_range_first()) + edge_partition.major_range_size(), - [edge_partition] __device__(auto major) { + cuda::proclaim_return_type([edge_partition] __device__(auto major) -> edge_t { auto major_offset = edge_partition.major_offset_from_major_nocheck(major); vertex_t const* indices{nullptr}; [[maybe_unused]] edge_t edge_offset{}; @@ -409,7 +411,7 @@ edge_t count_edge_partition_multi_edges( if (indices[i - 1] == indices[i]) { ++count; } } return count; - }, + }), edge_t{0}, thrust::plus{}); } diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index d1dd2dec069..2152de28ff9 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -169,7 +169,11 @@ function(ConfigureTest CMAKE_TEST_NAME) ) set_target_properties( ${CMAKE_TEST_NAME} - PROPERTIES INSTALL_RPATH "\$ORIGIN/../../../lib") + PROPERTIES INSTALL_RPATH "\$ORIGIN/../../../lib" + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON) rapids_test_add( NAME ${CMAKE_TEST_NAME} @@ -195,7 +199,11 @@ function(ConfigureTestMG CMAKE_TEST_NAME) ) set_target_properties( ${CMAKE_TEST_NAME} - PROPERTIES INSTALL_RPATH "\$ORIGIN/../../../lib") + PROPERTIES INSTALL_RPATH "\$ORIGIN/../../../lib" + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON) rapids_test_add( NAME ${CMAKE_TEST_NAME} @@ -241,7 +249,11 @@ function(ConfigureCTest CMAKE_TEST_NAME) ) set_target_properties( ${CMAKE_TEST_NAME} - PROPERTIES INSTALL_RPATH "\$ORIGIN/../../../lib") + PROPERTIES INSTALL_RPATH "\$ORIGIN/../../../lib" + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON) rapids_test_add( NAME ${CMAKE_TEST_NAME} @@ -269,7 +281,11 @@ function(ConfigureCTestMG CMAKE_TEST_NAME) ) set_target_properties( ${CMAKE_TEST_NAME} - PROPERTIES INSTALL_RPATH "\$ORIGIN/../../../lib") + PROPERTIES INSTALL_RPATH "\$ORIGIN/../../../lib" + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON) rapids_test_add( NAME ${CMAKE_TEST_NAME} diff --git a/cpp/tests/prims/mg_extract_transform_e.cu b/cpp/tests/prims/mg_extract_transform_e.cu index 20e87070fa5..d7aa953ef7c 100644 --- a/cpp/tests/prims/mg_extract_transform_e.cu +++ b/cpp/tests/prims/mg_extract_transform_e.cu @@ -59,55 +59,27 @@ #include #include -template +template struct e_op_t { - static_assert(std::is_same_v || - std::is_same_v>); static_assert(std::is_same_v || std::is_same_v>); - using return_type = thrust::optional, - std::conditional_t, - thrust::tuple, - thrust::tuple>, - std::conditional_t, - thrust::tuple, - thrust::tuple>>>; - - __device__ return_type operator()(key_t optionally_tagged_src, - vertex_t dst, - property_t src_val, - property_t dst_val, - thrust::nullopt_t) const + using return_type = + thrust::optional, + thrust::tuple, + thrust::tuple>>; + + __device__ return_type operator()( + vertex_t src, vertex_t dst, property_t src_val, property_t dst_val, thrust::nullopt_t) const { auto output_payload = static_cast(1); if (src_val < dst_val) { - if constexpr (std::is_same_v) { - if constexpr (std::is_arithmetic_v) { - return thrust::make_tuple(optionally_tagged_src, dst, output_payload); - } else { - static_assert(thrust::tuple_size::value == size_t{2}); - return thrust::make_tuple(optionally_tagged_src, - dst, - thrust::get<0>(output_payload), - thrust::get<1>(output_payload)); - } + if constexpr (std::is_arithmetic_v) { + return thrust::make_tuple(src, dst, output_payload); } else { - static_assert(thrust::tuple_size::value == size_t{2}); - if constexpr (std::is_arithmetic_v) { - return thrust::make_tuple(thrust::get<0>(optionally_tagged_src), - thrust::get<1>(optionally_tagged_src), - dst, - output_payload); - } else { - static_assert(thrust::tuple_size::value == size_t{2}); - return thrust::make_tuple(thrust::get<0>(optionally_tagged_src), - thrust::get<1>(optionally_tagged_src), - dst, - thrust::get<0>(output_payload), - thrust::get<1>(output_payload)); - } + static_assert(thrust::tuple_size::value == size_t{2}); + return thrust::make_tuple( + src, dst, thrust::get<0>(output_payload), thrust::get<1>(output_payload)); } } else { return thrust::nullopt; @@ -134,19 +106,11 @@ class Tests_MGExtractTransformE virtual void TearDown() {} // Compare the results of extract_transform_e primitive - template + template void run_current_test(Prims_Usecase const& prims_usecase, input_usecase_t const& input_usecase) { using result_t = int32_t; - using key_t = - std::conditional_t, vertex_t, thrust::tuple>; - - static_assert(std::is_same_v || std::is_arithmetic_v); static_assert(std::is_same_v || cugraph::is_arithmetic_or_thrust_tuple_of_arithmetic::value); if constexpr (cugraph::is_thrust_tuple::value) { @@ -212,7 +176,7 @@ class Tests_MGExtractTransformE mg_src_prop.view(), mg_dst_prop.view(), cugraph::edge_dummy_property_t{}.view(), - e_op_t{}); + e_op_t{}); if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -225,7 +189,7 @@ class Tests_MGExtractTransformE if (prims_usecase.check_correctness) { auto mg_aggregate_extract_transform_output_buffer = cugraph::allocate_dataframe_buffer< - typename e_op_t::return_type::value_type>( + typename e_op_t::return_type::value_type>( size_t{0}, handle_->get_stream()); std::get<0>(mg_aggregate_extract_transform_output_buffer) = cugraph::test::device_gatherv(*handle_, @@ -239,18 +203,12 @@ class Tests_MGExtractTransformE cugraph::test::device_gatherv(*handle_, std::get<2>(mg_extract_transform_output_buffer).data(), std::get<2>(mg_extract_transform_output_buffer).size()); - if constexpr (!std::is_same_v || !std::is_arithmetic_v) { + if constexpr (!std::is_arithmetic_v) { std::get<3>(mg_aggregate_extract_transform_output_buffer) = cugraph::test::device_gatherv(*handle_, std::get<3>(mg_extract_transform_output_buffer).data(), std::get<3>(mg_extract_transform_output_buffer).size()); } - if constexpr (!std::is_same_v && !std::is_arithmetic_v) { - std::get<4>(mg_aggregate_extract_transform_output_buffer) = - cugraph::test::device_gatherv(*handle_, - std::get<4>(mg_extract_transform_output_buffer).data(), - std::get<4>(mg_extract_transform_output_buffer).size()); - } cugraph::graph_t sg_graph(*handle_); std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = @@ -292,7 +250,7 @@ class Tests_MGExtractTransformE sg_src_prop.view(), sg_dst_prop.view(), cugraph::edge_dummy_property_t{}.view(), - e_op_t{}); + e_op_t{}); thrust::sort(handle_->get_thrust_policy(), cugraph::get_dataframe_buffer_begin(sg_extract_transform_output_buffer), @@ -321,13 +279,13 @@ using Tests_MGExtractTransformE_Rmat = Tests_MGExtractTransformE(std::get<0>(param), std::get<1>(param)); + run_current_test(std::get<0>(param), std::get<1>(param)); } TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt32Int32FloatVoidInt32) { auto param = GetParam(); - run_current_test( + run_current_test( std::get<0>(param), cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } @@ -335,14 +293,14 @@ TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt32Int32FloatVoidInt32) TEST_P(Tests_MGExtractTransformE_File, CheckInt32Int32FloatVoidTupleFloatInt32) { auto param = GetParam(); - run_current_test>( - std::get<0>(param), std::get<1>(param)); + run_current_test>(std::get<0>(param), + std::get<1>(param)); } TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt32Int32FloatVoidTupleFloatInt32) { auto param = GetParam(); - run_current_test>( + run_current_test>( std::get<0>(param), cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } @@ -350,14 +308,13 @@ TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt32Int32FloatVoidTupleFloatInt32) TEST_P(Tests_MGExtractTransformE_File, CheckInt32Int32FloatInt32Int32) { auto param = GetParam(); - run_current_test(std::get<0>(param), - std::get<1>(param)); + run_current_test(std::get<0>(param), std::get<1>(param)); } TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt32Int32FloatInt32Int32) { auto param = GetParam(); - run_current_test( + run_current_test( std::get<0>(param), cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } @@ -365,14 +322,14 @@ TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt32Int32FloatInt32Int32) TEST_P(Tests_MGExtractTransformE_File, CheckInt32Int32FloatInt32TupleFloatInt32) { auto param = GetParam(); - run_current_test>( - std::get<0>(param), std::get<1>(param)); + run_current_test>(std::get<0>(param), + std::get<1>(param)); } TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt32Int32FloatInt32TupleFloatInt32) { auto param = GetParam(); - run_current_test>( + run_current_test>( std::get<0>(param), cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } @@ -380,14 +337,13 @@ TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt32Int32FloatInt32TupleFloatInt32) TEST_P(Tests_MGExtractTransformE_File, CheckInt32Int64FloatInt32Int32) { auto param = GetParam(); - run_current_test(std::get<0>(param), - std::get<1>(param)); + run_current_test(std::get<0>(param), std::get<1>(param)); } TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt32Int64FloatInt32Int32) { auto param = GetParam(); - run_current_test( + run_current_test( std::get<0>(param), cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } @@ -395,14 +351,13 @@ TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt32Int64FloatInt32Int32) TEST_P(Tests_MGExtractTransformE_File, CheckInt64Int64FloatInt32Int32) { auto param = GetParam(); - run_current_test(std::get<0>(param), - std::get<1>(param)); + run_current_test(std::get<0>(param), std::get<1>(param)); } TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt64Int64FloatInt32Int32) { auto param = GetParam(); - run_current_test( + run_current_test( std::get<0>(param), cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } diff --git a/cpp/tests/sampling/sampling_post_processing_test.cu b/cpp/tests/sampling/sampling_post_processing_test.cu index c87cc5b960b..3bca382a2eb 100644 --- a/cpp/tests/sampling/sampling_post_processing_test.cu +++ b/cpp/tests/sampling/sampling_post_processing_test.cu @@ -398,15 +398,16 @@ bool check_renumber_map_invariants( handle.get_thrust_policy(), unique_majors.begin(), unique_majors.end(), - [sorted_org_vertices = - raft::device_span(sorted_org_vertices.data(), sorted_org_vertices.size()), - matching_renumbered_vertices = raft::device_span( - matching_renumbered_vertices.data(), - matching_renumbered_vertices.size())] __device__(vertex_t major) { - auto it = thrust::lower_bound( - thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), major); - return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), it)]; - }, + cuda::proclaim_return_type( + [sorted_org_vertices = raft::device_span(sorted_org_vertices.data(), + sorted_org_vertices.size()), + matching_renumbered_vertices = raft::device_span( + matching_renumbered_vertices.data(), + matching_renumbered_vertices.size())] __device__(vertex_t major) -> vertex_t { + auto it = thrust::lower_bound( + thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), major); + return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), it)]; + }), std::numeric_limits::lowest(), thrust::maximum{}); @@ -414,15 +415,16 @@ bool check_renumber_map_invariants( handle.get_thrust_policy(), unique_minors.begin(), unique_minors.end(), - [sorted_org_vertices = - raft::device_span(sorted_org_vertices.data(), sorted_org_vertices.size()), - matching_renumbered_vertices = raft::device_span( - matching_renumbered_vertices.data(), - matching_renumbered_vertices.size())] __device__(vertex_t minor) { - auto it = thrust::lower_bound( - thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), minor); - return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), it)]; - }, + cuda::proclaim_return_type( + [sorted_org_vertices = raft::device_span(sorted_org_vertices.data(), + sorted_org_vertices.size()), + matching_renumbered_vertices = raft::device_span( + matching_renumbered_vertices.data(), + matching_renumbered_vertices.size())] __device__(vertex_t minor) -> vertex_t { + auto it = thrust::lower_bound( + thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), minor); + return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), it)]; + }), std::numeric_limits::max(), thrust::minimum{});