From 4c797bfa251d36f57870cc9ca8636d3098be964c Mon Sep 17 00:00:00 2001
From: Paul Taylor <178183+trxcllnt@users.noreply.github.com>
Date: Wed, 29 May 2024 11:07:57 -0700
Subject: [PATCH] Fix building cugraph with CCCL main (#4404)

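Similar to https://github.com/rapidsai/cudf/pull/15552, we are testing [building RAPIDS with CCCL's main branch](https://github.com/NVIDIA/cccl/pull/1667) to get ahead of any breaking changes.

Summary of changes:
  - Wrap device lambdas passed to Thrust algorithms in `cuda::proclaim_return_type`, with explicit trailing return types.
  - Give `indirection_if_idx_valid_t` a `value_type` alias and cast the dereferenced iterator to it.
  - Build the test targets as C++17 / CUDA 17, and enable `BUILD_CUGRAPH_MG_TESTS` in the PR workflow build command.
  - Switch the pip devcontainers to base images that already include UCX 1.15.0 and OpenMPI, replacing the separate UCX feature.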

Authors:
  - Paul Taylor (https://github.com/trxcllnt)
  - Ralph Liu (https://github.com/nv-rliu)
  - Seunghwa Kang (https://github.com/seunghwak)
  - Ray Bell (https://github.com/raybellwaves)

Approvers:
  - Chuck Hastings (https://github.com/ChuckHastings)
  - Seunghwa Kang (https://github.com/seunghwak)
  - Jake Awe (https://github.com/AyodeAwe)

URL: https://github.com/rapidsai/cugraph/pull/4404
---
 .devcontainer/Dockerfile                      |   5 +
 .../cuda11.8-conda/devcontainer.json          |   2 +-
 .devcontainer/cuda11.8-pip/devcontainer.json  |   8 +-
 .../cuda12.2-conda/devcontainer.json          |   2 +-
 .devcontainer/cuda12.2-pip/devcontainer.json  |   8 +-
 .github/workflows/pr.yaml                     |   2 +-
 cpp/CMakeLists.txt                            |   4 +-
 .../cugraph/utilities/device_functors.cuh     |   9 +-
 cpp/include/cugraph/utilities/mask_utils.cuh  |   5 +-
 cpp/src/community/detail/common_methods.cuh   |   3 +-
 cpp/src/community/legacy/louvain.cuh          |  15 ++-
 .../weakly_connected_components_impl.cuh      |  15 ++-
 cpp/src/detail/utility_wrappers.cu            |   4 +-
 cpp/src/prims/kv_store.cuh                    |   1 +
 ...m_reduce_dst_key_aggregated_outgoing_e.cuh |   2 +-
 cpp/src/structure/graph_view_impl.cuh         |  36 +++---
 cpp/tests/CMakeLists.txt                      |  24 +++-
 cpp/tests/prims/mg_extract_transform_e.cu     | 109 +++++-------------
 .../sampling/sampling_post_processing_test.cu |  38 +++---
 19 files changed, 138 insertions(+), 154 deletions(-)

diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
index 3d0ac075be3..190003dd7af 100644
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -7,6 +7,11 @@ FROM ${BASE} as pip-base
 
 ENV DEFAULT_VIRTUAL_ENV=rapids
 
+RUN apt update -y \
+ && DEBIAN_FRONTEND=noninteractive apt install -y \
+    libblas-dev liblapack-dev \
+ && rm -rf /tmp/* /var/tmp/* /var/cache/apt/* /var/lib/apt/lists/*;
+
 FROM ${BASE} as conda-base
 
 ENV DEFAULT_CONDA_ENV=rapids
diff --git a/.devcontainer/cuda11.8-conda/devcontainer.json b/.devcontainer/cuda11.8-conda/devcontainer.json
index 7c9cd0258a4..d878f2d6584 100644
--- a/.devcontainer/cuda11.8-conda/devcontainer.json
+++ b/.devcontainer/cuda11.8-conda/devcontainer.json
@@ -11,7 +11,7 @@
   "runArgs": [
     "--rm",
     "--name",
-    "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda11.8-conda"
+    "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.06-cuda11.8-conda"
   ],
   "hostRequirements": {"gpu": "optional"},
   "features": {
diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json
index a4dc168505b..a0edcb27df8 100644
--- a/.devcontainer/cuda11.8-pip/devcontainer.json
+++ b/.devcontainer/cuda11.8-pip/devcontainer.json
@@ -5,19 +5,16 @@
     "args": {
       "CUDA": "11.8",
       "PYTHON_PACKAGE_MANAGER": "pip",
-      "BASE": "rapidsai/devcontainers:24.06-cpp-cuda11.8-ubuntu22.04"
+      "BASE": "rapidsai/devcontainers:24.06-cpp-cuda11.8-ucx1.15.0-openmpi-ubuntu22.04"
     }
   },
   "runArgs": [
     "--rm",
     "--name",
-    "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda11.8-pip"
+    "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.06-cuda11.8-pip"
   ],
   "hostRequirements": {"gpu": "optional"},
   "features": {
-    "ghcr.io/rapidsai/devcontainers/features/ucx:24.6": {
-      "version": "1.15.0"
-    },
     "ghcr.io/rapidsai/devcontainers/features/cuda:24.6": {
       "version": "11.8",
       "installcuBLAS": true,
@@ -28,7 +25,6 @@
     "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.6": {}
   },
   "overrideFeatureInstallOrder": [
-    "ghcr.io/rapidsai/devcontainers/features/ucx",
     "ghcr.io/rapidsai/devcontainers/features/cuda",
     "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
   ],
diff --git a/.devcontainer/cuda12.2-conda/devcontainer.json b/.devcontainer/cuda12.2-conda/devcontainer.json
index eae4967f3b2..8a095d9b934 100644
--- a/.devcontainer/cuda12.2-conda/devcontainer.json
+++ b/.devcontainer/cuda12.2-conda/devcontainer.json
@@ -11,7 +11,7 @@
   "runArgs": [
     "--rm",
     "--name",
-    "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda12.2-conda"
+    "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.06-cuda12.2-conda"
   ],
   "hostRequirements": {"gpu": "optional"},
   "features": {
diff --git a/.devcontainer/cuda12.2-pip/devcontainer.json b/.devcontainer/cuda12.2-pip/devcontainer.json
index 393a5c63d23..10436f8b28d 100644
--- a/.devcontainer/cuda12.2-pip/devcontainer.json
+++ b/.devcontainer/cuda12.2-pip/devcontainer.json
@@ -5,19 +5,16 @@
     "args": {
       "CUDA": "12.2",
       "PYTHON_PACKAGE_MANAGER": "pip",
-      "BASE": "rapidsai/devcontainers:24.06-cpp-cuda12.2-ubuntu22.04"
+      "BASE": "rapidsai/devcontainers:24.06-cpp-cuda12.2-ucx1.15.0-openmpi-ubuntu22.04"
     }
   },
   "runArgs": [
     "--rm",
     "--name",
-    "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda12.2-pip"
+    "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.06-cuda12.2-pip"
   ],
   "hostRequirements": {"gpu": "optional"},
   "features": {
-    "ghcr.io/rapidsai/devcontainers/features/ucx:24.6": {
-      "version": "1.15.0"
-    },
     "ghcr.io/rapidsai/devcontainers/features/cuda:24.6": {
       "version": "12.2",
       "installcuBLAS": true,
@@ -28,7 +25,6 @@
     "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.6": {}
   },
   "overrideFeatureInstallOrder": [
-    "ghcr.io/rapidsai/devcontainers/features/ucx",
     "ghcr.io/rapidsai/devcontainers/features/cuda",
     "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
   ],
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index c04e0e879d2..5733646a8b9 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -196,5 +196,5 @@ jobs:
       extra-repo-deploy-key: CUGRAPH_OPS_SSH_PRIVATE_DEPLOY_KEY
       build_command: |
         sccache -z;
-        build-all --verbose -j$(nproc --ignore=1);
+        build-all --verbose -j$(nproc --ignore=1) -DBUILD_CUGRAPH_MG_TESTS=ON;
         sccache -s;
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 2527599fece..7dca3d983a5 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -92,14 +92,14 @@ set(CUGRAPH_CXX_FLAGS "")
 set(CUGRAPH_CUDA_FLAGS "")
 
 if(CMAKE_COMPILER_IS_GNUCXX)
-    list(APPEND CUGRAPH_CXX_FLAGS -Werror -Wno-error=deprecated-declarations)
+    list(APPEND CUGRAPH_CXX_FLAGS -Werror -Wno-error=deprecated-declarations -Wno-deprecated-declarations -DRAFT_HIDE_DEPRECATION_WARNINGS)
 endif(CMAKE_COMPILER_IS_GNUCXX)
 
 
 message("-- Building for GPU_ARCHS = ${CMAKE_CUDA_ARCHITECTURES}")
 
 list(APPEND CUGRAPH_CUDA_FLAGS --expt-extended-lambda --expt-relaxed-constexpr)
-list(APPEND CUGRAPH_CUDA_FLAGS -Werror=cross-execution-space-call -Wno-deprecated-declarations -Xptxas=--disable-warnings)
+list(APPEND CUGRAPH_CUDA_FLAGS -Werror=cross-execution-space-call -Wno-deprecated-declarations -DRAFT_HIDE_DEPRECATION_WARNINGS -Xptxas=--disable-warnings)
 list(APPEND CUGRAPH_CUDA_FLAGS -Xcompiler=-Wall,-Wno-error=sign-compare,-Wno-error=unused-but-set-variable)
 list(APPEND CUGRAPH_CUDA_FLAGS -Xfatbin=-compress-all)
 
diff --git a/cpp/include/cugraph/utilities/device_functors.cuh b/cpp/include/cugraph/utilities/device_functors.cuh
index 3af8ed1dd19..20cf98f7e6d 100644
--- a/cpp/include/cugraph/utilities/device_functors.cuh
+++ b/cpp/include/cugraph/utilities/device_functors.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -78,13 +78,14 @@ struct indirection_t {
 
 template <typename index_t, typename Iterator>
 struct indirection_if_idx_valid_t {
+  using value_type = typename thrust::iterator_traits<Iterator>::value_type;
   Iterator first{};
   index_t invalid_idx{};
-  typename thrust::iterator_traits<Iterator>::value_type invalid_value{};
+  value_type invalid_value{};
 
-  __device__ typename thrust::iterator_traits<Iterator>::value_type operator()(index_t i) const
+  __device__ value_type operator()(index_t i) const
   {
-    return (i != invalid_idx) ? *(first + i) : invalid_value;
+    return (i != invalid_idx) ? static_cast<value_type>(*(first + i)) : invalid_value;
   }
 };
 
diff --git a/cpp/include/cugraph/utilities/mask_utils.cuh b/cpp/include/cugraph/utilities/mask_utils.cuh
index 7b69ea3fe3a..1d86eef0ed1 100644
--- a/cpp/include/cugraph/utilities/mask_utils.cuh
+++ b/cpp/include/cugraph/utilities/mask_utils.cuh
@@ -20,6 +20,7 @@
 
 #include <raft/core/handle.hpp>
 
+#include <cuda/functional>
 #include <thrust/copy.h>
 #include <thrust/functional.h>
 #include <thrust/iterator/counting_iterator.h>
@@ -160,13 +161,13 @@ size_t count_set_bits(raft::handle_t const& handle, MaskIterator mask_first, siz
     handle.get_thrust_policy(),
     thrust::make_counting_iterator(size_t{0}),
     thrust::make_counting_iterator(packed_bool_size(num_bits)),
-    [mask_first, num_bits] __device__(size_t i) {
+    cuda::proclaim_return_type<size_t>([mask_first, num_bits] __device__(size_t i) -> size_t {
       auto word = *(mask_first + i);
       if ((i + 1) * packed_bools_per_word() > num_bits) {
         word &= packed_bool_partial_mask(num_bits % packed_bools_per_word());
       }
       return static_cast<size_t>(__popc(word));
-    },
+    }),
     size_t{0},
     thrust::plus<size_t>{});
 }
diff --git a/cpp/src/community/detail/common_methods.cuh b/cpp/src/community/detail/common_methods.cuh
index fe0a415db30..dcad4e92b95 100644
--- a/cpp/src/community/detail/common_methods.cuh
+++ b/cpp/src/community/detail/common_methods.cuh
@@ -29,6 +29,7 @@
 #include <cugraph/detail/utility_wrappers.hpp>
 #include <cugraph/graph_functions.hpp>
 
+#include <cuda/functional>
 #include <thrust/binary_search.h>
 #include <thrust/execution_policy.h>
 #include <thrust/functional.h>
@@ -178,7 +179,7 @@ weight_t compute_modularity(
     handle.get_thrust_policy(),
     cluster_weights.begin(),
     cluster_weights.end(),
-    [] __device__(weight_t p) { return p * p; },
+    cuda::proclaim_return_type<weight_t>([] __device__(weight_t p) -> weight_t { return p * p; }),
     weight_t{0},
     thrust::plus<weight_t>());
 
diff --git a/cpp/src/community/legacy/louvain.cuh b/cpp/src/community/legacy/louvain.cuh
index 6cf5bbdc3c6..53d0b231c03 100644
--- a/cpp/src/community/legacy/louvain.cuh
+++ b/cpp/src/community/legacy/louvain.cuh
@@ -22,6 +22,7 @@
 
 #include <cugraph/dendrogram.hpp>
 #include <cugraph/legacy/graph.hpp>
+
 #ifdef TIMING
 #include <cugraph/utilities/high_res_timer.hpp>
 #endif
@@ -29,6 +30,7 @@
 #include <rmm/device_uvector.hpp>
 #include <rmm/exec_policy.hpp>
 
+#include <cuda/functional>
 #include <thrust/copy.h>
 #include <thrust/distance.h>
 #include <thrust/execution_policy.h>
@@ -141,12 +143,13 @@ class Louvain {
       handle_.get_thrust_policy(),
       thrust::make_counting_iterator(0),
       thrust::make_counting_iterator(graph.number_of_vertices),
-      [d_deg = deg.data(), d_inc = inc.data(), total_edge_weight, resolution] __device__(
-        vertex_t community) {
-        return ((d_inc[community] / total_edge_weight) - resolution *
-                                                           (d_deg[community] * d_deg[community]) /
-                                                           (total_edge_weight * total_edge_weight));
-      },
+      cuda::proclaim_return_type<weight_t>(
+        [d_deg = deg.data(), d_inc = inc.data(), total_edge_weight, resolution] __device__(
+          vertex_t community) -> weight_t {
+          return ((d_inc[community] / total_edge_weight) -
+                  resolution * (d_deg[community] * d_deg[community]) /
+                    (total_edge_weight * total_edge_weight));
+        }),
       weight_t{0.0},
       thrust::plus<weight_t>());
 
diff --git a/cpp/src/components/weakly_connected_components_impl.cuh b/cpp/src/components/weakly_connected_components_impl.cuh
index d4d6d842951..f63f28210d8 100644
--- a/cpp/src/components/weakly_connected_components_impl.cuh
+++ b/cpp/src/components/weakly_connected_components_impl.cuh
@@ -34,6 +34,7 @@
 
 #include <rmm/device_uvector.hpp>
 
+#include <cuda/functional>
 #include <thrust/binary_search.h>
 #include <thrust/copy.h>
 #include <thrust/distance.h>
@@ -400,9 +401,10 @@ void weakly_connected_components_impl(raft::handle_t const& handle,
         handle.get_thrust_policy(),
         new_root_candidates.begin(),
         new_root_candidates.begin() + (new_root_candidates.size() > 0 ? 1 : 0),
-        [vertex_partition, degrees = degrees.data()] __device__(auto v) {
-          return degrees[vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v)];
-        },
+        cuda::proclaim_return_type<edge_t>(
+          [vertex_partition, degrees = degrees.data()] __device__(auto v) -> edge_t {
+            return degrees[vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v)];
+          }),
         edge_t{0},
         thrust::plus<edge_t>{});
 
@@ -642,9 +644,10 @@ void weakly_connected_components_impl(raft::handle_t const& handle,
         handle.get_thrust_policy(),
         thrust::get<0>(vertex_frontier.bucket(bucket_idx_cur).begin().get_iterator_tuple()),
         thrust::get<0>(vertex_frontier.bucket(bucket_idx_cur).end().get_iterator_tuple()),
-        [vertex_partition, degrees = degrees.data()] __device__(auto v) {
-          return degrees[vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v)];
-        },
+        cuda::proclaim_return_type<edge_t>(
+          [vertex_partition, degrees = degrees.data()] __device__(auto v) -> edge_t {
+            return degrees[vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v)];
+          }),
         edge_t{0},
         thrust::plus<edge_t>());
 
diff --git a/cpp/src/detail/utility_wrappers.cu b/cpp/src/detail/utility_wrappers.cu
index 9100ecbd5e1..6d6158a16e7 100644
--- a/cpp/src/detail/utility_wrappers.cu
+++ b/cpp/src/detail/utility_wrappers.cu
@@ -21,6 +21,7 @@
 
 #include <rmm/exec_policy.hpp>
 
+#include <cuda/functional>
 #include <thrust/count.h>
 #include <thrust/distance.h>
 #include <thrust/functional.h>
@@ -139,7 +140,8 @@ vertex_t compute_maximum_vertex_id(rmm::cuda_stream_view const& stream_view,
     rmm::exec_policy(stream_view),
     edge_first,
     edge_first + num_edges,
-    [] __device__(auto e) { return std::max(thrust::get<0>(e), thrust::get<1>(e)); },
+    cuda::proclaim_return_type<vertex_t>(
+      [] __device__(auto e) -> vertex_t { return std::max(thrust::get<0>(e), thrust::get<1>(e)); }),
     vertex_t{0},
     thrust::maximum<vertex_t>());
 }
diff --git a/cpp/src/prims/kv_store.cuh b/cpp/src/prims/kv_store.cuh
index 5001a20bb83..de233fd583b 100644
--- a/cpp/src/prims/kv_store.cuh
+++ b/cpp/src/prims/kv_store.cuh
@@ -17,6 +17,7 @@
 
 #include "prims/detail/optional_dataframe_buffer.hpp"
 
+#include <cugraph/graph.hpp>
 #include <cugraph/utilities/dataframe_buffer.hpp>
 #include <cugraph/utilities/device_functors.cuh>
 
diff --git a/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh b/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh
index 006d7760666..7be30b0a5f0 100644
--- a/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh
+++ b/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh
@@ -754,7 +754,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e(
             std::make_unique<kv_store_t<vertex_t, edge_src_value_t, true>>(
               std::move(majors),
               std::move(edge_major_values),
-              invalid_vertex_id<vertex_t>::value,
+              edge_src_value_t{},
               true,
               handle.get_stream());
         }
diff --git a/cpp/src/structure/graph_view_impl.cuh b/cpp/src/structure/graph_view_impl.cuh
index 29dca6ef409..7097349dce5 100644
--- a/cpp/src/structure/graph_view_impl.cuh
+++ b/cpp/src/structure/graph_view_impl.cuh
@@ -353,7 +353,7 @@ edge_t count_edge_partition_multi_edges(
         execution_policy,
         thrust::make_counting_iterator(edge_partition.major_range_first()) + (*segment_offsets)[2],
         thrust::make_counting_iterator(edge_partition.major_range_first()) + (*segment_offsets)[3],
-        [edge_partition] __device__(auto major) {
+        cuda::proclaim_return_type<edge_t>([edge_partition] __device__(auto major) -> edge_t {
           auto major_offset = edge_partition.major_offset_from_major_nocheck(major);
           vertex_t const* indices{nullptr};
           [[maybe_unused]] edge_t edge_offset{};
@@ -365,7 +365,7 @@ edge_t count_edge_partition_multi_edges(
             if (indices[i - 1] == indices[i]) { ++count; }
           }
           return count;
-        },
+        }),
         edge_t{0},
         thrust::plus<edge_t>{});
     }
@@ -374,19 +374,21 @@ edge_t count_edge_partition_multi_edges(
         execution_policy,
         thrust::make_counting_iterator(vertex_t{0}),
         thrust::make_counting_iterator(*(edge_partition.dcs_nzd_vertex_count())),
-        [edge_partition, major_start_offset = (*segment_offsets)[3]] __device__(auto idx) {
-          auto major_idx =
-            major_start_offset + idx;  // major_offset != major_idx in the hypersparse region
-          vertex_t const* indices{nullptr};
-          [[maybe_unused]] edge_t edge_offset{};
-          edge_t local_degree{};
-          thrust::tie(indices, edge_offset, local_degree) = edge_partition.local_edges(major_idx);
-          edge_t count{0};
-          for (edge_t i = 1; i < local_degree; ++i) {  // assumes neighbors are sorted
-            if (indices[i - 1] == indices[i]) { ++count; }
-          }
-          return count;
-        },
+        cuda::proclaim_return_type<edge_t>(
+          [edge_partition,
+           major_start_offset = (*segment_offsets)[3]] __device__(auto idx) -> edge_t {
+            auto major_idx =
+              major_start_offset + idx;  // major_offset != major_idx in the hypersparse region
+            vertex_t const* indices{nullptr};
+            [[maybe_unused]] edge_t edge_offset{};
+            edge_t local_degree{};
+            thrust::tie(indices, edge_offset, local_degree) = edge_partition.local_edges(major_idx);
+            edge_t count{0};
+            for (edge_t i = 1; i < local_degree; ++i) {  // assumes neighbors are sorted
+              if (indices[i - 1] == indices[i]) { ++count; }
+            }
+            return count;
+          }),
         edge_t{0},
         thrust::plus<edge_t>{});
     }
@@ -398,7 +400,7 @@ edge_t count_edge_partition_multi_edges(
       thrust::make_counting_iterator(edge_partition.major_range_first()),
       thrust::make_counting_iterator(edge_partition.major_range_first()) +
         edge_partition.major_range_size(),
-      [edge_partition] __device__(auto major) {
+      cuda::proclaim_return_type<edge_t>([edge_partition] __device__(auto major) -> edge_t {
         auto major_offset = edge_partition.major_offset_from_major_nocheck(major);
         vertex_t const* indices{nullptr};
         [[maybe_unused]] edge_t edge_offset{};
@@ -409,7 +411,7 @@ edge_t count_edge_partition_multi_edges(
           if (indices[i - 1] == indices[i]) { ++count; }
         }
         return count;
-      },
+      }),
       edge_t{0},
       thrust::plus<edge_t>{});
   }
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index d1dd2dec069..2152de28ff9 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -169,7 +169,11 @@ function(ConfigureTest CMAKE_TEST_NAME)
     )
     set_target_properties(
         ${CMAKE_TEST_NAME}
-            PROPERTIES INSTALL_RPATH "\$ORIGIN/../../../lib")
+            PROPERTIES INSTALL_RPATH "\$ORIGIN/../../../lib"
+                       CXX_STANDARD                        17
+                       CXX_STANDARD_REQUIRED               ON
+                       CUDA_STANDARD                       17
+                       CUDA_STANDARD_REQUIRED              ON)
 
     rapids_test_add(
         NAME ${CMAKE_TEST_NAME}
@@ -195,7 +199,11 @@ function(ConfigureTestMG CMAKE_TEST_NAME)
     )
     set_target_properties(
         ${CMAKE_TEST_NAME}
-            PROPERTIES INSTALL_RPATH "\$ORIGIN/../../../lib")
+            PROPERTIES INSTALL_RPATH "\$ORIGIN/../../../lib"
+                       CXX_STANDARD                        17
+                       CXX_STANDARD_REQUIRED               ON
+                       CUDA_STANDARD                       17
+                       CUDA_STANDARD_REQUIRED              ON)
 
     rapids_test_add(
         NAME ${CMAKE_TEST_NAME}
@@ -241,7 +249,11 @@ function(ConfigureCTest CMAKE_TEST_NAME)
     )
     set_target_properties(
         ${CMAKE_TEST_NAME}
-            PROPERTIES INSTALL_RPATH "\$ORIGIN/../../../lib")
+            PROPERTIES INSTALL_RPATH "\$ORIGIN/../../../lib"
+                       CXX_STANDARD                        17
+                       CXX_STANDARD_REQUIRED               ON
+                       CUDA_STANDARD                       17
+                       CUDA_STANDARD_REQUIRED              ON)
 
     rapids_test_add(
         NAME ${CMAKE_TEST_NAME}
@@ -269,7 +281,11 @@ function(ConfigureCTestMG CMAKE_TEST_NAME)
     )
     set_target_properties(
         ${CMAKE_TEST_NAME}
-            PROPERTIES INSTALL_RPATH "\$ORIGIN/../../../lib")
+            PROPERTIES INSTALL_RPATH "\$ORIGIN/../../../lib"
+                       CXX_STANDARD                        17
+                       CXX_STANDARD_REQUIRED               ON
+                       CUDA_STANDARD                       17
+                       CUDA_STANDARD_REQUIRED              ON)
 
     rapids_test_add(
         NAME ${CMAKE_TEST_NAME}
diff --git a/cpp/tests/prims/mg_extract_transform_e.cu b/cpp/tests/prims/mg_extract_transform_e.cu
index 20e87070fa5..d7aa953ef7c 100644
--- a/cpp/tests/prims/mg_extract_transform_e.cu
+++ b/cpp/tests/prims/mg_extract_transform_e.cu
@@ -59,55 +59,27 @@
 #include <sstream>
 #include <type_traits>
 
-template <typename key_t, typename vertex_t, typename property_t, typename output_payload_t>
+template <typename vertex_t, typename property_t, typename output_payload_t>
 struct e_op_t {
-  static_assert(std::is_same_v<key_t, vertex_t> ||
-                std::is_same_v<key_t, thrust::tuple<vertex_t, int32_t>>);
   static_assert(std::is_same_v<output_payload_t, int32_t> ||
                 std::is_same_v<output_payload_t, thrust::tuple<float, int32_t>>);
 
-  using return_type = thrust::optional<typename std::conditional_t<
-    std::is_same_v<key_t, vertex_t>,
-    std::conditional_t<std::is_arithmetic_v<output_payload_t>,
-                       thrust::tuple<vertex_t, vertex_t, int32_t>,
-                       thrust::tuple<vertex_t, vertex_t, float, int32_t>>,
-    std::conditional_t<std::is_arithmetic_v<output_payload_t>,
-                       thrust::tuple<vertex_t, int32_t, vertex_t, int32_t>,
-                       thrust::tuple<vertex_t, int32_t, vertex_t, float, int32_t>>>>;
-
-  __device__ return_type operator()(key_t optionally_tagged_src,
-                                    vertex_t dst,
-                                    property_t src_val,
-                                    property_t dst_val,
-                                    thrust::nullopt_t) const
+  using return_type =
+    thrust::optional<std::conditional_t<std::is_arithmetic_v<output_payload_t>,
+                                        thrust::tuple<vertex_t, vertex_t, int32_t>,
+                                        thrust::tuple<vertex_t, vertex_t, float, int32_t>>>;
+
+  __device__ return_type operator()(
+    vertex_t src, vertex_t dst, property_t src_val, property_t dst_val, thrust::nullopt_t) const
   {
     auto output_payload = static_cast<output_payload_t>(1);
     if (src_val < dst_val) {
-      if constexpr (std::is_same_v<key_t, vertex_t>) {
-        if constexpr (std::is_arithmetic_v<output_payload_t>) {
-          return thrust::make_tuple(optionally_tagged_src, dst, output_payload);
-        } else {
-          static_assert(thrust::tuple_size<output_payload_t>::value == size_t{2});
-          return thrust::make_tuple(optionally_tagged_src,
-                                    dst,
-                                    thrust::get<0>(output_payload),
-                                    thrust::get<1>(output_payload));
-        }
+      if constexpr (std::is_arithmetic_v<output_payload_t>) {
+        return thrust::make_tuple(src, dst, output_payload);
       } else {
-        static_assert(thrust::tuple_size<key_t>::value == size_t{2});
-        if constexpr (std::is_arithmetic_v<output_payload_t>) {
-          return thrust::make_tuple(thrust::get<0>(optionally_tagged_src),
-                                    thrust::get<1>(optionally_tagged_src),
-                                    dst,
-                                    output_payload);
-        } else {
-          static_assert(thrust::tuple_size<output_payload_t>::value == size_t{2});
-          return thrust::make_tuple(thrust::get<0>(optionally_tagged_src),
-                                    thrust::get<1>(optionally_tagged_src),
-                                    dst,
-                                    thrust::get<0>(output_payload),
-                                    thrust::get<1>(output_payload));
-        }
+        static_assert(thrust::tuple_size<output_payload_t>::value == size_t{2});
+        return thrust::make_tuple(
+          src, dst, thrust::get<0>(output_payload), thrust::get<1>(output_payload));
       }
     } else {
       return thrust::nullopt;
@@ -134,19 +106,11 @@ class Tests_MGExtractTransformE
   virtual void TearDown() {}
 
   // Compare the results of extract_transform_e primitive
-  template <typename vertex_t,
-            typename edge_t,
-            typename weight_t,
-            typename tag_t,
-            typename output_payload_t>
+  template <typename vertex_t, typename edge_t, typename weight_t, typename output_payload_t>
   void run_current_test(Prims_Usecase const& prims_usecase, input_usecase_t const& input_usecase)
   {
     using result_t = int32_t;
 
-    using key_t =
-      std::conditional_t<std::is_same_v<tag_t, void>, vertex_t, thrust::tuple<vertex_t, tag_t>>;
-
-    static_assert(std::is_same_v<tag_t, void> || std::is_arithmetic_v<tag_t>);
     static_assert(std::is_same_v<output_payload_t, void> ||
                   cugraph::is_arithmetic_or_thrust_tuple_of_arithmetic<output_payload_t>::value);
     if constexpr (cugraph::is_thrust_tuple<output_payload_t>::value) {
@@ -212,7 +176,7 @@ class Tests_MGExtractTransformE
                                    mg_src_prop.view(),
                                    mg_dst_prop.view(),
                                    cugraph::edge_dummy_property_t{}.view(),
-                                   e_op_t<key_t, vertex_t, result_t, output_payload_t>{});
+                                   e_op_t<vertex_t, result_t, output_payload_t>{});
 
     if (cugraph::test::g_perf) {
       RAFT_CUDA_TRY(cudaDeviceSynchronize());  // for consistent performance measurement
@@ -225,7 +189,7 @@ class Tests_MGExtractTransformE
 
     if (prims_usecase.check_correctness) {
       auto mg_aggregate_extract_transform_output_buffer = cugraph::allocate_dataframe_buffer<
-        typename e_op_t<key_t, vertex_t, result_t, output_payload_t>::return_type::value_type>(
+        typename e_op_t<vertex_t, result_t, output_payload_t>::return_type::value_type>(
         size_t{0}, handle_->get_stream());
       std::get<0>(mg_aggregate_extract_transform_output_buffer) =
         cugraph::test::device_gatherv(*handle_,
@@ -239,18 +203,12 @@ class Tests_MGExtractTransformE
         cugraph::test::device_gatherv(*handle_,
                                       std::get<2>(mg_extract_transform_output_buffer).data(),
                                       std::get<2>(mg_extract_transform_output_buffer).size());
-      if constexpr (!std::is_same_v<key_t, vertex_t> || !std::is_arithmetic_v<output_payload_t>) {
+      if constexpr (!std::is_arithmetic_v<output_payload_t>) {
         std::get<3>(mg_aggregate_extract_transform_output_buffer) =
           cugraph::test::device_gatherv(*handle_,
                                         std::get<3>(mg_extract_transform_output_buffer).data(),
                                         std::get<3>(mg_extract_transform_output_buffer).size());
       }
-      if constexpr (!std::is_same_v<key_t, vertex_t> && !std::is_arithmetic_v<output_payload_t>) {
-        std::get<4>(mg_aggregate_extract_transform_output_buffer) =
-          cugraph::test::device_gatherv(*handle_,
-                                        std::get<4>(mg_extract_transform_output_buffer).data(),
-                                        std::get<4>(mg_extract_transform_output_buffer).size());
-      }
 
       cugraph::graph_t<vertex_t, edge_t, store_transposed, false> sg_graph(*handle_);
       std::tie(sg_graph, std::ignore, std::ignore, std::ignore) =
@@ -292,7 +250,7 @@ class Tests_MGExtractTransformE
                                        sg_src_prop.view(),
                                        sg_dst_prop.view(),
                                        cugraph::edge_dummy_property_t{}.view(),
-                                       e_op_t<key_t, vertex_t, result_t, output_payload_t>{});
+                                       e_op_t<vertex_t, result_t, output_payload_t>{});
 
         thrust::sort(handle_->get_thrust_policy(),
                      cugraph::get_dataframe_buffer_begin(sg_extract_transform_output_buffer),
@@ -321,13 +279,13 @@ using Tests_MGExtractTransformE_Rmat = Tests_MGExtractTransformE<cugraph::test::
 TEST_P(Tests_MGExtractTransformE_File, CheckInt32Int32FloatVoidInt32)
 {
   auto param = GetParam();
-  run_current_test<int32_t, int32_t, float, void, int32_t>(std::get<0>(param), std::get<1>(param));
+  run_current_test<int32_t, int32_t, float, int32_t>(std::get<0>(param), std::get<1>(param));
 }
 
 TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt32Int32FloatVoidInt32)
 {
   auto param = GetParam();
-  run_current_test<int32_t, int32_t, float, void, int32_t>(
+  run_current_test<int32_t, int32_t, float, int32_t>(
     std::get<0>(param),
     cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param)));
 }
@@ -335,14 +293,14 @@ TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt32Int32FloatVoidInt32)
 TEST_P(Tests_MGExtractTransformE_File, CheckInt32Int32FloatVoidTupleFloatInt32)
 {
   auto param = GetParam();
-  run_current_test<int32_t, int32_t, float, void, thrust::tuple<float, int32_t>>(
-    std::get<0>(param), std::get<1>(param));
+  run_current_test<int32_t, int32_t, float, thrust::tuple<float, int32_t>>(std::get<0>(param),
+                                                                           std::get<1>(param));
 }
 
 TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt32Int32FloatVoidTupleFloatInt32)
 {
   auto param = GetParam();
-  run_current_test<int32_t, int32_t, float, void, thrust::tuple<float, int32_t>>(
+  run_current_test<int32_t, int32_t, float, thrust::tuple<float, int32_t>>(
     std::get<0>(param),
     cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param)));
 }
@@ -350,14 +308,13 @@ TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt32Int32FloatVoidTupleFloatInt32)
 TEST_P(Tests_MGExtractTransformE_File, CheckInt32Int32FloatInt32Int32)
 {
   auto param = GetParam();
-  run_current_test<int32_t, int32_t, float, int32_t, int32_t>(std::get<0>(param),
-                                                              std::get<1>(param));
+  run_current_test<int32_t, int32_t, float, int32_t>(std::get<0>(param), std::get<1>(param));
 }
 
 TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt32Int32FloatInt32Int32)
 {
   auto param = GetParam();
-  run_current_test<int32_t, int32_t, float, int32_t, int32_t>(
+  run_current_test<int32_t, int32_t, float, int32_t>(
     std::get<0>(param),
     cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param)));
 }
@@ -365,14 +322,14 @@ TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt32Int32FloatInt32Int32)
 TEST_P(Tests_MGExtractTransformE_File, CheckInt32Int32FloatInt32TupleFloatInt32)
 {
   auto param = GetParam();
-  run_current_test<int32_t, int32_t, float, int32_t, thrust::tuple<float, int32_t>>(
-    std::get<0>(param), std::get<1>(param));
+  run_current_test<int32_t, int32_t, float, thrust::tuple<float, int32_t>>(std::get<0>(param),
+                                                                           std::get<1>(param));
 }
 
 TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt32Int32FloatInt32TupleFloatInt32)
 {
   auto param = GetParam();
-  run_current_test<int32_t, int32_t, float, int32_t, thrust::tuple<float, int32_t>>(
+  run_current_test<int32_t, int32_t, float, thrust::tuple<float, int32_t>>(
     std::get<0>(param),
     cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param)));
 }
@@ -380,14 +337,13 @@ TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt32Int32FloatInt32TupleFloatInt32)
 TEST_P(Tests_MGExtractTransformE_File, CheckInt32Int64FloatInt32Int32)
 {
   auto param = GetParam();
-  run_current_test<int32_t, int64_t, float, int32_t, int32_t>(std::get<0>(param),
-                                                              std::get<1>(param));
+  run_current_test<int32_t, int64_t, float, int32_t>(std::get<0>(param), std::get<1>(param));
 }
 
 TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt32Int64FloatInt32Int32)
 {
   auto param = GetParam();
-  run_current_test<int32_t, int64_t, float, int32_t, int32_t>(
+  run_current_test<int32_t, int64_t, float, int32_t>(
     std::get<0>(param),
     cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param)));
 }
@@ -395,14 +351,13 @@ TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt32Int64FloatInt32Int32)
 TEST_P(Tests_MGExtractTransformE_File, CheckInt64Int64FloatInt32Int32)
 {
   auto param = GetParam();
-  run_current_test<int64_t, int64_t, float, int32_t, int32_t>(std::get<0>(param),
-                                                              std::get<1>(param));
+  run_current_test<int64_t, int64_t, float, int32_t>(std::get<0>(param), std::get<1>(param));
 }
 
 TEST_P(Tests_MGExtractTransformE_Rmat, CheckInt64Int64FloatInt32Int32)
 {
   auto param = GetParam();
-  run_current_test<int64_t, int64_t, float, int32_t, int32_t>(
+  run_current_test<int64_t, int64_t, float, int32_t>(
     std::get<0>(param),
     cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param)));
 }
diff --git a/cpp/tests/sampling/sampling_post_processing_test.cu b/cpp/tests/sampling/sampling_post_processing_test.cu
index c87cc5b960b..3bca382a2eb 100644
--- a/cpp/tests/sampling/sampling_post_processing_test.cu
+++ b/cpp/tests/sampling/sampling_post_processing_test.cu
@@ -398,15 +398,16 @@ bool check_renumber_map_invariants(
       handle.get_thrust_policy(),
       unique_majors.begin(),
       unique_majors.end(),
-      [sorted_org_vertices =
-         raft::device_span<vertex_t const>(sorted_org_vertices.data(), sorted_org_vertices.size()),
-       matching_renumbered_vertices = raft::device_span<vertex_t const>(
-         matching_renumbered_vertices.data(),
-         matching_renumbered_vertices.size())] __device__(vertex_t major) {
-        auto it = thrust::lower_bound(
-          thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), major);
-        return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), it)];
-      },
+      cuda::proclaim_return_type<vertex_t>(
+        [sorted_org_vertices = raft::device_span<vertex_t const>(sorted_org_vertices.data(),
+                                                                 sorted_org_vertices.size()),
+         matching_renumbered_vertices = raft::device_span<vertex_t const>(
+           matching_renumbered_vertices.data(),
+           matching_renumbered_vertices.size())] __device__(vertex_t major) -> vertex_t {
+          auto it = thrust::lower_bound(
+            thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), major);
+          return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), it)];
+        }),
       std::numeric_limits<vertex_t>::lowest(),
       thrust::maximum<vertex_t>{});
 
@@ -414,15 +415,16 @@ bool check_renumber_map_invariants(
       handle.get_thrust_policy(),
       unique_minors.begin(),
       unique_minors.end(),
-      [sorted_org_vertices =
-         raft::device_span<vertex_t const>(sorted_org_vertices.data(), sorted_org_vertices.size()),
-       matching_renumbered_vertices = raft::device_span<vertex_t const>(
-         matching_renumbered_vertices.data(),
-         matching_renumbered_vertices.size())] __device__(vertex_t minor) {
-        auto it = thrust::lower_bound(
-          thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), minor);
-        return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), it)];
-      },
+      cuda::proclaim_return_type<vertex_t>(
+        [sorted_org_vertices = raft::device_span<vertex_t const>(sorted_org_vertices.data(),
+                                                                 sorted_org_vertices.size()),
+         matching_renumbered_vertices = raft::device_span<vertex_t const>(
+           matching_renumbered_vertices.data(),
+           matching_renumbered_vertices.size())] __device__(vertex_t minor) -> vertex_t {
+          auto it = thrust::lower_bound(
+            thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), minor);
+          return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), it)];
+        }),
       std::numeric_limits<vertex_t>::max(),
       thrust::minimum<vertex_t>{});