From 0039f33b29994c12bf1fe06922c7c5a26c7318f7 Mon Sep 17 00:00:00 2001 From: Louis Sugy Date: Wed, 14 Dec 2022 21:52:45 +0100 Subject: [PATCH] Unify use of common functors (#1049) The same functors are redefined in many different files. I believe this change can help with compilation times, executable size, and consistency. Authors: - Louis Sugy (https://github.com/Nyrio) Approvers: - Tamas Bela Feher (https://github.com/tfeher) - Artem M. Chirkin (https://github.com/achirkin) - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/1049 --- cpp/bench/linalg/norm.cu | 3 +- cpp/include/raft/cluster/detail/kmeans.cuh | 117 +++--- .../raft/cluster/detail/kmeans_common.cuh | 75 ++-- cpp/include/raft/cluster/kmeans.cuh | 4 +- cpp/include/raft/core/operators.hpp | 354 ++++++++++++++++++ cpp/include/raft/distance/detail/canberra.cuh | 4 +- .../raft/distance/detail/chebyshev.cuh | 4 +- .../raft/distance/detail/correlation.cuh | 12 +- cpp/include/raft/distance/detail/cosine.cuh | 12 +- .../raft/distance/detail/euclidean.cuh | 12 +- .../raft/distance/detail/fused_l2_nn.cuh | 38 +- .../raft/distance/detail/hellinger.cuh | 15 +- cpp/include/raft/distance/detail/l1.cuh | 4 +- .../raft/distance/detail/minkowski.cuh | 4 +- .../detail/pairwise_distance_base.cuh | 1 + cpp/include/raft/label/detail/classlabels.cuh | 4 +- cpp/include/raft/linalg/add.cuh | 2 - .../raft/linalg/coalesced_reduction.cuh | 25 +- cpp/include/raft/linalg/detail/add.cuh | 9 +- .../linalg/detail/coalesced_reduction.cuh | 87 ++--- cpp/include/raft/linalg/detail/divide.cuh | 5 +- cpp/include/raft/linalg/detail/eltwise.cuh | 19 +- cpp/include/raft/linalg/detail/functional.cuh | 69 ---- cpp/include/raft/linalg/detail/multiply.cuh | 4 +- cpp/include/raft/linalg/detail/norm.cuh | 25 +- cpp/include/raft/linalg/detail/reduce.cuh | 14 +- .../raft/linalg/detail/strided_reduction.cuh | 21 +- cpp/include/raft/linalg/detail/subtract.cuh | 7 +- cpp/include/raft/linalg/divide.cuh | 3 +- cpp/include/raft/linalg/eltwise.cuh | 6 - cpp/include/raft/linalg/norm.cuh | 13 +- cpp/include/raft/linalg/normalize.cuh | 31 +- cpp/include/raft/linalg/power.cuh | 8 +- cpp/include/raft/linalg/reduce.cuh | 25 +- cpp/include/raft/linalg/sqrt.cuh | 5 +- cpp/include/raft/linalg/strided_reduction.cuh | 25 +- cpp/include/raft/matrix/detail/gather.cuh | 37 +- cpp/include/raft/matrix/detail/math.cuh | 47 +-- .../sparse/distance/detail/lp_distance.cuh | 36 +- cpp/include/raft/sparse/op/detail/slice.cuh | 12 +- .../raft/spatial/knn/detail/ann_quantized.cuh | 7 +- .../raft/spatial/knn/detail/fused_l2_knn.cuh | 29 +- .../spatial/knn/detail/ivf_flat_build.cuh | 3 +- .../spatial/knn/detail/ivf_flat_search.cuh | 3 +- .../raft/spatial/knn/detail/ivf_pq_build.cuh | 12 +- .../raft/spatial/knn/detail/ivf_pq_search.cuh | 32 +- .../raft/spatial/knn/detail/processing.cuh | 34 +- .../stats/detail/batched/silhouette_score.cuh | 25 +- cpp/include/raft/stats/detail/mean_center.cuh | 20 +- .../raft/stats/detail/silhouette_score.cuh | 53 +-- .../raft/stats/detail/weighted_mean.cuh | 5 +- cpp/include/raft/util/cuda_utils.cuh | 49 ++- cpp/include/raft/util/scatter.cuh | 5 +- cpp/src/distance/cluster/cluster_cost.cuh | 24 +- cpp/test/CMakeLists.txt | 2 + cpp/test/cluster/kmeans.cu | 9 +- cpp/test/cluster/linkage.cu | 2 +- cpp/test/core/operators_device.cu | 340 +++++++++++++++++ cpp/test/core/operators_host.cpp | 286 ++++++++++++++ cpp/test/distance/dist_adj.cu | 2 +- cpp/test/distance/dist_canberra.cu | 2 +- 
cpp/test/distance/dist_chebyshev.cu | 2 +- cpp/test/distance/dist_correlation.cu | 2 +- cpp/test/distance/dist_cos.cu | 2 +- cpp/test/distance/dist_euc_exp.cu | 2 +- cpp/test/distance/dist_euc_unexp.cu | 2 +- cpp/test/distance/dist_eucsqrt_exp.cu | 2 +- cpp/test/distance/dist_hamming.cu | 2 +- cpp/test/distance/dist_hellinger.cu | 2 +- cpp/test/distance/dist_jensen_shannon.cu | 2 +- cpp/test/distance/dist_kl_divergence.cu | 2 +- cpp/test/distance/dist_l1.cu | 2 +- cpp/test/distance/dist_minkowski.cu | 2 +- cpp/test/distance/dist_russell_rao.cu | 2 +- cpp/test/distance/distance_base.cuh | 5 +- cpp/test/distance/fused_l2_nn.cu | 6 +- cpp/test/distance/gram.cu | 2 +- cpp/test/label/label.cu | 2 +- cpp/test/label/merge_labels.cu | 2 +- cpp/test/linalg/add.cu | 2 +- cpp/test/linalg/axpy.cu | 2 +- cpp/test/linalg/binary_op.cu | 13 +- cpp/test/linalg/binary_op.cuh | 2 +- cpp/test/linalg/cholesky_r1.cu | 2 +- cpp/test/linalg/coalesced_reduction.cu | 18 +- cpp/test/linalg/divide.cu | 2 +- cpp/test/linalg/dot.cu | 2 +- cpp/test/linalg/eig.cu | 2 +- cpp/test/linalg/eig_sel.cu | 2 +- cpp/test/linalg/eltwise.cu | 2 +- cpp/test/linalg/gemm_layout.cu | 2 +- cpp/test/linalg/gemv.cu | 2 +- cpp/test/linalg/map.cu | 2 +- cpp/test/linalg/map_then_reduce.cu | 15 +- cpp/test/linalg/matrix_vector.cu | 19 +- cpp/test/linalg/matrix_vector_op.cu | 2 +- cpp/test/linalg/matrix_vector_op.cuh | 2 +- cpp/test/linalg/mean_squared_error.cu | 2 +- cpp/test/linalg/multiply.cu | 2 +- cpp/test/linalg/norm.cu | 16 +- cpp/test/linalg/normalize.cu | 19 +- cpp/test/linalg/power.cu | 2 +- cpp/test/linalg/reduce.cu | 15 +- cpp/test/linalg/reduce.cuh | 37 +- cpp/test/linalg/reduce_cols_by_key.cu | 2 +- cpp/test/linalg/reduce_rows_by_key.cu | 2 +- cpp/test/linalg/rsvd.cu | 2 +- cpp/test/linalg/sqrt.cu | 2 +- cpp/test/linalg/strided_reduction.cu | 18 +- cpp/test/linalg/subtract.cu | 2 +- cpp/test/linalg/svd.cu | 2 +- cpp/test/linalg/ternary_op.cu | 2 +- cpp/test/linalg/transpose.cu | 2 +- cpp/test/linalg/unary_op.cu | 2 +- cpp/test/linalg/unary_op.cuh | 2 +- cpp/test/matrix/argmax.cu | 2 +- cpp/test/matrix/argmin.cu | 2 +- cpp/test/matrix/columnSort.cu | 2 +- cpp/test/matrix/diagonal.cu | 2 +- cpp/test/matrix/gather.cu | 2 +- cpp/test/matrix/linewise_op.cu | 10 +- cpp/test/matrix/math.cu | 2 +- cpp/test/matrix/matrix.cu | 2 +- cpp/test/matrix/norm.cu | 2 +- cpp/test/matrix/reverse.cu | 2 +- cpp/test/matrix/slice.cu | 2 +- cpp/test/matrix/triangular.cu | 2 +- cpp/test/mst.cu | 2 +- cpp/test/neighbors/ann_ivf_flat.cu | 2 +- cpp/test/neighbors/ann_ivf_pq.cuh | 2 +- cpp/test/neighbors/ann_utils.cuh | 2 +- cpp/test/neighbors/ball_cover.cu | 2 +- cpp/test/neighbors/epsilon_neighborhood.cu | 2 +- cpp/test/neighbors/faiss_mr.cu | 2 +- cpp/test/neighbors/fused_l2_knn.cu | 2 +- cpp/test/neighbors/haversine.cu | 2 +- cpp/test/neighbors/knn.cu | 2 +- cpp/test/neighbors/refine.cu | 2 +- cpp/test/neighbors/selection.cu | 2 +- cpp/test/random/make_blobs.cu | 2 +- cpp/test/random/make_regression.cu | 2 +- cpp/test/random/multi_variable_gaussian.cu | 2 +- cpp/test/random/permute.cu | 2 +- cpp/test/random/rmat_rectangular_generator.cu | 2 +- cpp/test/random/rng.cu | 2 +- cpp/test/random/rng_discrete.cu | 2 +- cpp/test/random/rng_int.cu | 2 +- cpp/test/random/sample_without_replacement.cu | 2 +- cpp/test/sparse/add.cu | 2 +- cpp/test/sparse/convert_coo.cu | 2 +- cpp/test/sparse/convert_csr.cu | 2 +- cpp/test/sparse/csr_row_slice.cu | 2 +- cpp/test/sparse/csr_to_dense.cu | 2 +- cpp/test/sparse/csr_transpose.cu | 2 +- cpp/test/sparse/degree.cu | 2 +- 
cpp/test/sparse/dist_coo_spmv.cu | 16 +- cpp/test/sparse/distance.cu | 2 +- cpp/test/sparse/filter.cu | 2 +- cpp/test/sparse/neighbors/brute_force.cu | 2 +- .../sparse/neighbors/connect_components.cu | 2 +- cpp/test/sparse/neighbors/knn_graph.cu | 2 +- cpp/test/sparse/norm.cu | 2 +- cpp/test/sparse/reduce.cu | 2 +- cpp/test/sparse/row_op.cu | 2 +- cpp/test/sparse/sort.cu | 2 +- cpp/test/sparse/spgemmi.cu | 2 +- cpp/test/sparse/symmetrize.cu | 2 +- cpp/test/stats/accuracy.cu | 2 +- cpp/test/stats/adjusted_rand_index.cu | 2 +- cpp/test/stats/completeness_score.cu | 2 +- cpp/test/stats/contingencyMatrix.cu | 2 +- cpp/test/stats/cov.cu | 2 +- cpp/test/stats/dispersion.cu | 2 +- cpp/test/stats/entropy.cu | 2 +- cpp/test/stats/histogram.cu | 2 +- cpp/test/stats/homogeneity_score.cu | 2 +- cpp/test/stats/information_criterion.cu | 2 +- cpp/test/stats/kl_divergence.cu | 2 +- cpp/test/stats/mean.cu | 2 +- cpp/test/stats/mean_center.cu | 2 +- cpp/test/stats/meanvar.cu | 2 +- cpp/test/stats/minmax.cu | 2 +- cpp/test/stats/mutual_info_score.cu | 2 +- cpp/test/stats/r2_score.cu | 2 +- cpp/test/stats/rand_index.cu | 2 +- cpp/test/stats/regression_metrics.cu | 2 +- cpp/test/stats/silhouette_score.cu | 2 +- cpp/test/stats/stddev.cu | 2 +- cpp/test/stats/sum.cu | 2 +- cpp/test/stats/trustworthiness.cu | 2 +- cpp/test/stats/v_measure.cu | 2 +- cpp/test/stats/weighted_mean.cu | 2 +- cpp/test/test_utils.cuh | 329 ++++++++++++++++ cpp/test/test_utils.h | 336 +---------------- 194 files changed, 2028 insertions(+), 1222 deletions(-) create mode 100644 cpp/include/raft/core/operators.hpp delete mode 100644 cpp/include/raft/linalg/detail/functional.cuh create mode 100644 cpp/test/core/operators_device.cu create mode 100644 cpp/test/core/operators_host.cpp create mode 100644 cpp/test/test_utils.cuh diff --git a/cpp/bench/linalg/norm.cu b/cpp/bench/linalg/norm.cu index cce4195cf1..efecee88c9 100644 --- a/cpp/bench/linalg/norm.cu +++ b/cpp/bench/linalg/norm.cu @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -60,7 +61,7 @@ struct rowNorm : public fixture { output_view, raft::linalg::L2Norm, raft::linalg::Apply::ALONG_ROWS, - raft::SqrtOp()); + raft::sqrt_op()); }); } diff --git a/cpp/include/raft/cluster/detail/kmeans.cuh b/cpp/include/raft/cluster/detail/kmeans.cuh index 2d3481b4e1..e575849536 100644 --- a/cpp/include/raft/cluster/detail/kmeans.cuh +++ b/cpp/include/raft/cluster/detail/kmeans.cuh @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -197,16 +198,15 @@ void kmeansPlusPlus(const raft::handle_t& handle, // Outputs minDistanceBuf[n_trials x n_samples] where minDistance[i, :] contains updated // minClusterDistance that includes candidate-i auto minDistBuf = distBuffer.view(); - raft::linalg::matrixVectorOp( - minDistBuf.data_handle(), - pwd.data_handle(), - minClusterDistance.data_handle(), - pwd.extent(1), - pwd.extent(0), - true, - true, - [=] __device__(DataT mat, DataT vec) { return vec <= mat ? vec : mat; }, - stream); + raft::linalg::matrixVectorOp(minDistBuf.data_handle(), + pwd.data_handle(), + minClusterDistance.data_handle(), + pwd.extent(1), + pwd.extent(0), + true, + true, + raft::min_op{}, + stream); // Calculate costPerCandidate[n_trials] where costPerCandidate[i] is the cluster cost when using // centroid candidate-i @@ -326,21 +326,15 @@ void update_centroids(const raft::handle_t& handle, // weight_per_cluster[n_clusters] - 1D array, weight_per_cluster[i] contains sum of weights in // cluster-i. 
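As an illustration of the unification above (a minimal host-side sketch, not code from this PR): the per-file device lambda (vec <= mat ? vec : mat) passed to matrixVectorOp becomes the shared raft::min_op. The shared functors are ordinary function objects whose operator() is also device-inlined, so the same object works on host and inside kernels:

#include <raft/core/operators.hpp>
#include <iostream>

int main()
{
  raft::min_op min;  // the functor now passed to matrixVectorOp above
  raft::add_op add;
  std::cout << min(3.0f, 5.0f) << '\n';  // 3
  std::cout << add(3.0f, 5.0f) << '\n';  // 8
  return 0;
}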
// Note - when weight_per_cluster[i] is 0, new_centroids[i] is reset to 0 - raft::linalg::matrixVectorOp( - new_centroids.data_handle(), - new_centroids.data_handle(), - weight_per_cluster.data_handle(), - new_centroids.extent(1), - new_centroids.extent(0), - true, - false, - [=] __device__(DataT mat, DataT vec) { - if (vec == 0) - return DataT(0); - else - return mat / vec; - }, - handle.get_stream()); + raft::linalg::matrixVectorOp(new_centroids.data_handle(), + new_centroids.data_handle(), + weight_per_cluster.data_handle(), + new_centroids.extent(1), + new_centroids.extent(0), + true, + false, + raft::div_checkzero_op{}, + handle.get_stream()); // copy centroids[i] to new_centroids[i] when weight_per_cluster[i] is 0 cub::ArgIndexInputIterator itr_wt(weight_per_cluster.data_handle()); @@ -356,9 +350,7 @@ void update_centroids(const raft::handle_t& handle, // copy when the sum of weights in the cluster is 0 return map.value == 0; }, - [=] __device__(raft::KeyValuePair map) { // map - return map.key; - }, + raft::key_op{}, handle.get_stream()); } @@ -399,7 +391,7 @@ void kmeans_fit_main(const raft::handle_t& handle, // resource auto wtInCluster = raft::make_device_vector(handle, n_clusters); - rmm::device_scalar> clusterCostD(stream); + rmm::device_scalar clusterCostD(stream); // L2 norm of X: ||x||^2 auto L2NormX = raft::make_device_vector(handle, n_samples); @@ -470,16 +462,12 @@ void kmeans_fit_main(const raft::handle_t& handle, // compute the squared norm between the newCentroids and the original // centroids, destructor releases the resource auto sqrdNorm = raft::make_device_scalar(handle, DataT(0)); - raft::linalg::mapThenSumReduce( - sqrdNorm.data_handle(), - newCentroids.size(), - [=] __device__(const DataT a, const DataT b) { - DataT diff = a - b; - return diff * diff; - }, - stream, - centroids.data_handle(), - newCentroids.data_handle()); + raft::linalg::mapThenSumReduce(sqrdNorm.data_handle(), + newCentroids.size(), + raft::sqdiff_op{}, + stream, + centroids.data_handle(), + newCentroids.data_handle()); DataT sqrdNormError = 0; raft::copy(&sqrdNormError, sqrdNorm.data_handle(), sqrdNorm.size(), stream); @@ -494,18 +482,11 @@ void kmeans_fit_main(const raft::handle_t& handle, minClusterAndDistance.view(), workspace, raft::make_device_scalar_view(clusterCostD.data()), - [] __device__(const raft::KeyValuePair& a, - const raft::KeyValuePair& b) { - raft::KeyValuePair res; - res.key = 0; - res.value = a.value + b.value; - return res; - }); - - DataT curClusteringCost = 0; - raft::copy(&curClusteringCost, &(clusterCostD.data()->value), 1, stream); - - handle.sync_stream(stream); + raft::value_op{}, + raft::add_op{}); + + DataT curClusteringCost = clusterCostD.value(stream); + ASSERT(curClusteringCost != (DataT)0.0, "Too few points and centroids being found is getting 0 cost from " "centers"); @@ -558,15 +539,10 @@ void kmeans_fit_main(const raft::handle_t& handle, minClusterAndDistance.view(), workspace, raft::make_device_scalar_view(clusterCostD.data()), - [] __device__(const raft::KeyValuePair& a, - const raft::KeyValuePair& b) { - raft::KeyValuePair res; - res.key = 0; - res.value = a.value + b.value; - return res; - }); + raft::value_op{}, + raft::add_op{}); - raft::copy(inertia.data_handle(), &(clusterCostD.data()->value), 1, stream); + inertia[0] = clusterCostD.value(stream); RAFT_LOG_DEBUG("KMeans.fit: completed after %d iterations with %f inertia[0] ", n_iter[0] > params.max_iter ? 
n_iter[0] - 1 : n_iter[0], @@ -678,7 +654,8 @@ void initScalableKMeansPlusPlus(const raft::handle_t& handle, minClusterDistanceVec.view(), workspace, raft::make_device_scalar_view(clusterCost.data()), - [] __device__(const DataT& a, const DataT& b) { return a + b; }); + raft::identity_op{}, + raft::add_op{}); auto psi = clusterCost.value(stream); @@ -710,7 +687,8 @@ void initScalableKMeansPlusPlus(const raft::handle_t& handle, minClusterDistanceVec.view(), workspace, raft::make_device_scalar_view(clusterCost.data()), - [] __device__(const DataT& a, const DataT& b) { return a + b; }); + raft::identity_op{}, + raft::add_op{}); psi = clusterCost.value(stream); @@ -1079,7 +1057,7 @@ void kmeans_predict(handle_t const& handle, workspace); // calculate cluster cost phi_x(C) - rmm::device_scalar> clusterCostD(stream); + rmm::device_scalar clusterCostD(stream); // TODO: add different templates for InType of binaryOp to avoid thrust transform thrust::transform(handle.get_thrust_policy(), minClusterAndDistance.data_handle(), @@ -1097,21 +1075,16 @@ void kmeans_predict(handle_t const& handle, minClusterAndDistance.view(), workspace, raft::make_device_scalar_view(clusterCostD.data()), - [] __device__(const raft::KeyValuePair& a, - const raft::KeyValuePair& b) { - raft::KeyValuePair res; - res.key = 0; - res.value = a.value + b.value; - return res; - }); - - raft::copy(inertia.data_handle(), &(clusterCostD.data()->value), 1, stream); + raft::value_op{}, + raft::add_op{}); thrust::transform(handle.get_thrust_policy(), minClusterAndDistance.data_handle(), minClusterAndDistance.data_handle() + minClusterAndDistance.size(), labels.data_handle(), - [=] __device__(raft::KeyValuePair pair) { return pair.key; }); + raft::key_op{}); + + inertia[0] = clusterCostD.value(stream); } template diff --git a/cpp/include/raft/cluster/detail/kmeans_common.cuh b/cpp/include/raft/cluster/detail/kmeans_common.cuh index b1ef43fea7..2fd33ac759 100644 --- a/cpp/include/raft/cluster/detail/kmeans_common.cuh +++ b/cpp/include/raft/cluster/detail/kmeans_common.cuh @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -155,12 +156,11 @@ void checkWeight(const raft::handle_t& handle, n_samples); auto scale = static_cast(n_samples) / wt_sum; - raft::linalg::unaryOp( - weight.data_handle(), - weight.data_handle(), - n_samples, - [=] __device__(const DataT& wt) { return wt * scale; }, - stream); + raft::linalg::unaryOp(weight.data_handle(), + weight.data_handle(), + n_samples, + raft::mul_const_op{scale}, + stream); } } @@ -178,33 +178,42 @@ IndexT getCentroidsBatchSize(int batch_centroids, IndexT n_local_clusters) return (minVal == 0) ? 
n_local_clusters : minVal; } -template +template void computeClusterCost(const raft::handle_t& handle, - raft::device_vector_view minClusterDistance, + raft::device_vector_view minClusterDistance, rmm::device_uvector& workspace, - raft::device_scalar_view clusterCost, + raft::device_scalar_view clusterCost, + MainOpT main_op, ReductionOpT reduction_op) { - cudaStream_t stream = handle.get_stream(); + cudaStream_t stream = handle.get_stream(); + + cub::TransformInputIterator itr(minClusterDistance.data_handle(), + main_op); + size_t temp_storage_bytes = 0; RAFT_CUDA_TRY(cub::DeviceReduce::Reduce(nullptr, temp_storage_bytes, - minClusterDistance.data_handle(), + itr, clusterCost.data_handle(), minClusterDistance.size(), reduction_op, - DataT(), + OutputT(), stream)); workspace.resize(temp_storage_bytes, stream); RAFT_CUDA_TRY(cub::DeviceReduce::Reduce(workspace.data(), temp_storage_bytes, - minClusterDistance.data_handle(), + itr, clusterCost.data_handle(), minClusterDistance.size(), reduction_op, - DataT(), + OutputT(), stream)); } @@ -266,9 +275,7 @@ void sampleCentroids(const raft::handle_t& handle, sampledMinClusterDistance.data_handle(), nPtsSampledInRank, inRankCp.data(), - [=] __device__(raft::KeyValuePair val) { // MapTransformOp - return val.key; - }, + raft::key_op{}, stream); } @@ -463,10 +470,8 @@ void minClusterAndDistanceCompute( pair.value = val; return pair; }, - [=] __device__(raft::KeyValuePair a, raft::KeyValuePair b) { - return (b.value < a.value) ? b : a; - }, - [=] __device__(raft::KeyValuePair pair) { return pair; }); + raft::argmin_op{}, + raft::identity_op{}); } } } @@ -541,7 +546,6 @@ void minClusterDistanceCompute(const raft::handle_t& handle, if (is_fused) { workspace.resize((sizeof(IndexT)) * ns, stream); - // todo(lsugy): remove cIdx raft::distance::fusedL2NNMinReduce( minClusterDistanceView.data_handle(), datasetView.data_handle(), @@ -576,23 +580,16 @@ void minClusterDistanceCompute(const raft::handle_t& handle, pairwise_distance_kmeans( handle, datasetView, centroidsView, pairwiseDistanceView, workspace, metric); - raft::linalg::coalescedReduction( - minClusterDistanceView.data_handle(), - pairwiseDistanceView.data_handle(), - pairwiseDistanceView.extent(1), - pairwiseDistanceView.extent(0), - std::numeric_limits::max(), - stream, - true, - [=] __device__(DataT val, IndexT i) { // MainLambda - return val; - }, - [=] __device__(DataT a, DataT b) { // ReduceLambda - return (b < a) ? 
b : a; - }, - [=] __device__(DataT val) { // FinalLambda - return val; - }); + raft::linalg::coalescedReduction(minClusterDistanceView.data_handle(), + pairwiseDistanceView.data_handle(), + pairwiseDistanceView.extent(1), + pairwiseDistanceView.extent(0), + std::numeric_limits::max(), + stream, + true, + raft::identity_op{}, + raft::min_op{}, + raft::identity_op{}); } } } diff --git a/cpp/include/raft/cluster/kmeans.cuh b/cpp/include/raft/cluster/kmeans.cuh index d64815244b..4b912dc966 100644 --- a/cpp/include/raft/cluster/kmeans.cuh +++ b/cpp/include/raft/cluster/kmeans.cuh @@ -20,6 +20,7 @@ #include #include #include +#include namespace raft::cluster::kmeans { @@ -313,7 +314,8 @@ void cluster_cost(const raft::handle_t& handle, raft::device_scalar_view clusterCost, ReductionOpT reduction_op) { - detail::computeClusterCost(handle, minClusterDistance, workspace, clusterCost, reduction_op); + detail::computeClusterCost( + handle, minClusterDistance, workspace, clusterCost, raft::identity_op{}, reduction_op); } /** diff --git a/cpp/include/raft/core/operators.hpp b/cpp/include/raft/core/operators.hpp new file mode 100644 index 0000000000..19b9695896 --- /dev/null +++ b/cpp/include/raft/core/operators.hpp @@ -0,0 +1,354 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include + +#include + +namespace raft { + +/** + * @defgroup Functors Commonly used functors. + * The optional unused arguments are useful for kernels that pass the index along with the value. + * @{ + */ + +struct identity_op { + template + constexpr RAFT_INLINE_FUNCTION auto operator()(const Type& in, UnusedArgs...) const + { + return in; + } +}; + +template +struct cast_op { + template + constexpr RAFT_INLINE_FUNCTION auto operator()(InT in, UnusedArgs...) const + { + return static_cast(in); + } +}; + +struct key_op { + template + constexpr RAFT_INLINE_FUNCTION auto operator()(const KVP& p, UnusedArgs...) const + { + return p.key; + } +}; + +struct value_op { + template + constexpr RAFT_INLINE_FUNCTION auto operator()(const KVP& p, UnusedArgs...) const + { + return p.value; + } +}; + +struct sqrt_op { + template + constexpr RAFT_INLINE_FUNCTION auto operator()(const Type& in, UnusedArgs...) const + { + return std::sqrt(in); + } +}; + +struct nz_op { + template + constexpr RAFT_INLINE_FUNCTION auto operator()(const Type& in, UnusedArgs...) const + { + return in != Type(0) ? Type(1) : Type(0); + } +}; + +struct abs_op { + template + constexpr RAFT_INLINE_FUNCTION auto operator()(const Type& in, UnusedArgs...) const + { + return std::abs(in); + } +}; + +struct sq_op { + template + constexpr RAFT_INLINE_FUNCTION auto operator()(const Type& in, UnusedArgs...) 
const + { + return in * in; + } +}; + +struct add_op { + template + constexpr RAFT_INLINE_FUNCTION auto operator()(const T1& a, const T2& b) const + { + return a + b; + } +}; + +struct sub_op { + template + constexpr RAFT_INLINE_FUNCTION auto operator()(const T1& a, const T2& b) const + { + return a - b; + } +}; + +struct mul_op { + template + constexpr RAFT_INLINE_FUNCTION auto operator()(const T1& a, const T2& b) const + { + return a * b; + } +}; + +struct div_op { + template + constexpr RAFT_INLINE_FUNCTION auto operator()(const T1& a, const T2& b) const + { + return a / b; + } +}; + +struct div_checkzero_op { + template + constexpr RAFT_INLINE_FUNCTION auto operator()(const Type& a, const Type& b) const + { + if (b == Type{0}) { return Type{0}; } + return a / b; + } +}; + +struct pow_op { + template + constexpr RAFT_INLINE_FUNCTION auto operator()(const Type& a, const Type& b) const + { + return std::pow(a, b); + } +}; + +struct min_op { + template + constexpr RAFT_INLINE_FUNCTION auto operator()(const Type& a, const Type& b) const + { + if (a > b) { return b; } + return a; + } +}; + +struct max_op { + template + constexpr RAFT_INLINE_FUNCTION auto operator()(const Type& a, const Type& b) const + { + if (b > a) { return b; } + return a; + } +}; + +struct sqdiff_op { + template + constexpr RAFT_INLINE_FUNCTION auto operator()(const Type& a, const Type& b) const + { + Type diff = a - b; + return diff * diff; + } +}; + +struct argmin_op { + template + constexpr RAFT_INLINE_FUNCTION auto operator()(const KVP& a, const KVP& b) const + { + if ((b.value < a.value) || ((a.value == b.value) && (b.key < a.key))) { return b; } + return a; + } +}; + +struct argmax_op { + template + constexpr RAFT_INLINE_FUNCTION auto operator()(const KVP& a, const KVP& b) const + { + if ((b.value > a.value) || ((a.value == b.value) && (b.key < a.key))) { return b; } + return a; + } +}; + +template +struct const_op { + const ScalarT scalar; + + constexpr explicit const_op(const ScalarT& s) : scalar{s} {} + + template + constexpr RAFT_INLINE_FUNCTION auto operator()(Args...) const + { + return scalar; + } +}; + +/** + * @brief Wraps around a binary operator, passing a constant on the right-hand side. + * + * Usage example: + * @code{.cpp} + * #include + * + * raft::plug_const_op op(2.0f); + * std::cout << op(2.1f) << std::endl; // 4.2 + * @endcode + * + * @tparam ConstT + * @tparam BinaryOpT + */ +template +struct plug_const_op { + const ConstT c; + const BinaryOpT composed_op; + + template >> + constexpr explicit plug_const_op(const ConstT& s) + : c{s}, composed_op{} // The compiler complains if composed_op is not initialized explicitly + { + } + constexpr plug_const_op(const ConstT& s, BinaryOpT o) : c{s}, composed_op{o} {} + + template + constexpr RAFT_INLINE_FUNCTION auto operator()(InT a) const + { + return composed_op(a, c); + } +}; + +template +using add_const_op = plug_const_op; + +template +using sub_const_op = plug_const_op; + +template +using mul_const_op = plug_const_op; + +template +using div_const_op = plug_const_op; + +template +using div_checkzero_const_op = plug_const_op; + +template +using pow_const_op = plug_const_op; + +/** + * @brief Constructs an operator by composing a chain of operators. + * + * Note that all arguments are passed to the innermost operator. 
+ * + * Usage example: + * @code{.cpp} + * #include + * + * auto op = raft::compose_op(raft::sqrt_op(), raft::abs_op(), raft::cast_op(), + * raft::add_const_op(8)); + * std::cout << op(-50) << std::endl; // 6.48074 + * @endcode + * + * @tparam OpsT Any number of operation types. + */ +template +struct compose_op { + const std::tuple ops; + + template , + typename CondT = std::enable_if_t>> + constexpr compose_op() + { + } + constexpr explicit compose_op(OpsT... ops) : ops{ops...} {} + + template + constexpr RAFT_INLINE_FUNCTION auto operator()(Args&&... args) const + { + return compose(std::forward(args)...); + } + + private: + template + constexpr RAFT_INLINE_FUNCTION auto compose(Args&&... args) const + { + if constexpr (RemOps > 0) { + return compose(std::get(ops)(std::forward(args)...)); + } else { + return identity_op{}(std::forward(args)...); + } + } +}; + +/** + * @brief Constructs an operator by composing an outer op with one inner op for each of its inputs. + * + * Usage example: + * @code{.cpp} + * #include + * + * raft::map_args_op> op; + * std::cout << op(42.0f, 10) << std::endl; // 16.4807 + * @endcode + * + * @tparam OuterOpT Outer operation type + * @tparam ArgOpsT Operation types for each input of the outer operation + */ +template +struct map_args_op { + const OuterOpT outer_op; + const std::tuple arg_ops; + + template , + typename CondT = std::enable_if_t && + std::is_default_constructible_v>> + constexpr map_args_op() + : outer_op{} // The compiler complains if outer_op is not initialized explicitly + { + } + constexpr explicit map_args_op(OuterOpT outer_op, ArgOpsT... arg_ops) + : outer_op{outer_op}, arg_ops{arg_ops...} + { + } + + template + constexpr RAFT_INLINE_FUNCTION auto operator()(Args&&... args) const + { + constexpr size_t kNumOps = sizeof...(ArgOpsT); + static_assert(kNumOps == sizeof...(Args), + "The number of arguments does not match the number of mapping operators"); + return map_args(std::make_index_sequence{}, std::forward(args)...); + } + + private: + template + constexpr RAFT_INLINE_FUNCTION auto map_args(std::index_sequence, Args&&... args) const + { + return outer_op(std::get(arg_ops)(std::forward(args))...); + } +}; + +/** @} */ +} // namespace raft diff --git a/cpp/include/raft/distance/detail/canberra.cuh b/cpp/include/raft/distance/detail/canberra.cuh index 6be994b80a..90ed3940e1 100644 --- a/cpp/include/raft/distance/detail/canberra.cuh +++ b/cpp/include/raft/distance/detail/canberra.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -73,7 +73,7 @@ static void canberraImpl(const DataT* x, // Accumulation operation lambda auto core_lambda = [] __device__(AccT & acc, DataT & x, DataT & y) { - const auto diff = raft::L1Op()(x - y); + const auto diff = raft::myAbs(x - y); const auto add = raft::myAbs(x) + raft::myAbs(y); // deal with potential for 0 in denominator by // forcing 1/0 instead diff --git a/cpp/include/raft/distance/detail/chebyshev.cuh b/cpp/include/raft/distance/detail/chebyshev.cuh index 1ac10f269e..454ee8c8bb 100644 --- a/cpp/include/raft/distance/detail/chebyshev.cuh +++ b/cpp/include/raft/distance/detail/chebyshev.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -72,7 +72,7 @@ static void chebyshevImpl(const DataT* x, // Accumulation operation lambda auto core_lambda = [] __device__(AccT & acc, DataT & x, DataT & y) { - const auto diff = raft::L1Op()(x - y); + const auto diff = raft::myAbs(x - y); acc = raft::myMax(acc, diff); }; diff --git a/cpp/include/raft/distance/detail/correlation.cuh b/cpp/include/raft/distance/detail/correlation.cuh index 2b77d280fe..9bdbbf112c 100644 --- a/cpp/include/raft/distance/detail/correlation.cuh +++ b/cpp/include/raft/distance/detail/correlation.cuh @@ -262,8 +262,8 @@ void correlationImpl(int m, true, stream, false, - raft::Nop(), - raft::Sum()); + raft::identity_op(), + raft::add_op()); raft::linalg::reduce(norm_row_vec, pB, k, @@ -273,8 +273,8 @@ void correlationImpl(int m, true, stream, false, - raft::Nop(), - raft::Sum()); + raft::identity_op(), + raft::add_op()); sq_norm_col_vec += (m + n); sq_norm_row_vec = sq_norm_col_vec + m; @@ -290,8 +290,8 @@ void correlationImpl(int m, true, stream, false, - raft::Nop(), - raft::Sum()); + raft::identity_op(), + raft::add_op()); sq_norm_col_vec += m; sq_norm_row_vec = sq_norm_col_vec; raft::linalg::rowNorm(sq_norm_col_vec, pA, k, m, raft::linalg::L2Norm, isRowMajor, stream); diff --git a/cpp/include/raft/distance/detail/cosine.cuh b/cpp/include/raft/distance/detail/cosine.cuh index f06051962f..46a694aa51 100644 --- a/cpp/include/raft/distance/detail/cosine.cuh +++ b/cpp/include/raft/distance/detail/cosine.cuh @@ -19,6 +19,7 @@ #include #include #include +#include namespace raft { namespace distance { @@ -229,8 +230,6 @@ void cosineAlgo1(Index_ m, cudaStream_t stream, bool isRowMajor) { - auto norm_op = [] __device__(AccType in) { return raft::mySqrt(in); }; - // raft distance support inputs as float/double and output as uint8_t/float/double. static_assert(!((sizeof(OutType) > 1) && (sizeof(AccType) != sizeof(OutType))), "OutType can be uint8_t, float, double," @@ -248,10 +247,13 @@ void cosineAlgo1(Index_ m, InType* row_vec = workspace; if (pA != pB) { row_vec += m; - raft::linalg::rowNorm(col_vec, pA, k, m, raft::linalg::L2Norm, isRowMajor, stream, norm_op); - raft::linalg::rowNorm(row_vec, pB, k, n, raft::linalg::L2Norm, isRowMajor, stream, norm_op); + raft::linalg::rowNorm( + col_vec, pA, k, m, raft::linalg::L2Norm, isRowMajor, stream, raft::sqrt_op{}); + raft::linalg::rowNorm( + row_vec, pB, k, n, raft::linalg::L2Norm, isRowMajor, stream, raft::sqrt_op{}); } else { - raft::linalg::rowNorm(col_vec, pA, k, m, raft::linalg::L2Norm, isRowMajor, stream, norm_op); + raft::linalg::rowNorm( + col_vec, pA, k, m, raft::linalg::L2Norm, isRowMajor, stream, raft::sqrt_op{}); } if (isRowMajor) { diff --git a/cpp/include/raft/distance/detail/euclidean.cuh b/cpp/include/raft/distance/detail/euclidean.cuh index 5ea74fa884..4184810fff 100644 --- a/cpp/include/raft/distance/detail/euclidean.cuh +++ b/cpp/include/raft/distance/detail/euclidean.cuh @@ -19,6 +19,7 @@ #include #include #include +#include namespace raft { namespace distance { @@ -247,8 +248,6 @@ void euclideanAlgo1(Index_ m, cudaStream_t stream, bool isRowMajor) { - auto norm_op = [] __device__(InType in) { return in; }; - // raft distance support inputs as float/double and output as uint8_t/float/double. 
static_assert(!((sizeof(OutType) > 1) && (sizeof(AccType) != sizeof(OutType))), "OutType can be uint8_t, float, double," @@ -266,10 +265,13 @@ void euclideanAlgo1(Index_ m, InType* row_vec = workspace; if (pA != pB) { row_vec += m; - raft::linalg::rowNorm(col_vec, pA, k, m, raft::linalg::L2Norm, isRowMajor, stream, norm_op); - raft::linalg::rowNorm(row_vec, pB, k, n, raft::linalg::L2Norm, isRowMajor, stream, norm_op); + raft::linalg::rowNorm( + col_vec, pA, k, m, raft::linalg::L2Norm, isRowMajor, stream, raft::identity_op{}); + raft::linalg::rowNorm( + row_vec, pB, k, n, raft::linalg::L2Norm, isRowMajor, stream, raft::identity_op{}); } else { - raft::linalg::rowNorm(col_vec, pA, k, m, raft::linalg::L2Norm, isRowMajor, stream, norm_op); + raft::linalg::rowNorm( + col_vec, pA, k, m, raft::linalg::L2Norm, isRowMajor, stream, raft::identity_op{}); } if (isRowMajor) { diff --git a/cpp/include/raft/distance/detail/fused_l2_nn.cuh b/cpp/include/raft/distance/detail/fused_l2_nn.cuh index e8c2648c2e..c9750df8ad 100644 --- a/cpp/include/raft/distance/detail/fused_l2_nn.cuh +++ b/cpp/include/raft/distance/detail/fused_l2_nn.cuh @@ -298,8 +298,6 @@ void fusedL2NNImpl(OutT* min, RAFT_CUDA_TRY(cudaGetLastError()); } - auto fin_op = [] __device__(DataT d_val, int g_d_idx) { return d_val; }; - constexpr size_t shmemSize = P::SmemSize + ((P::Mblk + P::Nblk) * sizeof(DataT)); if (sqrt) { auto fusedL2NNSqrt = fusedL2NNkernel; + raft::identity_op>; dim3 grid = launchConfigGenerator

<P>(m, n, shmemSize, fusedL2NNSqrt); - fusedL2NNSqrt<<<grid, blk, shmemSize, stream>>>( - min, x, y, xn, yn, m, n, k, maxVal, workspace, redOp, pairRedOp, core_lambda, fin_op); + fusedL2NNSqrt<<<grid, blk, shmemSize, stream>>>(min, + x, + y, + xn, + yn, + m, + n, + k, + maxVal, + workspace, + redOp, + pairRedOp, + core_lambda, + raft::identity_op{}); } else { auto fusedL2NN = fusedL2NNkernel<DataT, OutT, IdxT, false, P, ReduceOpT, KVPReduceOpT, decltype(core_lambda), - decltype(fin_op)>; + raft::identity_op>; dim3 grid = launchConfigGenerator<P>

(m, n, shmemSize, fusedL2NN); - fusedL2NN<<>>( - min, x, y, xn, yn, m, n, k, maxVal, workspace, redOp, pairRedOp, core_lambda, fin_op); + fusedL2NN<<>>(min, + x, + y, + xn, + yn, + m, + n, + k, + maxVal, + workspace, + redOp, + pairRedOp, + core_lambda, + raft::identity_op{}); } RAFT_CUDA_TRY(cudaGetLastError()); diff --git a/cpp/include/raft/distance/detail/hellinger.cuh b/cpp/include/raft/distance/detail/hellinger.cuh index 31854fd1d6..51f462ab36 100644 --- a/cpp/include/raft/distance/detail/hellinger.cuh +++ b/cpp/include/raft/distance/detail/hellinger.cuh @@ -17,6 +17,7 @@ #pragma once #include #include +#include namespace raft { namespace distance { @@ -78,14 +79,10 @@ static void hellingerImpl(const DataT* x, dim3 blk(KPolicy::Nthreads); - auto unaryOp_lambda = [] __device__(DataT input) { return raft::mySqrt(input); }; // First sqrt x and y - raft::linalg::unaryOp( - (DataT*)x, x, m * k, unaryOp_lambda, stream); - + raft::linalg::unaryOp((DataT*)x, x, m * k, raft::sqrt_op{}, stream); if (x != y) { - raft::linalg::unaryOp( - (DataT*)y, y, n * k, unaryOp_lambda, stream); + raft::linalg::unaryOp((DataT*)y, y, n * k, raft::sqrt_op{}, stream); } // Accumulation operation lambda @@ -145,11 +142,9 @@ static void hellingerImpl(const DataT* x, } // Revert sqrt of x and y - raft::linalg::unaryOp( - (DataT*)x, x, m * k, unaryOp_lambda, stream); + raft::linalg::unaryOp((DataT*)x, x, m * k, raft::sqrt_op{}, stream); if (x != y) { - raft::linalg::unaryOp( - (DataT*)y, y, n * k, unaryOp_lambda, stream); + raft::linalg::unaryOp((DataT*)y, y, n * k, raft::sqrt_op{}, stream); } RAFT_CUDA_TRY(cudaGetLastError()); diff --git a/cpp/include/raft/distance/detail/l1.cuh b/cpp/include/raft/distance/detail/l1.cuh index 6372019fd3..95514db60b 100644 --- a/cpp/include/raft/distance/detail/l1.cuh +++ b/cpp/include/raft/distance/detail/l1.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, NVIDIA CORPORATION. + * Copyright (c) 2018-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -71,7 +71,7 @@ static void l1Impl(const DataT* x, // Accumulation operation lambda auto core_lambda = [] __device__(AccT & acc, DataT & x, DataT & y) { - const auto diff = raft::L1Op()(x - y); + const auto diff = raft::myAbs(x - y); acc += diff; }; diff --git a/cpp/include/raft/distance/detail/minkowski.cuh b/cpp/include/raft/distance/detail/minkowski.cuh index d3d0979d0d..bda83babf1 100644 --- a/cpp/include/raft/distance/detail/minkowski.cuh +++ b/cpp/include/raft/distance/detail/minkowski.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -74,7 +74,7 @@ void minkowskiUnExpImpl(const DataT* x, // Accumulation operation lambda auto core_lambda = [p] __device__(AccT & acc, DataT & x, DataT & y) { - const auto diff = raft::L1Op()(x - y); + const auto diff = raft::myAbs(x - y); acc += raft::myPow(diff, p); }; diff --git a/cpp/include/raft/distance/detail/pairwise_distance_base.cuh b/cpp/include/raft/distance/detail/pairwise_distance_base.cuh index 26536d13cd..69bb83d29a 100644 --- a/cpp/include/raft/distance/detail/pairwise_distance_base.cuh +++ b/cpp/include/raft/distance/detail/pairwise_distance_base.cuh @@ -14,6 +14,7 @@ * limitations under the License. 
*/ #pragma once +#include #include #include #include diff --git a/cpp/include/raft/label/detail/classlabels.cuh b/cpp/include/raft/label/detail/classlabels.cuh index 0af1c70b91..64d8b4bfae 100644 --- a/cpp/include/raft/label/detail/classlabels.cuh +++ b/cpp/include/raft/label/detail/classlabels.cuh @@ -18,6 +18,7 @@ #include +#include #include #include #include @@ -194,8 +195,7 @@ void make_monotonic( template void make_monotonic(Type* out, Type* in, size_t N, cudaStream_t stream, bool zero_based = false) { - make_monotonic( - out, in, N, stream, [] __device__(Type val) { return false; }, zero_based); + make_monotonic(out, in, N, stream, raft::const_op(false), zero_based); } }; // namespace detail diff --git a/cpp/include/raft/linalg/add.cuh b/cpp/include/raft/linalg/add.cuh index 341ba45af5..27ab24abe8 100644 --- a/cpp/include/raft/linalg/add.cuh +++ b/cpp/include/raft/linalg/add.cuh @@ -27,8 +27,6 @@ namespace raft { namespace linalg { -using detail::adds_scalar; - /** * @ingroup arithmetic * @brief Elementwise scalar add operation on the input buffer diff --git a/cpp/include/raft/linalg/coalesced_reduction.cuh b/cpp/include/raft/linalg/coalesced_reduction.cuh index e9e5a99f46..45cd640edc 100644 --- a/cpp/include/raft/linalg/coalesced_reduction.cuh +++ b/cpp/include/raft/linalg/coalesced_reduction.cuh @@ -22,6 +22,7 @@ #include #include +#include namespace raft { namespace linalg { @@ -56,9 +57,9 @@ namespace linalg { template , - typename ReduceLambda = raft::Sum, - typename FinalLambda = raft::Nop> + typename MainLambda = raft::identity_op, + typename ReduceLambda = raft::add_op, + typename FinalLambda = raft::identity_op> void coalescedReduction(OutType* dots, const InType* data, IdxType D, @@ -66,9 +67,9 @@ void coalescedReduction(OutType* dots, OutType init, cudaStream_t stream, bool inplace = false, - MainLambda main_op = raft::Nop(), - ReduceLambda reduce_op = raft::Sum(), - FinalLambda final_op = raft::Nop()) + MainLambda main_op = raft::identity_op(), + ReduceLambda reduce_op = raft::add_op(), + FinalLambda final_op = raft::identity_op()) { detail::coalescedReduction( dots, data, D, N, init, stream, inplace, main_op, reduce_op, final_op); @@ -113,17 +114,17 @@ template , - typename ReduceLambda = raft::Sum, - typename FinalLambda = raft::Nop> + typename MainLambda = raft::identity_op, + typename ReduceLambda = raft::add_op, + typename FinalLambda = raft::identity_op> void coalesced_reduction(const raft::handle_t& handle, raft::device_matrix_view data, raft::device_vector_view dots, OutValueType init, bool inplace = false, - MainLambda main_op = raft::Nop(), - ReduceLambda reduce_op = raft::Sum(), - FinalLambda final_op = raft::Nop()) + MainLambda main_op = raft::identity_op(), + ReduceLambda reduce_op = raft::add_op(), + FinalLambda final_op = raft::identity_op()) { if constexpr (std::is_same_v) { RAFT_EXPECTS(static_cast(dots.size()) == data.extent(0), diff --git a/cpp/include/raft/linalg/detail/add.cuh b/cpp/include/raft/linalg/detail/add.cuh index 34966ebbc2..bf9b2bd1d8 100644 --- a/cpp/include/raft/linalg/detail/add.cuh +++ b/cpp/include/raft/linalg/detail/add.cuh @@ -16,14 +16,11 @@ #pragma once -#include "functional.cuh" - +#include #include #include #include -#include - namespace raft { namespace linalg { namespace detail { @@ -31,13 +28,13 @@ namespace detail { template void addScalar(OutT* out, const InT* in, InT scalar, IdxType len, cudaStream_t stream) { - raft::linalg::unaryOp(out, in, len, adds_scalar(scalar), stream); + raft::linalg::unaryOp(out, in, len, 
raft::add_const_op(scalar), stream); } template void add(OutT* out, const InT* in1, const InT* in2, IdxType len, cudaStream_t stream) { - raft::linalg::binaryOp(out, in1, in2, len, thrust::plus(), stream); + raft::linalg::binaryOp(out, in1, in2, len, raft::add_op(), stream); } template diff --git a/cpp/include/raft/linalg/detail/coalesced_reduction.cuh b/cpp/include/raft/linalg/detail/coalesced_reduction.cuh index 63351f5475..238e17fa56 100644 --- a/cpp/include/raft/linalg/detail/coalesced_reduction.cuh +++ b/cpp/include/raft/linalg/detail/coalesced_reduction.cuh @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -71,9 +72,9 @@ template , - typename ReduceLambda = raft::Sum, - typename FinalLambda = raft::Nop> + typename MainLambda = raft::identity_op, + typename ReduceLambda = raft::add_op, + typename FinalLambda = raft::identity_op> void coalescedReductionThin(OutType* dots, const InType* data, IdxType D, @@ -81,9 +82,9 @@ void coalescedReductionThin(OutType* dots, OutType init, cudaStream_t stream, bool inplace = false, - MainLambda main_op = raft::Nop(), - ReduceLambda reduce_op = raft::Sum(), - FinalLambda final_op = raft::Nop()) + MainLambda main_op = raft::identity_op(), + ReduceLambda reduce_op = raft::add_op(), + FinalLambda final_op = raft::identity_op()) { common::nvtx::range fun_scope( "coalescedReductionThin<%d,%d>", Policy::LogicalWarpSize, Policy::RowsPerBlock); @@ -97,9 +98,9 @@ void coalescedReductionThin(OutType* dots, template , - typename ReduceLambda = raft::Sum, - typename FinalLambda = raft::Nop> + typename MainLambda = raft::identity_op, + typename ReduceLambda = raft::add_op, + typename FinalLambda = raft::identity_op> void coalescedReductionThinDispatcher(OutType* dots, const InType* data, IdxType D, @@ -107,9 +108,9 @@ void coalescedReductionThinDispatcher(OutType* dots, OutType init, cudaStream_t stream, bool inplace = false, - MainLambda main_op = raft::Nop(), - ReduceLambda reduce_op = raft::Sum(), - FinalLambda final_op = raft::Nop()) + MainLambda main_op = raft::identity_op(), + ReduceLambda reduce_op = raft::add_op(), + FinalLambda final_op = raft::identity_op()) { if (D <= IdxType(2)) { coalescedReductionThin>( @@ -168,9 +169,9 @@ template , - typename ReduceLambda = raft::Sum, - typename FinalLambda = raft::Nop> + typename MainLambda = raft::identity_op, + typename ReduceLambda = raft::add_op, + typename FinalLambda = raft::identity_op> void coalescedReductionMedium(OutType* dots, const InType* data, IdxType D, @@ -178,9 +179,9 @@ void coalescedReductionMedium(OutType* dots, OutType init, cudaStream_t stream, bool inplace = false, - MainLambda main_op = raft::Nop(), - ReduceLambda reduce_op = raft::Sum(), - FinalLambda final_op = raft::Nop()) + MainLambda main_op = raft::identity_op(), + ReduceLambda reduce_op = raft::add_op(), + FinalLambda final_op = raft::identity_op()) { common::nvtx::range fun_scope("coalescedReductionMedium<%d>", TPB); coalescedReductionMediumKernel @@ -191,9 +192,9 @@ void coalescedReductionMedium(OutType* dots, template , - typename ReduceLambda = raft::Sum, - typename FinalLambda = raft::Nop> + typename MainLambda = raft::identity_op, + typename ReduceLambda = raft::add_op, + typename FinalLambda = raft::identity_op> void coalescedReductionMediumDispatcher(OutType* dots, const InType* data, IdxType D, @@ -201,9 +202,9 @@ void coalescedReductionMediumDispatcher(OutType* dots, OutType init, cudaStream_t stream, bool inplace = false, - MainLambda main_op = raft::Nop(), - ReduceLambda reduce_op = raft::Sum(), - 
FinalLambda final_op = raft::Nop()) + MainLambda main_op = raft::identity_op(), + ReduceLambda reduce_op = raft::add_op(), + FinalLambda final_op = raft::identity_op()) { // Note: for now, this kernel is only used when D > 256. If this changes in the future, use // smaller block sizes when relevant. @@ -251,9 +252,9 @@ template , - typename ReduceLambda = raft::Sum, - typename FinalLambda = raft::Nop> + typename MainLambda = raft::identity_op, + typename ReduceLambda = raft::add_op, + typename FinalLambda = raft::identity_op> void coalescedReductionThick(OutType* dots, const InType* data, IdxType D, @@ -261,9 +262,9 @@ void coalescedReductionThick(OutType* dots, OutType init, cudaStream_t stream, bool inplace = false, - MainLambda main_op = raft::Nop(), - ReduceLambda reduce_op = raft::Sum(), - FinalLambda final_op = raft::Nop()) + MainLambda main_op = raft::identity_op(), + ReduceLambda reduce_op = raft::add_op(), + FinalLambda final_op = raft::identity_op()) { common::nvtx::range fun_scope( "coalescedReductionThick<%d,%d>", ThickPolicy::ThreadsPerBlock, ThickPolicy::BlocksPerRow); @@ -291,7 +292,7 @@ void coalescedReductionThick(OutType* dots, init, stream, inplace, - raft::Nop(), + raft::identity_op(), reduce_op, final_op); } @@ -299,9 +300,9 @@ void coalescedReductionThick(OutType* dots, template , - typename ReduceLambda = raft::Sum, - typename FinalLambda = raft::Nop> + typename MainLambda = raft::identity_op, + typename ReduceLambda = raft::add_op, + typename FinalLambda = raft::identity_op> void coalescedReductionThickDispatcher(OutType* dots, const InType* data, IdxType D, @@ -309,9 +310,9 @@ void coalescedReductionThickDispatcher(OutType* dots, OutType init, cudaStream_t stream, bool inplace = false, - MainLambda main_op = raft::Nop(), - ReduceLambda reduce_op = raft::Sum(), - FinalLambda final_op = raft::Nop()) + MainLambda main_op = raft::identity_op(), + ReduceLambda reduce_op = raft::add_op(), + FinalLambda final_op = raft::identity_op()) { // Note: multiple elements per thread to take advantage of the sequential reduction and loop // unrolling @@ -330,9 +331,9 @@ void coalescedReductionThickDispatcher(OutType* dots, template , - typename ReduceLambda = raft::Sum, - typename FinalLambda = raft::Nop> + typename MainLambda = raft::identity_op, + typename ReduceLambda = raft::add_op, + typename FinalLambda = raft::identity_op> void coalescedReduction(OutType* dots, const InType* data, IdxType D, @@ -340,9 +341,9 @@ void coalescedReduction(OutType* dots, OutType init, cudaStream_t stream, bool inplace = false, - MainLambda main_op = raft::Nop(), - ReduceLambda reduce_op = raft::Sum(), - FinalLambda final_op = raft::Nop()) + MainLambda main_op = raft::identity_op(), + ReduceLambda reduce_op = raft::add_op(), + FinalLambda final_op = raft::identity_op()) { /* The primitive selects one of three implementations based on heuristics: * - Thin: very efficient when D is small and/or N is large diff --git a/cpp/include/raft/linalg/detail/divide.cuh b/cpp/include/raft/linalg/detail/divide.cuh index 333cd3e83c..eef1d19d6e 100644 --- a/cpp/include/raft/linalg/detail/divide.cuh +++ b/cpp/include/raft/linalg/detail/divide.cuh @@ -16,9 +16,8 @@ #pragma once -#include "functional.cuh" - #include +#include #include namespace raft { @@ -28,7 +27,7 @@ namespace detail { template void divideScalar(OutT* out, const InT* in, InT scalar, IdxType len, cudaStream_t stream) { - raft::linalg::unaryOp(out, in, len, divides_scalar(scalar), stream); + raft::linalg::unaryOp(out, in, len, 
raft::div_const_op(scalar), stream); } }; // end namespace detail diff --git a/cpp/include/raft/linalg/detail/eltwise.cuh b/cpp/include/raft/linalg/detail/eltwise.cuh index 019f86a779..25b4ca0499 100644 --- a/cpp/include/raft/linalg/detail/eltwise.cuh +++ b/cpp/include/raft/linalg/detail/eltwise.cuh @@ -16,13 +16,10 @@ #pragma once -#include "functional.cuh" - +#include #include #include -#include - namespace raft { namespace linalg { namespace detail { @@ -30,48 +27,48 @@ namespace detail { template void scalarAdd(OutType* out, const InType* in, InType scalar, IdxType len, cudaStream_t stream) { - raft::linalg::unaryOp(out, in, len, adds_scalar(scalar), stream); + raft::linalg::unaryOp(out, in, len, raft::add_const_op(scalar), stream); } template void scalarMultiply(OutType* out, const InType* in, InType scalar, IdxType len, cudaStream_t stream) { - raft::linalg::unaryOp(out, in, len, multiplies_scalar(scalar), stream); + raft::linalg::unaryOp(out, in, len, raft::mul_const_op(scalar), stream); } template void eltwiseAdd( OutType* out, const InType* in1, const InType* in2, IdxType len, cudaStream_t stream) { - raft::linalg::binaryOp(out, in1, in2, len, thrust::plus(), stream); + raft::linalg::binaryOp(out, in1, in2, len, raft::add_op(), stream); } template void eltwiseSub( OutType* out, const InType* in1, const InType* in2, IdxType len, cudaStream_t stream) { - raft::linalg::binaryOp(out, in1, in2, len, thrust::minus(), stream); + raft::linalg::binaryOp(out, in1, in2, len, raft::sub_op(), stream); } template void eltwiseMultiply( OutType* out, const InType* in1, const InType* in2, IdxType len, cudaStream_t stream) { - raft::linalg::binaryOp(out, in1, in2, len, thrust::multiplies(), stream); + raft::linalg::binaryOp(out, in1, in2, len, raft::mul_op(), stream); } template void eltwiseDivide( OutType* out, const InType* in1, const InType* in2, IdxType len, cudaStream_t stream) { - raft::linalg::binaryOp(out, in1, in2, len, thrust::divides(), stream); + raft::linalg::binaryOp(out, in1, in2, len, raft::div_op(), stream); } template void eltwiseDivideCheckZero( OutType* out, const InType* in1, const InType* in2, IdxType len, cudaStream_t stream) { - raft::linalg::binaryOp(out, in1, in2, len, divides_check_zero(), stream); + raft::linalg::binaryOp(out, in1, in2, len, raft::div_checkzero_op(), stream); } }; // end namespace detail diff --git a/cpp/include/raft/linalg/detail/functional.cuh b/cpp/include/raft/linalg/detail/functional.cuh deleted file mode 100644 index 067b1565e0..0000000000 --- a/cpp/include/raft/linalg/detail/functional.cuh +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2022, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include - -namespace raft { -namespace linalg { -namespace detail { - -template -struct divides_scalar { - public: - divides_scalar(ArgType scalar) : scalar_(scalar) {} - - __host__ __device__ inline ReturnType operator()(ArgType in) { return in / scalar_; } - - private: - ArgType scalar_; -}; - -template -struct adds_scalar { - public: - adds_scalar(ArgType scalar) : scalar_(scalar) {} - - __host__ __device__ inline ReturnType operator()(ArgType in) { return in + scalar_; } - - private: - ArgType scalar_; -}; - -template -struct multiplies_scalar { - public: - multiplies_scalar(ArgType scalar) : scalar_(scalar) {} - - __host__ __device__ inline ReturnType operator()(ArgType in) { return in * scalar_; } - - private: - ArgType scalar_; -}; - -template -struct divides_check_zero { - public: - __host__ __device__ inline ReturnType operator()(ArgType a, ArgType b) - { - return (b == static_cast(0)) ? 0.0 : a / b; - } -}; - -} // namespace detail -} // namespace linalg -} // namespace raft \ No newline at end of file diff --git a/cpp/include/raft/linalg/detail/multiply.cuh b/cpp/include/raft/linalg/detail/multiply.cuh index f1a8548bfa..84b832d875 100644 --- a/cpp/include/raft/linalg/detail/multiply.cuh +++ b/cpp/include/raft/linalg/detail/multiply.cuh @@ -16,6 +16,7 @@ #pragma once +#include #include namespace raft { @@ -26,8 +27,7 @@ template void multiplyScalar( math_t* out, const math_t* in, const math_t scalar, IdxType len, cudaStream_t stream) { - raft::linalg::unaryOp( - out, in, len, [scalar] __device__(math_t in) { return in * scalar; }, stream); + raft::linalg::unaryOp(out, in, len, raft::mul_const_op{scalar}, stream); } }; // end namespace detail diff --git a/cpp/include/raft/linalg/detail/norm.cuh b/cpp/include/raft/linalg/detail/norm.cuh index f2f08233d5..ed7e360848 100644 --- a/cpp/include/raft/linalg/detail/norm.cuh +++ b/cpp/include/raft/linalg/detail/norm.cuh @@ -16,6 +16,7 @@ #pragma once +#include #include #include @@ -44,8 +45,8 @@ void rowNormCaller(Type* dots, true, stream, false, - raft::L1Op(), - raft::Sum(), + raft::abs_op(), + raft::add_op(), fin_op); break; case L2Norm: @@ -58,8 +59,8 @@ void rowNormCaller(Type* dots, true, stream, false, - raft::L2Op(), - raft::Sum(), + raft::sq_op(), + raft::add_op(), fin_op); break; case LinfNorm: @@ -72,8 +73,8 @@ void rowNormCaller(Type* dots, true, stream, false, - raft::L1Op(), - raft::Max(), + raft::abs_op(), + raft::max_op(), fin_op); break; default: THROW("Unsupported norm type: %d", type); @@ -101,8 +102,8 @@ void colNormCaller(Type* dots, false, stream, false, - raft::L1Op(), - raft::Sum(), + raft::abs_op(), + raft::add_op(), fin_op); break; case L2Norm: @@ -115,8 +116,8 @@ void colNormCaller(Type* dots, false, stream, false, - raft::L2Op(), - raft::Sum(), + raft::sq_op(), + raft::add_op(), fin_op); break; case LinfNorm: @@ -129,8 +130,8 @@ void colNormCaller(Type* dots, false, stream, false, - raft::L1Op(), - raft::Max(), + raft::abs_op(), + raft::max_op(), fin_op); break; default: THROW("Unsupported norm type: %d", type); diff --git a/cpp/include/raft/linalg/detail/reduce.cuh b/cpp/include/raft/linalg/detail/reduce.cuh index 3022973b43..721ca8179f 100644 --- a/cpp/include/raft/linalg/detail/reduce.cuh +++ b/cpp/include/raft/linalg/detail/reduce.cuh @@ -16,9 +16,9 @@ #pragma once +#include #include #include -#include namespace raft { namespace linalg { @@ -27,9 +27,9 @@ namespace detail { template , - typename ReduceLambda = raft::Sum, - typename FinalLambda = raft::Nop> + typename MainLambda = 
raft::identity_op, + typename ReduceLambda = raft::add_op, + typename FinalLambda = raft::identity_op> void reduce(OutType* dots, const InType* data, IdxType D, @@ -39,9 +39,9 @@ void reduce(OutType* dots, bool alongRows, cudaStream_t stream, bool inplace = false, - MainLambda main_op = raft::Nop(), - ReduceLambda reduce_op = raft::Sum(), - FinalLambda final_op = raft::Nop()) + MainLambda main_op = raft::identity_op(), + ReduceLambda reduce_op = raft::add_op(), + FinalLambda final_op = raft::identity_op()) { if (rowMajor && alongRows) { raft::linalg::coalescedReduction( diff --git a/cpp/include/raft/linalg/detail/strided_reduction.cuh b/cpp/include/raft/linalg/detail/strided_reduction.cuh index d72bd54a32..0e516b4750 100644 --- a/cpp/include/raft/linalg/detail/strided_reduction.cuh +++ b/cpp/include/raft/linalg/detail/strided_reduction.cuh @@ -18,6 +18,7 @@ #include "unary_op.cuh" #include +#include #include #include #include @@ -107,9 +108,9 @@ __global__ void stridedReductionKernel(OutType* dots, template , - typename ReduceLambda = raft::Sum, - typename FinalLambda = raft::Nop> + typename MainLambda = raft::identity_op, + typename ReduceLambda = raft::add_op, + typename FinalLambda = raft::identity_op> void stridedReduction(OutType* dots, const InType* data, IdxType D, @@ -117,15 +118,13 @@ void stridedReduction(OutType* dots, OutType init, cudaStream_t stream, bool inplace = false, - MainLambda main_op = raft::Nop(), - ReduceLambda reduce_op = raft::Sum(), - FinalLambda final_op = raft::Nop()) + MainLambda main_op = raft::identity_op(), + ReduceLambda reduce_op = raft::add_op(), + FinalLambda final_op = raft::identity_op()) { ///@todo: this extra should go away once we have eliminated the need /// for atomics in stridedKernel (redesign for this is already underway) - if (!inplace) - raft::linalg::unaryOp( - dots, dots, D, [init] __device__(OutType a) { return init; }, stream); + if (!inplace) raft::linalg::unaryOp(dots, dots, D, raft::const_op(init), stream); // Arbitrary numbers for now, probably need to tune const dim3 thrds(32, 16); @@ -137,7 +136,7 @@ void stridedReduction(OutType* dots, ///@todo: this complication should go away once we have eliminated the need /// for atomics in stridedKernel (redesign for this is already underway) - if constexpr (std::is_same>::value && + if constexpr (std::is_same::value && std::is_same::value) stridedSummationKernel <<>>(dots, data, D, N, init, main_op); @@ -148,7 +147,7 @@ void stridedReduction(OutType* dots, ///@todo: this complication should go away once we have eliminated the need /// for atomics in stridedKernel (redesign for this is already underway) // Perform final op on output data - if (!std::is_same>::value) + if (!std::is_same::value) raft::linalg::unaryOp(dots, dots, D, final_op, stream); } diff --git a/cpp/include/raft/linalg/detail/subtract.cuh b/cpp/include/raft/linalg/detail/subtract.cuh index ae0f09d2fe..6df09df8ed 100644 --- a/cpp/include/raft/linalg/detail/subtract.cuh +++ b/cpp/include/raft/linalg/detail/subtract.cuh @@ -16,6 +16,7 @@ #pragma once +#include #include #include #include @@ -27,15 +28,13 @@ namespace detail { template void subtractScalar(OutT* out, const InT* in, InT scalar, IdxType len, cudaStream_t stream) { - auto op = [scalar] __device__(InT in) { return OutT(in - scalar); }; - raft::linalg::unaryOp(out, in, len, op, stream); + raft::linalg::unaryOp(out, in, len, raft::sub_const_op(scalar), stream); } template void subtract(OutT* out, const InT* in1, const InT* in2, IdxType len, cudaStream_t stream) { - 
auto op = [] __device__(InT a, InT b) { return OutT(a - b); }; - raft::linalg::binaryOp(out, in1, in2, len, op, stream); + raft::linalg::binaryOp(out, in1, in2, len, raft::sub_op(), stream); } template diff --git a/cpp/include/raft/linalg/divide.cuh b/cpp/include/raft/linalg/divide.cuh index 53b083045e..526d8a9716 100644 --- a/cpp/include/raft/linalg/divide.cuh +++ b/cpp/include/raft/linalg/divide.cuh @@ -21,13 +21,12 @@ #include "detail/divide.cuh" #include +#include #include namespace raft { namespace linalg { -using detail::divides_scalar; - /** * @defgroup ScalarOps Scalar operations on the input buffer * @tparam OutT output data-type upon which the math operation will be performed diff --git a/cpp/include/raft/linalg/eltwise.cuh b/cpp/include/raft/linalg/eltwise.cuh index dbc06a4af3..2e6c1a4ab5 100644 --- a/cpp/include/raft/linalg/eltwise.cuh +++ b/cpp/include/raft/linalg/eltwise.cuh @@ -23,8 +23,6 @@ namespace raft { namespace linalg { -using detail::adds_scalar; - /** * @defgroup ScalarOps Scalar operations on the input buffer * @tparam InType data-type upon which the math operation will be performed @@ -42,8 +40,6 @@ void scalarAdd(OutType* out, const InType* in, InType scalar, IdxType len, cudaS detail::scalarAdd(out, in, scalar, len, stream); } -using detail::multiplies_scalar; - template void scalarMultiply(OutType* out, const InType* in, InType scalar, IdxType len, cudaStream_t stream) { @@ -90,8 +86,6 @@ void eltwiseDivide( detail::eltwiseDivide(out, in1, in2, len, stream); } -using detail::divides_check_zero; - template void eltwiseDivideCheckZero( OutType* out, const InType* in1, const InType* in2, IdxType len, cudaStream_t stream) diff --git a/cpp/include/raft/linalg/norm.cuh b/cpp/include/raft/linalg/norm.cuh index 19757eb86d..b64b128fa2 100644 --- a/cpp/include/raft/linalg/norm.cuh +++ b/cpp/include/raft/linalg/norm.cuh @@ -22,6 +22,7 @@ #include "linalg_types.hpp" #include +#include #include #include @@ -47,7 +48,7 @@ namespace linalg { * @param stream cuda stream where to launch work * @param fin_op the final lambda op */ -template > +template void rowNorm(Type* dots, const Type* data, IdxType D, @@ -55,7 +56,7 @@ void rowNorm(Type* dots, NormType type, bool rowMajor, cudaStream_t stream, - Lambda fin_op = raft::Nop()) + Lambda fin_op = raft::identity_op()) { detail::rowNormCaller(dots, data, D, N, type, rowMajor, stream, fin_op); } @@ -74,7 +75,7 @@ void rowNorm(Type* dots, * @param stream cuda stream where to launch work * @param fin_op the final lambda op */ -template > +template void colNorm(Type* dots, const Type* data, IdxType D, @@ -82,7 +83,7 @@ void colNorm(Type* dots, NormType type, bool rowMajor, cudaStream_t stream, - Lambda fin_op = raft::Nop()) + Lambda fin_op = raft::identity_op()) { detail::colNormCaller(dots, data, D, N, type, rowMajor, stream, fin_op); } @@ -109,13 +110,13 @@ void colNorm(Type* dots, template > + typename Lambda = raft::identity_op> void norm(const raft::handle_t& handle, raft::device_matrix_view in, raft::device_vector_view out, NormType type, Apply apply, - Lambda fin_op = raft::Nop()) + Lambda fin_op = raft::identity_op()) { RAFT_EXPECTS(raft::is_row_or_column_major(in), "Input must be contiguous"); diff --git a/cpp/include/raft/linalg/normalize.cuh b/cpp/include/raft/linalg/normalize.cuh index e64436762c..bf6ef5a570 100644 --- a/cpp/include/raft/linalg/normalize.cuh +++ b/cpp/include/raft/linalg/normalize.cuh @@ -18,6 +18,7 @@ #include "detail/normalize.cuh" +#include #include namespace raft { @@ -99,34 +100,16 @@ void 
row_normalize(const raft::handle_t& handle, { switch (norm_type) { case L1Norm: - row_normalize(handle, - in, - out, - ElementType(0), - raft::L1Op(), - raft::Sum(), - raft::Nop(), - eps); + row_normalize( + handle, in, out, ElementType(0), raft::abs_op(), raft::add_op(), raft::identity_op(), eps); break; case L2Norm: - row_normalize(handle, - in, - out, - ElementType(0), - raft::L2Op(), - raft::Sum(), - raft::SqrtOp(), - eps); + row_normalize( + handle, in, out, ElementType(0), raft::sq_op(), raft::add_op(), raft::sqrt_op(), eps); break; case LinfNorm: - row_normalize(handle, - in, - out, - ElementType(0), - raft::L1Op(), - raft::Max(), - raft::Nop(), - eps); + row_normalize( + handle, in, out, ElementType(0), raft::abs_op(), raft::max_op(), raft::identity_op(), eps); break; default: THROW("Unsupported norm type: %d", norm_type); } diff --git a/cpp/include/raft/linalg/power.cuh b/cpp/include/raft/linalg/power.cuh index acd226b71d..59c2cdf314 100644 --- a/cpp/include/raft/linalg/power.cuh +++ b/cpp/include/raft/linalg/power.cuh @@ -19,9 +19,9 @@ #pragma once #include +#include #include #include -#include #include namespace raft { @@ -41,8 +41,7 @@ namespace linalg { template void powerScalar(out_t* out, const in_t* in, const in_t scalar, IdxType len, cudaStream_t stream) { - raft::linalg::unaryOp( - out, in, len, [scalar] __device__(in_t in) { return raft::myPow(in, scalar); }, stream); + raft::linalg::unaryOp(out, in, len, raft::pow_const_op(scalar), stream); } /** @} */ @@ -61,8 +60,7 @@ void powerScalar(out_t* out, const in_t* in, const in_t scalar, IdxType len, cud template void power(out_t* out, const in_t* in1, const in_t* in2, IdxType len, cudaStream_t stream) { - raft::linalg::binaryOp( - out, in1, in2, len, [] __device__(in_t a, in_t b) { return raft::myPow(a, b); }, stream); + raft::linalg::binaryOp(out, in1, in2, len, raft::pow_op(), stream); } /** @} */ diff --git a/cpp/include/raft/linalg/reduce.cuh b/cpp/include/raft/linalg/reduce.cuh index 5579acf355..3eb8196408 100644 --- a/cpp/include/raft/linalg/reduce.cuh +++ b/cpp/include/raft/linalg/reduce.cuh @@ -22,6 +22,7 @@ #include "linalg_types.hpp" #include +#include #include namespace raft { @@ -59,9 +60,9 @@ namespace linalg { template , - typename ReduceLambda = raft::Sum, - typename FinalLambda = raft::Nop> + typename MainLambda = raft::identity_op, + typename ReduceLambda = raft::add_op, + typename FinalLambda = raft::identity_op> void reduce(OutType* dots, const InType* data, IdxType D, @@ -71,9 +72,9 @@ void reduce(OutType* dots, bool alongRows, cudaStream_t stream, bool inplace = false, - MainLambda main_op = raft::Nop(), - ReduceLambda reduce_op = raft::Sum(), - FinalLambda final_op = raft::Nop()) + MainLambda main_op = raft::identity_op(), + ReduceLambda reduce_op = raft::add_op(), + FinalLambda final_op = raft::identity_op()) { detail::reduce( dots, data, D, N, init, rowMajor, alongRows, stream, inplace, main_op, reduce_op, final_op); @@ -118,18 +119,18 @@ template , - typename ReduceLambda = raft::Sum, - typename FinalLambda = raft::Nop> + typename MainLambda = raft::identity_op, + typename ReduceLambda = raft::add_op, + typename FinalLambda = raft::identity_op> void reduce(const raft::handle_t& handle, raft::device_matrix_view data, raft::device_vector_view dots, OutElementType init, Apply apply, bool inplace = false, - MainLambda main_op = raft::Nop(), - ReduceLambda reduce_op = raft::Sum(), - FinalLambda final_op = raft::Nop()) + MainLambda main_op = raft::identity_op(), + ReduceLambda reduce_op = raft::add_op(), 
+ FinalLambda final_op = raft::identity_op()) { RAFT_EXPECTS(raft::is_row_or_column_major(data), "Input must be contiguous"); diff --git a/cpp/include/raft/linalg/sqrt.cuh b/cpp/include/raft/linalg/sqrt.cuh index 2951285c3a..ad6cad2eb2 100644 --- a/cpp/include/raft/linalg/sqrt.cuh +++ b/cpp/include/raft/linalg/sqrt.cuh @@ -19,8 +19,8 @@ #pragma once #include +#include #include -#include namespace raft { namespace linalg { @@ -38,8 +38,7 @@ namespace linalg { template void sqrt(out_t* out, const in_t* in, IdxType len, cudaStream_t stream) { - raft::linalg::unaryOp( - out, in, len, [] __device__(in_t in) { return raft::mySqrt(in); }, stream); + raft::linalg::unaryOp(out, in, len, raft::sqrt_op{}, stream); } /** @} */ diff --git a/cpp/include/raft/linalg/strided_reduction.cuh b/cpp/include/raft/linalg/strided_reduction.cuh index 0aa4aecef5..d9c26910e7 100644 --- a/cpp/include/raft/linalg/strided_reduction.cuh +++ b/cpp/include/raft/linalg/strided_reduction.cuh @@ -22,6 +22,7 @@ #include "detail/strided_reduction.cuh" #include +#include #include #include @@ -59,9 +60,9 @@ namespace linalg { template , - typename ReduceLambda = raft::Sum, - typename FinalLambda = raft::Nop> + typename MainLambda = raft::identity_op, + typename ReduceLambda = raft::add_op, + typename FinalLambda = raft::identity_op> void stridedReduction(OutType* dots, const InType* data, IdxType D, @@ -69,9 +70,9 @@ void stridedReduction(OutType* dots, OutType init, cudaStream_t stream, bool inplace = false, - MainLambda main_op = raft::Nop(), - ReduceLambda reduce_op = raft::Sum(), - FinalLambda final_op = raft::Nop()) + MainLambda main_op = raft::identity_op(), + ReduceLambda reduce_op = raft::add_op(), + FinalLambda final_op = raft::identity_op()) { // Only compile for types supported by myAtomicReduce, but don't make the compilation fail in // other cases, because coalescedReduction supports arbitrary types. 
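For reference, the reworked defaults make call sites read like the detail layer above. A minimal usage sketch (not code from this patch) of a sum-of-squares reduction with a square-root finalizer, assuming `dots`/`data` are valid device buffers of D outputs and N x D row-major inputs, and `stream` is caller-managed:

#include <raft/core/operators.hpp>
#include <raft/linalg/strided_reduction.cuh>

// Sketch: per-output L2 norms via stridedReduction with the unified functors.
void l2_norms_strided(float* dots, const float* data, int D, int N, cudaStream_t stream)
{
  raft::linalg::stridedReduction(dots, data, D, N, 0.0f, stream, false,
                                 raft::sq_op{},     // main_op: square each element
                                 raft::add_op{},    // reduce_op: accumulate the squares
                                 raft::sqrt_op{});  // final_op: square root of each sum
}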
@@ -124,17 +125,17 @@ template , - typename ReduceLambda = raft::Sum, - typename FinalLambda = raft::Nop> + typename MainLambda = raft::identity_op, + typename ReduceLambda = raft::add_op, + typename FinalLambda = raft::identity_op> void strided_reduction(const raft::handle_t& handle, raft::device_matrix_view data, raft::device_vector_view dots, OutValueType init, bool inplace = false, - MainLambda main_op = raft::Nop(), - ReduceLambda reduce_op = raft::Sum(), - FinalLambda final_op = raft::Nop()) + MainLambda main_op = raft::identity_op(), + ReduceLambda reduce_op = raft::add_op(), + FinalLambda final_op = raft::identity_op()) { if constexpr (std::is_same_v) { RAFT_EXPECTS(static_cast(dots.size()) == data.extent(1), diff --git a/cpp/include/raft/matrix/detail/gather.cuh b/cpp/include/raft/matrix/detail/gather.cuh index 3738afba5d..c006f69e47 100644 --- a/cpp/include/raft/matrix/detail/gather.cuh +++ b/cpp/include/raft/matrix/detail/gather.cuh @@ -16,6 +16,8 @@ #pragma once +#include + namespace raft { namespace matrix { namespace detail { @@ -183,16 +185,7 @@ void gather(const MatrixIteratorT in, { typedef typename std::iterator_traits::value_type MapValueT; gatherImpl( - in, - D, - N, - map, - map, - map_length, - out, - [] __device__(MapValueT val) { return true; }, - [] __device__(MapValueT val) { return val; }, - stream); + in, D, N, map, map, map_length, out, raft::const_op(true), raft::identity_op(), stream); } /** @@ -227,17 +220,7 @@ void gather(const MatrixIteratorT in, cudaStream_t stream) { typedef typename std::iterator_traits::value_type MapValueT; - gatherImpl( - in, - D, - N, - map, - map, - map_length, - out, - [] __device__(MapValueT val) { return true; }, - transform_op, - stream); + gatherImpl(in, D, N, map, map, map_length, out, raft::const_op(true), transform_op, stream); } /** @@ -279,17 +262,7 @@ void gather_if(const MatrixIteratorT in, cudaStream_t stream) { typedef typename std::iterator_traits::value_type MapValueT; - gatherImpl( - in, - D, - N, - map, - stencil, - map_length, - out, - pred_op, - [] __device__(MapValueT val) { return val; }, - stream); + gatherImpl(in, D, N, map, stencil, map_length, out, pred_op, raft::identity_op(), stream); } /** diff --git a/cpp/include/raft/matrix/detail/math.cuh b/cpp/include/raft/matrix/detail/math.cuh index 64c85a03a5..c559da3942 100644 --- a/cpp/include/raft/matrix/detail/math.cuh +++ b/cpp/include/raft/matrix/detail/math.cuh @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -188,8 +189,7 @@ void reciprocal(math_t* in, math_t* out, IdxType len, cudaStream_t stream) template void setValue(math_t* out, const math_t* in, math_t scalar, int len, cudaStream_t stream = 0) { - raft::linalg::unaryOp( - out, in, len, [scalar] __device__(math_t in) { return scalar; }, stream); + raft::linalg::unaryOp(out, in, len, raft::const_op(scalar), stream); } template @@ -201,8 +201,7 @@ void ratio( rmm::device_scalar d_sum(stream); auto* d_sum_ptr = d_sum.data(); - auto no_op = [] __device__(math_t in) { return in; }; - raft::linalg::mapThenSumReduce(d_sum_ptr, len, no_op, stream, src); + raft::linalg::mapThenSumReduce(d_sum_ptr, len, raft::identity_op{}, stream, src); raft::linalg::unaryOp( d_dest, d_src, len, [=] __device__(math_t a) { return a / (*d_sum_ptr); }, stream); } @@ -217,15 +216,7 @@ void matrixVectorBinaryMult(Type* data, cudaStream_t stream) { raft::linalg::matrixVectorOp( - data, - data, - vec, - n_col, - n_row, - rowMajor, - bcastAlongRows, - [] __device__(Type a, Type b) { return a * b; }, - 
stream); + data, data, vec, n_col, n_row, rowMajor, bcastAlongRows, raft::mul_op(), stream); } template @@ -264,15 +255,7 @@ void matrixVectorBinaryDiv(Type* data, cudaStream_t stream) { raft::linalg::matrixVectorOp( - data, - data, - vec, - n_col, - n_row, - rowMajor, - bcastAlongRows, - [] __device__(Type a, Type b) { return a / b; }, - stream); + data, data, vec, n_col, n_row, rowMajor, bcastAlongRows, raft::div_op(), stream); } template @@ -330,15 +313,7 @@ void matrixVectorBinaryAdd(Type* data, cudaStream_t stream) { raft::linalg::matrixVectorOp( - data, - data, - vec, - n_col, - n_row, - rowMajor, - bcastAlongRows, - [] __device__(Type a, Type b) { return a + b; }, - stream); + data, data, vec, n_col, n_row, rowMajor, bcastAlongRows, raft::add_op(), stream); } template @@ -351,15 +326,7 @@ void matrixVectorBinarySub(Type* data, cudaStream_t stream) { raft::linalg::matrixVectorOp( - data, - data, - vec, - n_col, - n_row, - rowMajor, - bcastAlongRows, - [] __device__(Type a, Type b) { return a - b; }, - stream); + data, data, vec, n_col, n_row, rowMajor, bcastAlongRows, raft::sub_op(), stream); } // Computes an argmin/argmax column-wise in a DxN matrix diff --git a/cpp/include/raft/sparse/distance/detail/lp_distance.cuh b/cpp/include/raft/sparse/distance/detail/lp_distance.cuh index 0707eb2a9b..01b665e207 100644 --- a/cpp/include/raft/sparse/distance/detail/lp_distance.cuh +++ b/cpp/include/raft/sparse/distance/detail/lp_distance.cuh @@ -18,6 +18,7 @@ #include +#include #include #include #include @@ -193,13 +194,12 @@ class lp_unexpanded_distances_t : public distances_t { { unexpanded_lp_distances(out_dists, config_, PDiff(p), Sum(), AtomicAdd()); - float one_over_p = 1.0f / p; - raft::linalg::unaryOp( - out_dists, - out_dists, - config_->a_nrows * config_->b_nrows, - [=] __device__(value_t input) { return pow(input, one_over_p); }, - config_->handle.get_stream()); + value_t one_over_p = value_t{1} / p; + raft::linalg::unaryOp(out_dists, + out_dists, + config_->a_nrows * config_->b_nrows, + raft::pow_const_op(one_over_p), + config_->handle.get_stream()); } private: @@ -220,12 +220,11 @@ class hamming_unexpanded_distances_t : public distances_t { unexpanded_lp_distances(out_dists, config_, NotEqual(), Sum(), AtomicAdd()); value_t n_cols = 1.0 / config_->a_ncols; - raft::linalg::unaryOp( - out_dists, - out_dists, - config_->a_nrows * config_->b_nrows, - [=] __device__(value_t input) { return input * n_cols; }, - config_->handle.get_stream()); + raft::linalg::unaryOp(out_dists, + out_dists, + config_->a_nrows * config_->b_nrows, + raft::mul_const_op(n_cols), + config_->handle.get_stream()); } private: @@ -302,12 +301,11 @@ class kl_divergence_unexpanded_distances_t : public distances_t { Sum(), AtomicAdd()); - raft::linalg::unaryOp( - out_dists, - out_dists, - config_->a_nrows * config_->b_nrows, - [=] __device__(value_t input) { return 0.5 * input; }, - config_->handle.get_stream()); + raft::linalg::unaryOp(out_dists, + out_dists, + config_->a_nrows * config_->b_nrows, + raft::mul_const_op(0.5), + config_->handle.get_stream()); } private: diff --git a/cpp/include/raft/sparse/op/detail/slice.cuh b/cpp/include/raft/sparse/op/detail/slice.cuh index 193d246b4b..4d2f1a4195 100644 --- a/cpp/include/raft/sparse/op/detail/slice.cuh +++ b/cpp/include/raft/sparse/op/detail/slice.cuh @@ -18,6 +18,7 @@ #include +#include #include #include #include @@ -70,12 +71,11 @@ void csr_row_slice_indptr(value_idx start_row, // we add another 1 to stop row. 
raft::copy_async(indptr_out, indptr + start_row, (stop_row + 2) - start_row, stream); - raft::linalg::unaryOp( - indptr_out, - indptr_out, - (stop_row + 2) - start_row, - [s_offset] __device__(value_idx input) { return input - s_offset; }, - stream); + raft::linalg::unaryOp(indptr_out, + indptr_out, + (stop_row + 2) - start_row, + raft::sub_const_op(s_offset), + stream); } /** diff --git a/cpp/include/raft/spatial/knn/detail/ann_quantized.cuh b/cpp/include/raft/spatial/knn/detail/ann_quantized.cuh index e6c0b83161..10f781d817 100644 --- a/cpp/include/raft/spatial/knn/detail/ann_quantized.cuh +++ b/cpp/include/raft/spatial/knn/detail/ann_quantized.cuh @@ -22,6 +22,7 @@ #include "common_faiss.h" #include "processing.cuh" +#include #include #include @@ -196,11 +197,7 @@ void approx_knn_search(const handle_t& handle, float p = 0.5; // standard l2 if (index->metric == raft::distance::DistanceType::LpUnexpanded) p = 1.0 / index->metricArg; raft::linalg::unaryOp( - distances, - distances, - n * k, - [p] __device__(float input) { return powf(input, p); }, - handle.get_stream()); + distances, distances, n * k, raft::pow_const_op(p), handle.get_stream()); } if constexpr (std::is_same_v) { index->metric_processor->postprocess(distances); } } diff --git a/cpp/include/raft/spatial/knn/detail/fused_l2_knn.cuh b/cpp/include/raft/spatial/knn/detail/fused_l2_knn.cuh index 41f1df85fe..85a05877f1 100644 --- a/cpp/include/raft/spatial/knn/detail/fused_l2_knn.cuh +++ b/cpp/include/raft/spatial/knn/detail/fused_l2_knn.cuh @@ -20,8 +20,10 @@ #include // TODO: Need to hide the PairwiseDistance class impl and expose to public API #include "processing.cuh" +#include #include #include +#include namespace raft { namespace spatial { @@ -566,8 +568,6 @@ void fusedL2UnexpKnnImpl(const DataT* x, acc += diff * diff; }; - auto fin_op = [] __device__(AccT d_val, int g_d_idx) { return d_val; }; - typedef cub::KeyValuePair Pair; if (isRowMajor) { @@ -578,7 +578,7 @@ void fusedL2UnexpKnnImpl(const DataT* x, IdxT, KPolicy, decltype(core_lambda), - decltype(fin_op), + raft::identity_op, 32, 2, usePrevTopKs, @@ -590,7 +590,7 @@ void fusedL2UnexpKnnImpl(const DataT* x, IdxT, KPolicy, decltype(core_lambda), - decltype(fin_op), + raft::identity_op, 64, 3, usePrevTopKs, @@ -630,7 +630,7 @@ void fusedL2UnexpKnnImpl(const DataT* x, ldb, ldd, core_lambda, - fin_op, + raft::identity_op{}, sqrt, (uint32_t)numOfNN, (int*)workspace, @@ -757,8 +757,6 @@ void fusedL2ExpKnnImpl(const DataT* x, // Accumulation operation lambda auto core_lambda = [] __device__(AccT & acc, DataT & x, DataT & y) { acc += x * y; }; - auto fin_op = [] __device__(AccT d_val, int g_d_idx) { return d_val; }; - typedef cub::KeyValuePair Pair; if (isRowMajor) { @@ -769,7 +767,7 @@ void fusedL2ExpKnnImpl(const DataT* x, IdxT, KPolicy, decltype(core_lambda), - decltype(fin_op), + raft::identity_op, 32, 2, usePrevTopKs, @@ -781,7 +779,7 @@ void fusedL2ExpKnnImpl(const DataT* x, IdxT, KPolicy, decltype(core_lambda), - decltype(fin_op), + raft::identity_op, 64, 3, usePrevTopKs, @@ -818,14 +816,15 @@ void fusedL2ExpKnnImpl(const DataT* x, DataT* xn = (DataT*)workspace; DataT* yn = (DataT*)workspace; - auto norm_op = [] __device__(DataT in) { return in; }; - if (x != y) { yn += m; - raft::linalg::rowNorm(xn, x, k, m, raft::linalg::L2Norm, isRowMajor, stream, norm_op); - raft::linalg::rowNorm(yn, y, k, n, raft::linalg::L2Norm, isRowMajor, stream, norm_op); + raft::linalg::rowNorm( + xn, x, k, m, raft::linalg::L2Norm, isRowMajor, stream, raft::identity_op{}); + 
raft::linalg::rowNorm( + yn, y, k, n, raft::linalg::L2Norm, isRowMajor, stream, raft::identity_op{}); } else { - raft::linalg::rowNorm(xn, x, k, n, raft::linalg::L2Norm, isRowMajor, stream, norm_op); + raft::linalg::rowNorm( + xn, x, k, n, raft::linalg::L2Norm, isRowMajor, stream, raft::identity_op{}); } fusedL2ExpKnnRowMajor<<>>(x, y, @@ -838,7 +837,7 @@ void fusedL2ExpKnnImpl(const DataT* x, ldb, ldd, core_lambda, - fin_op, + raft::identity_op{}, sqrt, (uint32_t)numOfNN, mutexes, diff --git a/cpp/include/raft/spatial/knn/detail/ivf_flat_build.cuh b/cpp/include/raft/spatial/knn/detail/ivf_flat_build.cuh index 14c4dd85f1..f08a97e0f7 100644 --- a/cpp/include/raft/spatial/knn/detail/ivf_flat_build.cuh +++ b/cpp/include/raft/spatial/knn/detail/ivf_flat_build.cuh @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -245,7 +246,7 @@ inline auto extend(const handle_t& handle, raft::linalg::L2Norm, true, stream, - raft::SqrtOp()); + raft::sqrt_op()); RAFT_LOG_TRACE_VEC(ext_index.center_norms()->data_handle(), std::min(dim, 20)); } } diff --git a/cpp/include/raft/spatial/knn/detail/ivf_flat_search.cuh b/cpp/include/raft/spatial/knn/detail/ivf_flat_search.cuh index 94f4dc96c6..d2f7d681d7 100644 --- a/cpp/include/raft/spatial/knn/detail/ivf_flat_search.cuh +++ b/cpp/include/raft/spatial/knn/detail/ivf_flat_search.cuh @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -1110,7 +1111,7 @@ void search_impl(const handle_t& handle, raft::linalg::L2Norm, true, stream, - raft::SqrtOp()); + raft::sqrt_op()); utils::outer_add(query_norm_dev.data(), (IdxT)n_queries, index.center_norms()->data_handle(), diff --git a/cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh b/cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh index 9262ef6baf..2a2fc1f10b 100644 --- a/cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh +++ b/cpp/include/raft/spatial/knn/detail/ivf_pq_build.cuh @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -259,14 +260,7 @@ void select_residuals(const handle_t& handle, n_rows, (IdxT)dim, dataset, row_ids, (IdxT)dim, tmp.data(), (IdxT)dim, stream); raft::matrix::linewiseOp( - tmp.data(), - tmp.data(), - IdxT(dim), - n_rows, - true, - [] __device__(float a, float b) { return a - b; }, - stream, - center); + tmp.data(), tmp.data(), IdxT(dim), n_rows, true, raft::sub_op{}, stream, center); float alpha = 1.0; float beta = 0.0; @@ -1186,7 +1180,7 @@ inline auto build_device( raft::linalg::L2Norm, true, stream, - raft::SqrtOp()); + raft::sqrt_op()); RAFT_CUDA_TRY(cudaMemcpy2DAsync(index.centers().data_handle() + index.dim(), sizeof(float) * index.dim_ext(), center_norms.data(), diff --git a/cpp/include/raft/spatial/knn/detail/ivf_pq_search.cuh b/cpp/include/raft/spatial/knn/detail/ivf_pq_search.cuh index cd8197bcf0..1df5671be2 100644 --- a/cpp/include/raft/spatial/knn/detail/ivf_pq_search.cuh +++ b/cpp/include/raft/spatial/knn/detail/ivf_pq_search.cuh @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -418,14 +419,12 @@ void postprocess_distances(float* out, // [n_queries, topk] switch (metric) { case distance::DistanceType::L2Unexpanded: case distance::DistanceType::L2Expanded: { - linalg::unaryOp( - out, - in, - len, - [scaling_factor] __device__(ScoreT x) -> float { - return scaling_factor * scaling_factor * float(x); - }, - stream); + linalg::unaryOp(out, + in, + len, + raft::compose_op(raft::mul_const_op{scaling_factor * scaling_factor}, + raft::cast_op{}), + 
stream); } break; case distance::DistanceType::L2SqrtUnexpanded: case distance::DistanceType::L2SqrtExpanded: { @@ -433,18 +432,17 @@ void postprocess_distances(float* out, // [n_queries, topk] out, in, len, - [scaling_factor] __device__(ScoreT x) -> float { return scaling_factor * sqrtf(float(x)); }, + raft::compose_op{ + raft::mul_const_op{scaling_factor}, raft::sqrt_op{}, raft::cast_op{}}, stream); } break; case distance::DistanceType::InnerProduct: { - linalg::unaryOp( - out, - in, - len, - [scaling_factor] __device__(ScoreT x) -> float { - return -scaling_factor * scaling_factor * float(x); - }, - stream); + linalg::unaryOp(out, + in, + len, + raft::compose_op(raft::mul_const_op{-scaling_factor * scaling_factor}, + raft::cast_op{}), + stream); } break; default: RAFT_FAIL("Unexpected metric."); } diff --git a/cpp/include/raft/spatial/knn/detail/processing.cuh b/cpp/include/raft/spatial/knn/detail/processing.cuh index a80c1c1935..b4b1cb2c14 100644 --- a/cpp/include/raft/spatial/knn/detail/processing.cuh +++ b/cpp/include/raft/spatial/knn/detail/processing.cuh @@ -17,6 +17,7 @@ #include "processing.hpp" +#include #include #include #include @@ -59,32 +60,16 @@ class CosineMetricProcessor : public MetricProcessor { raft::linalg::NormType::L2Norm, row_major_, stream_, - [] __device__(math_t in) { return sqrtf(in); }); + raft::sqrt_op{}); raft::linalg::matrixVectorOp( - data, - data, - colsums_.data(), - n_cols_, - n_rows_, - row_major_, - false, - [] __device__(math_t mat_in, math_t vec_in) { return mat_in / vec_in; }, - stream_); + data, data, colsums_.data(), n_cols_, n_rows_, row_major_, false, raft::div_op{}, stream_); } void revert(math_t* data) { raft::linalg::matrixVectorOp( - data, - data, - colsums_.data(), - n_cols_, - n_rows_, - row_major_, - false, - [] __device__(math_t mat_in, math_t vec_in) { return mat_in * vec_in; }, - stream_); + data, data, colsums_.data(), n_cols_, n_rows_, row_major_, false, raft::mul_op{}, stream_); } void postprocess(math_t* data) @@ -122,12 +107,11 @@ class CorrelationMetricProcessor : public CosineMetricProcessor { true, cosine::stream_); - raft::linalg::unaryOp( - means_.data(), - means_.data(), - cosine::n_rows_, - [=] __device__(math_t in) { return in * normalizer_const; }, - cosine::stream_); + raft::linalg::unaryOp(means_.data(), + means_.data(), + cosine::n_rows_, + raft::mul_const_op(normalizer_const), + cosine::stream_); raft::stats::meanCenter(data, data, diff --git a/cpp/include/raft/stats/detail/batched/silhouette_score.cuh b/cpp/include/raft/stats/detail/batched/silhouette_score.cuh index 25a3721af1..5bd8c4a7ab 100644 --- a/cpp/include/raft/stats/detail/batched/silhouette_score.cuh +++ b/cpp/include/raft/stats/detail/batched/silhouette_score.cuh @@ -249,19 +249,18 @@ value_t silhouette_score( // calculating row-wise minimum in b // this prim only supports int indices for now - raft::linalg:: - reduce, raft::stats::detail::MinOp>( - b_ptr, - b_ptr, - n_labels, - n_rows, - std::numeric_limits::max(), - true, - true, - stream, - false, - raft::Nop(), - raft::stats::detail::MinOp()); + raft::linalg::reduce( + b_ptr, + b_ptr, + n_labels, + n_rows, + std::numeric_limits::max(), + true, + true, + stream, + false, + raft::identity_op(), + raft::min_op()); // calculating the silhouette score per sample raft::linalg::binaryOp, value_t, value_idx>( diff --git a/cpp/include/raft/stats/detail/mean_center.cuh b/cpp/include/raft/stats/detail/mean_center.cuh index 61017511b1..6e1c07e1e3 100644 --- a/cpp/include/raft/stats/detail/mean_center.cuh +++ 
b/cpp/include/raft/stats/detail/mean_center.cuh @@ -49,15 +49,7 @@ void meanCenter(Type* out, cudaStream_t stream) { raft::linalg::matrixVectorOp( - out, - data, - mu, - D, - N, - rowMajor, - bcastAlongRows, - [] __device__(Type a, Type b) { return a - b; }, - stream); + out, data, mu, D, N, rowMajor, bcastAlongRows, raft::sub_op{}, stream); } /** @@ -85,15 +77,7 @@ void meanAdd(Type* out, cudaStream_t stream) { raft::linalg::matrixVectorOp( - out, - data, - mu, - D, - N, - rowMajor, - bcastAlongRows, - [] __device__(Type a, Type b) { return a + b; }, - stream); + out, data, mu, D, N, rowMajor, bcastAlongRows, raft::add_op{}, stream); } }; // end namespace detail diff --git a/cpp/include/raft/stats/detail/silhouette_score.cuh b/cpp/include/raft/stats/detail/silhouette_score.cuh index 076d9b13e5..3cf95c3941 100644 --- a/cpp/include/raft/stats/detail/silhouette_score.cuh +++ b/cpp/include/raft/stats/detail/silhouette_score.cuh @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -172,20 +173,6 @@ struct SilOp { } }; -/** - * @brief structure that defines the reduction Lambda to find minimum between elements - */ -template -struct MinOp { - HDI DataT operator()(DataT a, DataT b) - { - if (a > b) - return b; - else - return a; - } -}; - /** * @brief main function that returns the average silhouette score for a given set of data and its * clusterings @@ -278,19 +265,19 @@ DataT silhouette_score( RAFT_CUDA_TRY(cudaMemsetAsync( averageDistanceBetweenSampleAndCluster.data(), 0, nRows * nLabels * sizeof(DataT), stream)); - raft::linalg::matrixVectorOp>(averageDistanceBetweenSampleAndCluster.data(), - sampleToClusterSumOfDistances.data(), - binCountArray.data(), - binCountArray.data(), - nLabels, - nRows, - true, - true, - DivOp(), - stream); + raft::linalg::matrixVectorOp(averageDistanceBetweenSampleAndCluster.data(), + sampleToClusterSumOfDistances.data(), + binCountArray.data(), + binCountArray.data(), + nLabels, + nRows, + true, + true, + DivOp(), + stream); // calculating row-wise minimum - raft::linalg::reduce, MinOp>( + raft::linalg::reduce( d_bArray.data(), averageDistanceBetweenSampleAndCluster.data(), nLabels, @@ -300,8 +287,8 @@ DataT silhouette_score( true, stream, false, - raft::Nop(), - MinOp()); + raft::identity_op{}, + raft::min_op{}); // calculating the silhouette score per sample using the d_aArray and d_bArray raft::linalg::binaryOp>( @@ -311,12 +298,12 @@ DataT silhouette_score( rmm::device_scalar d_avgSilhouetteScore(stream); RAFT_CUDA_TRY(cudaMemsetAsync(d_avgSilhouetteScore.data(), 0, sizeof(DataT), stream)); - raft::linalg::mapThenSumReduce>(d_avgSilhouetteScore.data(), - nRows, - raft::Nop(), - stream, - perSampleSilScore, - perSampleSilScore); + raft::linalg::mapThenSumReduce(d_avgSilhouetteScore.data(), + nRows, + raft::identity_op(), + stream, + perSampleSilScore, + perSampleSilScore); DataT avgSilhouetteScore = d_avgSilhouetteScore.value(stream); diff --git a/cpp/include/raft/stats/detail/weighted_mean.cuh b/cpp/include/raft/stats/detail/weighted_mean.cuh index 43dbe4e7f1..ada0995f7d 100644 --- a/cpp/include/raft/stats/detail/weighted_mean.cuh +++ b/cpp/include/raft/stats/detail/weighted_mean.cuh @@ -18,6 +18,7 @@ #include #include +#include #include namespace raft { @@ -66,8 +67,8 @@ void weightedMean(Type* mu, stream, false, [weights] __device__(Type v, IdxType i) { return v * weights[i]; }, - [] __device__(Type a, Type b) { return a + b; }, - [WS] __device__(Type v) { return v / WS; }); + raft::add_op{}, + raft::div_const_op(WS)); } }; 
// end namespace detail
};  // end namespace stats
diff --git a/cpp/include/raft/util/cuda_utils.cuh b/cpp/include/raft/util/cuda_utils.cuh
index 5818fc21f3..61dd6e0ad8 100644
--- a/cpp/include/raft/util/cuda_utils.cuh
+++ b/cpp/include/raft/util/cuda_utils.cuh
@@ -21,6 +21,7 @@

 #include
 #include
+#include <raft/core/operators.hpp>

 #ifndef ENABLE_MEMCPY_ASYNC
 // enable memcpy_async interface by default for newer GPUs
@@ -508,43 +509,69 @@ HDI double myATanh(double x)
 /** @} */

 /**
- * @defgroup LambdaOps Lambda operations in reduction kernels
+ * @defgroup LambdaOps Legacy lambda operations, to be deprecated
 * @{
 */
-// IdxType mostly to be used for MainLambda in *Reduction kernels
 template <typename Type, typename IdxType = int>
 struct Nop {
-  HDI Type operator()(Type in, IdxType i = 0) { return in; }
+  [[deprecated("Nop is deprecated. Use identity_op instead.")]] HDI Type
+  operator()(Type in, IdxType i = 0) const
+  {
+    return in;
+  }
 };

 template <typename Type, typename IdxType = int>
 struct SqrtOp {
-  HDI Type operator()(Type in, IdxType i = 0) { return mySqrt(in); }
+  [[deprecated("SqrtOp is deprecated. Use sqrt_op instead.")]] HDI Type
+  operator()(Type in, IdxType i = 0) const
+  {
+    return mySqrt(in);
+  }
 };

 template <typename Type, typename IdxType = int>
 struct L0Op {
-  HDI Type operator()(Type in, IdxType i = 0) { return in != Type(0) ? Type(1) : Type(0); }
+  [[deprecated("L0Op is deprecated. Use nz_op instead.")]] HDI Type operator()(Type in,
+                                                                               IdxType i = 0) const
+  {
+    return in != Type(0) ? Type(1) : Type(0);
+  }
 };

 template <typename Type, typename IdxType = int>
 struct L1Op {
-  HDI Type operator()(Type in, IdxType i = 0) { return myAbs(in); }
+  [[deprecated("L1Op is deprecated. Use abs_op instead.")]] HDI Type operator()(Type in,
+                                                                                IdxType i = 0) const
+  {
+    return myAbs(in);
+  }
 };

 template <typename Type, typename IdxType = int>
 struct L2Op {
-  HDI Type operator()(Type in, IdxType i = 0) { return in * in; }
+  [[deprecated("L2Op is deprecated. Use sq_op instead.")]] HDI Type operator()(Type in,
+                                                                               IdxType i = 0) const
+  {
+    return in * in;
+  }
 };

-template <typename Type>
+template <typename InT, typename OutT = InT>
 struct Sum {
-  HDI Type operator()(Type a, Type b) { return a + b; }
+  [[deprecated("Sum is deprecated. Use add_op instead.")]] HDI OutT operator()(InT a, InT b) const
+  {
+    return a + b;
+  }
 };

 template <typename Type>
 struct Max {
-  HDI Type operator()(Type a, Type b) { return myMax(a, b); }
+  [[deprecated("Max is deprecated. Use max_op instead.")]] HDI Type operator()(Type a, Type b) const
+  {
+    if (b > a) { return b; }
+    return a;
+  }
 };

 /** @} */

@@ -939,7 +966,7 @@ DI T warpReduce(T val, ReduceLambda reduce_op)
 template <typename T>
 DI T warpReduce(T val)
 {
-  return warpReduce(val, raft::Sum<T>{});
+  return warpReduce(val, raft::add_op{});
 }

 /**
diff --git a/cpp/include/raft/util/scatter.cuh b/cpp/include/raft/util/scatter.cuh
index c20afa5454..e69be36ad9 100644
--- a/cpp/include/raft/util/scatter.cuh
+++ b/cpp/include/raft/util/scatter.cuh
@@ -16,6 +16,7 @@

 #pragma once

+#include <raft/core/operators.hpp>
 #include
 #include

@@ -37,13 +38,13 @@ namespace raft {
 * will be applied to every element before scattering it to the right location.
 * The second param in this method will be the destination index.
*/ -template , int TPB = 256> +template void scatter(DataT* out, const DataT* in, const IdxT* idx, IdxT len, cudaStream_t stream, - Lambda op = raft::Nop()) + Lambda op = raft::identity_op()) { if (len <= 0) return; constexpr size_t DataSize = sizeof(DataT); diff --git a/cpp/src/distance/cluster/cluster_cost.cuh b/cpp/src/distance/cluster/cluster_cost.cuh index ac8247af95..4f208ab1cd 100644 --- a/cpp/src/distance/cluster/cluster_cost.cuh +++ b/cpp/src/distance/cluster/cluster_cost.cuh @@ -15,9 +15,11 @@ */ #include +#include #include #include #include +#include namespace raft::runtime::cluster::kmeans { template @@ -59,20 +61,18 @@ void cluster_cost(const raft::handle_t& handle, handle.get_stream()); auto distances = raft::make_device_vector(handle, n_samples); - thrust::transform( - handle.get_thrust_policy(), - min_cluster_distance.data_handle(), - min_cluster_distance.data_handle() + n_samples, - distances.data_handle(), - [] __device__(const raft::KeyValuePair& a) { return a.value; }); + thrust::transform(handle.get_thrust_policy(), + min_cluster_distance.data_handle(), + min_cluster_distance.data_handle() + n_samples, + distances.data_handle(), + raft::value_op{}); rmm::device_scalar device_cost(0, handle.get_stream()); - raft::cluster::kmeans::cluster_cost( - handle, - distances.view(), - workspace, - make_device_scalar_view(device_cost.data()), - [] __device__(const ElementType& a, const ElementType& b) { return a + b; }); + raft::cluster::kmeans::cluster_cost(handle, + distances.view(), + workspace, + make_device_scalar_view(device_cost.data()), + raft::add_op{}); raft::update_host(cost, device_cost.data(), 1, handle.get_stream()); } diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index a75eb3bff6..5be8401a6f 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -86,6 +86,8 @@ if(BUILD_TESTS) CORE_TEST PATH test/common/logger.cpp + test/core/operators_device.cu + test/core/operators_host.cpp test/handle.cpp test/interruptible.cu test/nvtx.cpp diff --git a/cpp/test/cluster/kmeans.cu b/cpp/test/cluster/kmeans.cu index 698d23ac27..9644541a0c 100644 --- a/cpp/test/cluster/kmeans.cu +++ b/cpp/test/cluster/kmeans.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -50,11 +51,7 @@ void run_cluster_cost(const raft::handle_t& handle, raft::device_scalar_view clusterCost) { raft::cluster::kmeans::cluster_cost( - handle, - minClusterDistance, - workspace, - clusterCost, - [] __device__(const DataT& a, const DataT& b) { return a + b; }); + handle, minClusterDistance, workspace, clusterCost, raft::add_op{}); } template diff --git a/cpp/test/cluster/linkage.cu b/cpp/test/cluster/linkage.cu index 5533f552bd..53aa5c55e3 100644 --- a/cpp/test/cluster/linkage.cu +++ b/cpp/test/cluster/linkage.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include diff --git a/cpp/test/core/operators_device.cu b/cpp/test/core/operators_device.cu new file mode 100644 index 0000000000..1697a09fcf --- /dev/null +++ b/cpp/test/core/operators_device.cu @@ -0,0 +1,340 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include + +#include "../test_utils.cuh" +#include +#include +#include +#include + +template +__global__ void eval_op_on_device_kernel(OutT* out, OpT op, Args... args) +{ + out[0] = op(std::forward(args)...); +} + +template +auto eval_op_on_device(OpT op, Args&&... args) +{ + typedef decltype(op(args...)) OutT; + auto stream = rmm::cuda_stream_default; + rmm::device_scalar result(stream); + eval_op_on_device_kernel<<<1, 1, 0, stream>>>(result.data(), op, std::forward(args)...); + return result.value(stream); +} + +TEST(OperatorsDevice, IdentityOp) +{ + raft::identity_op op; + ASSERT_TRUE(raft::match(12.34f, eval_op_on_device(op, 12.34f, 0), raft::Compare())); +} + +TEST(OperatorsDevice, CastOp) +{ + raft::cast_op op; + ASSERT_TRUE( + raft::match(1234.0f, eval_op_on_device(op, 1234, 0), raft::CompareApprox(0.00001f))); +} + +TEST(OperatorsDevice, KeyOp) +{ + raft::key_op op; + raft::KeyValuePair kvp(12, 3.4f); + ASSERT_TRUE(raft::match(12, eval_op_on_device(op, kvp, 0), raft::Compare())); +} + +TEST(OperatorsDevice, ValueOp) +{ + raft::value_op op; + raft::KeyValuePair kvp(12, 3.4f); + ASSERT_TRUE( + raft::match(3.4f, eval_op_on_device(op, kvp, 0), raft::CompareApprox(0.00001f))); +} + +TEST(OperatorsDevice, SqrtOpF) +{ + raft::sqrt_op op; + ASSERT_TRUE(raft::match( + std::sqrt(12.34f), eval_op_on_device(op, 12.34f, 0), raft::CompareApprox(0.0001f))); + ASSERT_TRUE(raft::match( + std::sqrt(12.34), eval_op_on_device(op, 12.34, 0), raft::CompareApprox(0.000001))); +} + +TEST(OperatorsDevice, NZOp) +{ + raft::nz_op op; + ASSERT_TRUE( + raft::match(0.0f, eval_op_on_device(op, 0.0f, 0), raft::CompareApprox(0.00001f))); + ASSERT_TRUE( + raft::match(1.0f, eval_op_on_device(op, 12.34f, 0), raft::CompareApprox(0.00001f))); +} + +TEST(OperatorsDevice, AbsOp) +{ + raft::abs_op op; + ASSERT_TRUE( + raft::match(12.34f, eval_op_on_device(op, -12.34f, 0), raft::CompareApprox(0.00001f))); + ASSERT_TRUE( + raft::match(12.34, eval_op_on_device(op, -12.34, 0), raft::CompareApprox(0.000001))); + ASSERT_TRUE(raft::match(1234, eval_op_on_device(op, -1234, 0), raft::Compare())); +} + +TEST(OperatorsDevice, SqOp) +{ + raft::sq_op op; + ASSERT_TRUE( + raft::match(152.2756f, eval_op_on_device(op, 12.34f, 0), raft::CompareApprox(0.00001f))); + ASSERT_TRUE(raft::match(289, eval_op_on_device(op, -17, 0), raft::Compare())); +} + +TEST(OperatorsDevice, AddOp) +{ + raft::add_op op; + ASSERT_TRUE( + raft::match(12.34f, eval_op_on_device(op, 12.0f, 0.34f), raft::CompareApprox(0.00001f))); + ASSERT_TRUE(raft::match(1234, eval_op_on_device(op, 1200, 34), raft::Compare())); +} + +TEST(OperatorsDevice, SubOp) +{ + raft::sub_op op; + ASSERT_TRUE( + raft::match(12.34f, eval_op_on_device(op, 13.0f, 0.66f), raft::CompareApprox(0.00001f))); + ASSERT_TRUE(raft::match(1234, eval_op_on_device(op, 1300, 66), raft::Compare())); +} + +TEST(OperatorsDevice, MulOp) +{ + raft::mul_op op; + ASSERT_TRUE( + raft::match(12.34f, eval_op_on_device(op, 2.0f, 6.17f), raft::CompareApprox(0.00001f))); +} + +TEST(OperatorsDevice, DivOp) +{ + raft::div_op op; + ASSERT_TRUE( + raft::match(12.34f, 
eval_op_on_device(op, 37.02f, 3.0f), raft::CompareApprox(0.00001f))); +} + +TEST(OperatorsDevice, DivCheckZeroOp) +{ + raft::div_checkzero_op op; + ASSERT_TRUE( + raft::match(12.34f, eval_op_on_device(op, 37.02f, 3.0f), raft::CompareApprox(0.00001f))); + ASSERT_TRUE( + raft::match(0.0f, eval_op_on_device(op, 37.02f, 0.0f), raft::CompareApprox(0.00001f))); +} + +TEST(OperatorsDevice, PowOp) +{ + raft::pow_op op; + ASSERT_TRUE( + raft::match(1000.0f, eval_op_on_device(op, 10.0f, 3.0f), raft::CompareApprox(0.0001f))); + ASSERT_TRUE( + raft::match(1000.0, eval_op_on_device(op, 10.0, 3.0), raft::CompareApprox(0.000001))); +} + +TEST(OperatorsDevice, MinOp) +{ + raft::min_op op; + ASSERT_TRUE( + raft::match(3.0f, eval_op_on_device(op, 3.0f, 5.0f), raft::CompareApprox(0.0001f))); + ASSERT_TRUE( + raft::match(3.0, eval_op_on_device(op, 5.0, 3.0), raft::CompareApprox(0.000001))); + ASSERT_TRUE(raft::match(3, eval_op_on_device(op, 3, 5), raft::Compare())); +} + +TEST(OperatorsDevice, MaxOp) +{ + raft::max_op op; + ASSERT_TRUE( + raft::match(5.0f, eval_op_on_device(op, 3.0f, 5.0f), raft::CompareApprox(0.0001f))); + ASSERT_TRUE( + raft::match(5.0, eval_op_on_device(op, 5.0, 3.0), raft::CompareApprox(0.000001))); + ASSERT_TRUE(raft::match(5, eval_op_on_device(op, 3, 5), raft::Compare())); +} + +TEST(OperatorsDevice, SqDiffOp) +{ + raft::sqdiff_op op; + ASSERT_TRUE( + raft::match(4.0f, eval_op_on_device(op, 3.0f, 5.0f), raft::CompareApprox(0.0001f))); + ASSERT_TRUE( + raft::match(4.0, eval_op_on_device(op, 5.0, 3.0), raft::CompareApprox(0.000001))); +} + +TEST(OperatorsDevice, ArgminOp) +{ + raft::argmin_op op; + raft::KeyValuePair kvp_a(0, 1.2f); + raft::KeyValuePair kvp_b(0, 3.4f); + raft::KeyValuePair kvp_c(1, 1.2f); + ASSERT_TRUE(raft::match( + kvp_a, eval_op_on_device(op, kvp_a, kvp_b), raft::Compare>())); + ASSERT_TRUE(raft::match( + kvp_a, eval_op_on_device(op, kvp_b, kvp_a), raft::Compare>())); + ASSERT_TRUE(raft::match( + kvp_a, eval_op_on_device(op, kvp_a, kvp_c), raft::Compare>())); + ASSERT_TRUE(raft::match( + kvp_a, eval_op_on_device(op, kvp_c, kvp_a), raft::Compare>())); + ASSERT_TRUE(raft::match( + kvp_c, eval_op_on_device(op, kvp_b, kvp_c), raft::Compare>())); + ASSERT_TRUE(raft::match( + kvp_c, eval_op_on_device(op, kvp_c, kvp_b), raft::Compare>())); +} + +TEST(OperatorsDevice, ArgmaxOp) +{ + raft::argmax_op op; + raft::KeyValuePair kvp_a(0, 1.2f); + raft::KeyValuePair kvp_b(0, 3.4f); + raft::KeyValuePair kvp_c(1, 1.2f); + ASSERT_TRUE(raft::match( + kvp_b, eval_op_on_device(op, kvp_a, kvp_b), raft::Compare>())); + ASSERT_TRUE(raft::match( + kvp_b, eval_op_on_device(op, kvp_b, kvp_a), raft::Compare>())); + ASSERT_TRUE(raft::match( + kvp_a, eval_op_on_device(op, kvp_a, kvp_c), raft::Compare>())); + ASSERT_TRUE(raft::match( + kvp_a, eval_op_on_device(op, kvp_c, kvp_a), raft::Compare>())); + ASSERT_TRUE(raft::match( + kvp_b, eval_op_on_device(op, kvp_b, kvp_c), raft::Compare>())); + ASSERT_TRUE(raft::match( + kvp_b, eval_op_on_device(op, kvp_c, kvp_b), raft::Compare>())); +} + +TEST(OperatorsDevice, ConstOp) +{ + raft::const_op op(12.34f); + ASSERT_TRUE(raft::match(12.34f, eval_op_on_device(op), raft::Compare())); + ASSERT_TRUE(raft::match(12.34f, eval_op_on_device(op, 42), raft::Compare())); + ASSERT_TRUE(raft::match(12.34f, eval_op_on_device(op, 13, 37.0f), raft::Compare())); +} + +template +struct trinary_add { + const T c; + constexpr explicit trinary_add(const T& c_) : c{c_} {} + constexpr RAFT_INLINE_FUNCTION auto operator()(T a, T b) const { return a + b + c; } +}; + 
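A host-side sketch of the binding rule the PlugConstOp tests exercise: `plug_const_op` fixes the second operand of an arbitrary binary op, and `mul_const_op` is the same binding specialized to `mul_op`. The explicit `<float>` spelling below is an assumption, not copied from this patch:

#include <raft/core/operators.hpp>
#include <cassert>

int main()
{
  auto times_two = raft::plug_const_op(2.0f, raft::mul_op{});  // x -> x * 2.0f
  raft::mul_const_op<float> times_two_alias(2.0f);             // assumed equivalent shorthand
  assert(times_two(6.17f) == 12.34f);                          // exact: scaling by 2 is lossless
  assert(times_two(6.17f) == times_two_alias(6.17f));
  return 0;
}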
+TEST(OperatorsDevice, PlugConstOp) +{ + // First, wrap around a default-constructible op + { + raft::plug_const_op op(0.34f); + ASSERT_TRUE( + raft::match(12.34f, eval_op_on_device(op, 12.0f), raft::CompareApprox(0.0001f))); + } + + // Second, wrap around a non-default-constructible op + { + auto op = raft::plug_const_op(10.0f, trinary_add(2.0f)); + ASSERT_TRUE( + raft::match(12.34f, eval_op_on_device(op, 0.34f), raft::CompareApprox(0.0001f))); + } +} + +TEST(OperatorsDevice, AddConstOp) +{ + raft::add_const_op op(0.34f); + ASSERT_TRUE( + raft::match(12.34f, eval_op_on_device(op, 12.0f), raft::CompareApprox(0.0001f))); +} + +TEST(OperatorsDevice, SubConstOp) +{ + raft::sub_const_op op(0.66f); + ASSERT_TRUE( + raft::match(12.34f, eval_op_on_device(op, 13.0f), raft::CompareApprox(0.0001f))); +} + +TEST(OperatorsDevice, MulConstOp) +{ + raft::mul_const_op op(2.0f); + ASSERT_TRUE( + raft::match(12.34f, eval_op_on_device(op, 6.17f), raft::CompareApprox(0.0001f))); +} + +TEST(OperatorsDevice, DivConstOp) +{ + raft::div_const_op op(3.0f); + ASSERT_TRUE( + raft::match(12.34f, eval_op_on_device(op, 37.02f), raft::CompareApprox(0.0001f))); +} + +TEST(OperatorsDevice, DivCheckZeroConstOp) +{ + // Non-zero denominator + { + raft::div_checkzero_const_op op(3.0f); + ASSERT_TRUE( + raft::match(12.34f, eval_op_on_device(op, 37.02f), raft::CompareApprox(0.0001f))); + } + // Zero denominator + { + raft::div_checkzero_const_op op(0.0f); + ASSERT_TRUE( + raft::match(0.0f, eval_op_on_device(op, 37.02f), raft::CompareApprox(0.0001f))); + } +} + +TEST(OperatorsDevice, PowConstOp) +{ + raft::pow_const_op op(3.0f); + ASSERT_TRUE( + raft::match(1000.0f, eval_op_on_device(op, 10.0f), raft::CompareApprox(0.0001f))); +} + +TEST(OperatorsDevice, ComposeOp) +{ + // All ops are default-constructible + { + raft::compose_op> op; + ASSERT_TRUE(raft::match( + std::sqrt(42.0f), eval_op_on_device(op, -42, 0), raft::CompareApprox(0.0001f))); + } + // Some ops are not default-constructible + { + auto op = raft::compose_op( + raft::sqrt_op(), raft::abs_op(), raft::add_const_op(8.0f), raft::cast_op()); + ASSERT_TRUE(raft::match( + std::sqrt(42.0f), eval_op_on_device(op, -50, 0), raft::CompareApprox(0.0001f))); + } +} + +TEST(OperatorsDevice, MapArgsOp) +{ + // All ops are default-constructible + { + raft::map_args_op op; + ASSERT_TRUE( + raft::match(42.0f, eval_op_on_device(op, 5.0f, -17.0f), raft::CompareApprox(0.0001f))); + } + // Some ops are not default-constructible + { + auto op = raft::map_args_op( + raft::add_op(), raft::pow_const_op(2.0f), raft::mul_const_op(-1.0f)); + ASSERT_TRUE( + raft::match(42.0f, eval_op_on_device(op, 5.0f, -17.0f), raft::CompareApprox(0.0001f))); + } +} diff --git a/cpp/test/core/operators_host.cpp b/cpp/test/core/operators_host.cpp new file mode 100644 index 0000000000..de66fda919 --- /dev/null +++ b/cpp/test/core/operators_host.cpp @@ -0,0 +1,286 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include + +#include "../test_utils.h" +#include +#include + +TEST(OperatorsHost, IdentityOp) +{ + raft::identity_op op; + ASSERT_TRUE(raft::match(12.34f, op(12.34f, 0), raft::Compare())); +} + +TEST(OperatorsHost, CastOp) +{ + raft::cast_op op; + ASSERT_TRUE(raft::match(1234.0f, op(1234, 0), raft::CompareApprox(0.00001f))); +} + +TEST(OperatorsHost, KeyOp) +{ + raft::key_op op; + raft::KeyValuePair kvp(12, 3.4f); + ASSERT_TRUE(raft::match(12, op(kvp, 0), raft::Compare())); +} + +TEST(OperatorsHost, ValueOp) +{ + raft::value_op op; + raft::KeyValuePair kvp(12, 3.4f); + ASSERT_TRUE(raft::match(3.4f, op(kvp, 0), raft::CompareApprox(0.00001f))); +} + +TEST(OperatorsHost, SqrtOpF) +{ + raft::sqrt_op op; + ASSERT_TRUE(raft::match(std::sqrt(12.34f), op(12.34f, 0), raft::CompareApprox(0.0001f))); + ASSERT_TRUE(raft::match(std::sqrt(12.34), op(12.34, 0), raft::CompareApprox(0.000001))); +} + +TEST(OperatorsHost, NZOp) +{ + raft::nz_op op; + ASSERT_TRUE(raft::match(0.0f, op(0.0f, 0), raft::CompareApprox(0.00001f))); + ASSERT_TRUE(raft::match(1.0f, op(12.34f, 0), raft::CompareApprox(0.00001f))); +} + +TEST(OperatorsHost, AbsOp) +{ + raft::abs_op op; + ASSERT_TRUE(raft::match(12.34f, op(-12.34f, 0), raft::CompareApprox(0.00001f))); + ASSERT_TRUE(raft::match(12.34, op(-12.34, 0), raft::CompareApprox(0.000001))); + ASSERT_TRUE(raft::match(1234, op(-1234, 0), raft::Compare())); +} + +TEST(OperatorsHost, SqOp) +{ + raft::sq_op op; + ASSERT_TRUE(raft::match(152.2756f, op(12.34f, 0), raft::CompareApprox(0.00001f))); + ASSERT_TRUE(raft::match(289, op(-17, 0), raft::Compare())); +} + +TEST(OperatorsHost, AddOp) +{ + raft::add_op op; + ASSERT_TRUE(raft::match(12.34f, op(12.0f, 0.34f), raft::CompareApprox(0.00001f))); + ASSERT_TRUE(raft::match(1234, op(1200, 34), raft::Compare())); +} + +TEST(OperatorsHost, SubOp) +{ + raft::sub_op op; + ASSERT_TRUE(raft::match(12.34f, op(13.0f, 0.66f), raft::CompareApprox(0.00001f))); + ASSERT_TRUE(raft::match(1234, op(1300, 66), raft::Compare())); +} + +TEST(OperatorsHost, MulOp) +{ + raft::mul_op op; + ASSERT_TRUE(raft::match(12.34f, op(2.0f, 6.17f), raft::CompareApprox(0.00001f))); +} + +TEST(OperatorsHost, DivOp) +{ + raft::div_op op; + ASSERT_TRUE(raft::match(12.34f, op(37.02f, 3.0f), raft::CompareApprox(0.00001f))); +} + +TEST(OperatorsHost, DivCheckZeroOp) +{ + raft::div_checkzero_op op; + ASSERT_TRUE(raft::match(12.34f, op(37.02f, 3.0f), raft::CompareApprox(0.00001f))); + ASSERT_TRUE(raft::match(0.0f, op(37.02f, 0.0f), raft::CompareApprox(0.00001f))); +} + +TEST(OperatorsHost, PowOp) +{ + raft::pow_op op; + ASSERT_TRUE(raft::match(1000.0f, op(10.0f, 3.0f), raft::CompareApprox(0.0001f))); + ASSERT_TRUE(raft::match(1000.0, op(10.0, 3.0), raft::CompareApprox(0.000001))); +} + +TEST(OperatorsHost, MinOp) +{ + raft::min_op op; + ASSERT_TRUE(raft::match(3.0f, op(3.0f, 5.0f), raft::CompareApprox(0.0001f))); + ASSERT_TRUE(raft::match(3.0, op(5.0, 3.0), raft::CompareApprox(0.000001))); + ASSERT_TRUE(raft::match(3, op(3, 5), raft::Compare())); +} + +TEST(OperatorsHost, MaxOp) +{ + raft::max_op op; + ASSERT_TRUE(raft::match(5.0f, op(3.0f, 5.0f), raft::CompareApprox(0.0001f))); + ASSERT_TRUE(raft::match(5.0, op(5.0, 3.0), raft::CompareApprox(0.000001))); + ASSERT_TRUE(raft::match(5, op(3, 5), raft::Compare())); +} + +TEST(OperatorsHost, SqDiffOp) +{ + raft::sqdiff_op op; + ASSERT_TRUE(raft::match(4.0f, op(3.0f, 5.0f), raft::CompareApprox(0.0001f))); + ASSERT_TRUE(raft::match(4.0, op(5.0, 3.0), raft::CompareApprox(0.000001))); +} + 
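The argmin/argmax cases that follow pin down tie-breaking as well as ordering. A reference sketch of the semantics the assertions encode (an assumed restatement, not library code): pick the pair with the smaller (respectively larger) value, and on a value tie keep the pair with the smaller key, in either argument order:

// Assumed reference semantics for the Argmin assertions below:
// kvp_a{0, 1.2f} beats kvp_c{1, 1.2f} regardless of argument order.
template <typename KVP>
KVP argmin_reference(const KVP& a, const KVP& b)
{
  if (a.value < b.value) { return a; }
  if (b.value < a.value) { return b; }
  return (a.key <= b.key) ? a : b;  // tie on value: smaller key wins
}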
+TEST(OperatorsHost, ArgminOp) +{ + raft::argmin_op op; + raft::KeyValuePair kvp_a(0, 1.2f); + raft::KeyValuePair kvp_b(0, 3.4f); + raft::KeyValuePair kvp_c(1, 1.2f); + ASSERT_TRUE( + raft::match(kvp_a, op(kvp_a, kvp_b), raft::Compare>())); + ASSERT_TRUE( + raft::match(kvp_a, op(kvp_b, kvp_a), raft::Compare>())); + ASSERT_TRUE( + raft::match(kvp_a, op(kvp_a, kvp_c), raft::Compare>())); + ASSERT_TRUE( + raft::match(kvp_a, op(kvp_c, kvp_a), raft::Compare>())); + ASSERT_TRUE( + raft::match(kvp_c, op(kvp_b, kvp_c), raft::Compare>())); + ASSERT_TRUE( + raft::match(kvp_c, op(kvp_c, kvp_b), raft::Compare>())); +} + +TEST(OperatorsHost, ArgmaxOp) +{ + raft::argmax_op op; + raft::KeyValuePair kvp_a(0, 1.2f); + raft::KeyValuePair kvp_b(0, 3.4f); + raft::KeyValuePair kvp_c(1, 1.2f); + ASSERT_TRUE( + raft::match(kvp_b, op(kvp_a, kvp_b), raft::Compare>())); + ASSERT_TRUE( + raft::match(kvp_b, op(kvp_b, kvp_a), raft::Compare>())); + ASSERT_TRUE( + raft::match(kvp_a, op(kvp_a, kvp_c), raft::Compare>())); + ASSERT_TRUE( + raft::match(kvp_a, op(kvp_c, kvp_a), raft::Compare>())); + ASSERT_TRUE( + raft::match(kvp_b, op(kvp_b, kvp_c), raft::Compare>())); + ASSERT_TRUE( + raft::match(kvp_b, op(kvp_c, kvp_b), raft::Compare>())); +} + +TEST(OperatorsHost, ConstOp) +{ + raft::const_op op(12.34f); + ASSERT_TRUE(raft::match(12.34f, op(), raft::Compare())); + ASSERT_TRUE(raft::match(12.34f, op(42), raft::Compare())); + ASSERT_TRUE(raft::match(12.34f, op(13, 37.0f), raft::Compare())); +} + +template +struct trinary_add { + const T c; + constexpr explicit trinary_add(const T& c_) : c{c_} {} + constexpr RAFT_INLINE_FUNCTION auto operator()(T a, T b) const { return a + b + c; } +}; + +TEST(OperatorsHost, PlugConstOp) +{ + // First, wrap around a default-constructible op + { + raft::plug_const_op op(0.34f); + ASSERT_TRUE(raft::match(12.34f, op(12.0f), raft::CompareApprox(0.0001f))); + } + + // Second, wrap around a non-default-constructible op + { + auto op = raft::plug_const_op(10.0f, trinary_add(2.0f)); + ASSERT_TRUE(raft::match(12.34f, op(0.34f), raft::CompareApprox(0.0001f))); + } +} + +TEST(OperatorsHost, AddConstOp) +{ + raft::add_const_op op(0.34f); + ASSERT_TRUE(raft::match(12.34f, op(12.0f), raft::CompareApprox(0.0001f))); +} + +TEST(OperatorsHost, SubConstOp) +{ + raft::sub_const_op op(0.66f); + ASSERT_TRUE(raft::match(12.34f, op(13.0f), raft::CompareApprox(0.0001f))); +} + +TEST(OperatorsHost, MulConstOp) +{ + raft::mul_const_op op(2.0f); + ASSERT_TRUE(raft::match(12.34f, op(6.17f), raft::CompareApprox(0.0001f))); +} + +TEST(OperatorsHost, DivConstOp) +{ + raft::div_const_op op(3.0f); + ASSERT_TRUE(raft::match(12.34f, op(37.02f), raft::CompareApprox(0.0001f))); +} + +TEST(OperatorsHost, DivCheckZeroConstOp) +{ + // Non-zero denominator + { + raft::div_checkzero_const_op op(3.0f); + ASSERT_TRUE(raft::match(12.34f, op(37.02f), raft::CompareApprox(0.0001f))); + } + // Zero denominator + { + raft::div_checkzero_const_op op(0.0f); + ASSERT_TRUE(raft::match(0.0f, op(37.02f), raft::CompareApprox(0.0001f))); + } +} + +TEST(OperatorsHost, PowConstOp) +{ + raft::pow_const_op op(3.0f); + ASSERT_TRUE(raft::match(1000.0f, op(10.0f), raft::CompareApprox(0.0001f))); +} + +TEST(OperatorsHost, ComposeOp) +{ + // All ops are default-constructible + { + raft::compose_op> op; + ASSERT_TRUE(raft::match(std::sqrt(42.0f), op(-42, 0), raft::CompareApprox(0.0001f))); + } + // Some ops are not default-constructible + { + auto op = raft::compose_op( + raft::sqrt_op(), raft::abs_op(), raft::add_const_op(8.0f), raft::cast_op()); + 
ASSERT_TRUE(raft::match(std::sqrt(42.0f), op(-50, 0), raft::CompareApprox(0.0001f))); + } +} + +TEST(OperatorsHost, MapArgsOp) +{ + // All ops are default-constructible + { + raft::map_args_op op; + ASSERT_TRUE(raft::match(42.0f, op(5.0f, -17.0f), raft::CompareApprox(0.0001f))); + } + // Some ops are not default-constructible + { + auto op = raft::map_args_op( + raft::add_op(), raft::pow_const_op(2.0f), raft::mul_const_op(-1.0f)); + ASSERT_TRUE(raft::match(42.0f, op(5.0f, -17.0f), raft::CompareApprox(0.0001f))); + } +} diff --git a/cpp/test/distance/dist_adj.cu b/cpp/test/distance/dist_adj.cu index f3f36b4576..4f6dfaac24 100644 --- a/cpp/test/distance/dist_adj.cu +++ b/cpp/test/distance/dist_adj.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/distance/dist_canberra.cu b/cpp/test/distance/dist_canberra.cu index 1f368fbee8..db5555d9c8 100644 --- a/cpp/test/distance/dist_canberra.cu +++ b/cpp/test/distance/dist_canberra.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include "distance_base.cuh" namespace raft { diff --git a/cpp/test/distance/dist_chebyshev.cu b/cpp/test/distance/dist_chebyshev.cu index 8f506601ca..abad828de7 100644 --- a/cpp/test/distance/dist_chebyshev.cu +++ b/cpp/test/distance/dist_chebyshev.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include "distance_base.cuh" namespace raft { diff --git a/cpp/test/distance/dist_correlation.cu b/cpp/test/distance/dist_correlation.cu index 77d770b4d1..0e3f0ee0b5 100644 --- a/cpp/test/distance/dist_correlation.cu +++ b/cpp/test/distance/dist_correlation.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include "distance_base.cuh" namespace raft { diff --git a/cpp/test/distance/dist_cos.cu b/cpp/test/distance/dist_cos.cu index 900a71e514..9faf7651f7 100644 --- a/cpp/test/distance/dist_cos.cu +++ b/cpp/test/distance/dist_cos.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include "distance_base.cuh" namespace raft { diff --git a/cpp/test/distance/dist_euc_exp.cu b/cpp/test/distance/dist_euc_exp.cu index 5371b8a3e2..567e279691 100644 --- a/cpp/test/distance/dist_euc_exp.cu +++ b/cpp/test/distance/dist_euc_exp.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include "distance_base.cuh" namespace raft { diff --git a/cpp/test/distance/dist_euc_unexp.cu b/cpp/test/distance/dist_euc_unexp.cu index 81e6be7116..311ad190e2 100644 --- a/cpp/test/distance/dist_euc_unexp.cu +++ b/cpp/test/distance/dist_euc_unexp.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include "distance_base.cuh" namespace raft { diff --git a/cpp/test/distance/dist_eucsqrt_exp.cu b/cpp/test/distance/dist_eucsqrt_exp.cu index c4f2dc80c2..d717158649 100644 --- a/cpp/test/distance/dist_eucsqrt_exp.cu +++ b/cpp/test/distance/dist_eucsqrt_exp.cu @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "../test_utils.h" +#include "../test_utils.cuh" #include "distance_base.cuh" namespace raft { diff --git a/cpp/test/distance/dist_hamming.cu b/cpp/test/distance/dist_hamming.cu index 616ce8f729..1eef9fba4e 100644 --- a/cpp/test/distance/dist_hamming.cu +++ b/cpp/test/distance/dist_hamming.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include "distance_base.cuh" namespace raft { diff --git a/cpp/test/distance/dist_hellinger.cu b/cpp/test/distance/dist_hellinger.cu index d6f994aaf6..85a157aa31 100644 --- a/cpp/test/distance/dist_hellinger.cu +++ b/cpp/test/distance/dist_hellinger.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include "distance_base.cuh" namespace raft { diff --git a/cpp/test/distance/dist_jensen_shannon.cu b/cpp/test/distance/dist_jensen_shannon.cu index 43e4f3aa0f..a1e2f9f38c 100644 --- a/cpp/test/distance/dist_jensen_shannon.cu +++ b/cpp/test/distance/dist_jensen_shannon.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include "distance_base.cuh" namespace raft { diff --git a/cpp/test/distance/dist_kl_divergence.cu b/cpp/test/distance/dist_kl_divergence.cu index 6a5fe8d7ac..94330d9450 100644 --- a/cpp/test/distance/dist_kl_divergence.cu +++ b/cpp/test/distance/dist_kl_divergence.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include "distance_base.cuh" namespace raft { diff --git a/cpp/test/distance/dist_l1.cu b/cpp/test/distance/dist_l1.cu index 322fb52d5c..dc6bcf72b7 100644 --- a/cpp/test/distance/dist_l1.cu +++ b/cpp/test/distance/dist_l1.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include "distance_base.cuh" namespace raft { diff --git a/cpp/test/distance/dist_minkowski.cu b/cpp/test/distance/dist_minkowski.cu index 3e0a2ead92..af2661da3a 100644 --- a/cpp/test/distance/dist_minkowski.cu +++ b/cpp/test/distance/dist_minkowski.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include "distance_base.cuh" namespace raft { diff --git a/cpp/test/distance/dist_russell_rao.cu b/cpp/test/distance/dist_russell_rao.cu index e92a01c70a..3c5124c31f 100644 --- a/cpp/test/distance/dist_russell_rao.cu +++ b/cpp/test/distance/dist_russell_rao.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include "distance_base.cuh" namespace raft { diff --git a/cpp/test/distance/distance_base.cuh b/cpp/test/distance/distance_base.cuh index 19d449c18b..067b1b2c0e 100644 --- a/cpp/test/distance/distance_base.cuh +++ b/cpp/test/distance/distance_base.cuh @@ -14,9 +14,10 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include +#include #include #include #include @@ -166,7 +167,7 @@ __global__ void naiveLpUnexpDistanceKernel(DataType* dist, int yidx = isRowMajor ? 
i + nidx * k : i * n + nidx; auto a = x[xidx]; auto b = y[yidx]; - auto diff = raft::L1Op()(a - b); + auto diff = raft::myAbs(a - b); acc += raft::myPow(diff, p); } auto one_over_p = 1 / p; diff --git a/cpp/test/distance/fused_l2_nn.cu b/cpp/test/distance/fused_l2_nn.cu index 800f45c7fc..252f56607f 100644 --- a/cpp/test/distance/fused_l2_nn.cu +++ b/cpp/test/distance/fused_l2_nn.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include @@ -208,8 +208,8 @@ struct CompareApproxAbsKVP { CompareApproxAbsKVP(T eps_) : eps(eps_) {} bool operator()(const KVP& a, const KVP& b) const { - T diff = raft::abs(raft::abs(a.value) - raft::abs(b.value)); - T m = std::max(raft::abs(a.value), raft::abs(b.value)); + T diff = std::abs(std::abs(a.value) - std::abs(b.value)); + T m = std::max(std::abs(a.value), std::abs(b.value)); T ratio = m >= eps ? diff / m : diff; return (ratio <= eps); } diff --git a/cpp/test/distance/gram.cu b/cpp/test/distance/gram.cu index 168e3d93f8..4366e023a0 100644 --- a/cpp/test/distance/gram.cu +++ b/cpp/test/distance/gram.cu @@ -18,7 +18,7 @@ #include #endif -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/label/label.cu b/cpp/test/label/label.cu index 02b3191c4d..bda87d423c 100644 --- a/cpp/test/label/label.cu +++ b/cpp/test/label/label.cu @@ -18,7 +18,7 @@ #include -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include diff --git a/cpp/test/label/merge_labels.cu b/cpp/test/label/merge_labels.cu index 184ab4922f..c3d2f82a84 100644 --- a/cpp/test/label/merge_labels.cu +++ b/cpp/test/label/merge_labels.cu @@ -17,7 +17,7 @@ #include #include -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/linalg/add.cu b/cpp/test/linalg/add.cu index c73791086b..0e5fc40232 100644 --- a/cpp/test/linalg/add.cu +++ b/cpp/test/linalg/add.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include "add.cuh" #include #include diff --git a/cpp/test/linalg/axpy.cu b/cpp/test/linalg/axpy.cu index f6cabae012..2eb11f314d 100644 --- a/cpp/test/linalg/axpy.cu +++ b/cpp/test/linalg/axpy.cu @@ -15,7 +15,7 @@ */ #include -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/linalg/binary_op.cu b/cpp/test/linalg/binary_op.cu index b92fa09427..ac143842cb 100644 --- a/cpp/test/linalg/binary_op.cu +++ b/cpp/test/linalg/binary_op.cu @@ -14,9 +14,10 @@ * limitations under the License. 
*/ -#include "../test_utils.h" +#include "../test_utils.cuh" #include "binary_op.cuh" #include +#include #include #include #include @@ -36,8 +37,7 @@ void binaryOpLaunch( auto in1_view = raft::make_device_vector_view(in1, len); auto in2_view = raft::make_device_vector_view(in2, len); - binary_op( - handle, in1_view, in2_view, out_view, [] __device__(InType a, InType b) { return a + b; }); + binary_op(handle, in1_view, in2_view, out_view, raft::add_op{}); } template @@ -139,12 +139,7 @@ class BinaryOpAlignment : public ::testing::Test { RAFT_CUDA_TRY(cudaMemsetAsync(x.data(), 0, n * sizeof(math_t), stream)); RAFT_CUDA_TRY(cudaMemsetAsync(y.data(), 0, n * sizeof(math_t), stream)); raft::linalg::binaryOp( - z.data() + 9, - x.data() + 137, - y.data() + 19, - 256, - [] __device__(math_t x, math_t y) { return x + y; }, - handle.get_stream()); + z.data() + 9, x.data() + 137, y.data() + 19, 256, raft::add_op{}, handle.get_stream()); } raft::handle_t handle; diff --git a/cpp/test/linalg/binary_op.cuh b/cpp/test/linalg/binary_op.cuh index 62820ddb97..8b0bc609d2 100644 --- a/cpp/test/linalg/binary_op.cuh +++ b/cpp/test/linalg/binary_op.cuh @@ -16,7 +16,7 @@ #pragma once -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include diff --git a/cpp/test/linalg/cholesky_r1.cu b/cpp/test/linalg/cholesky_r1.cu index 134b7645ff..9d90b03a6e 100644 --- a/cpp/test/linalg/cholesky_r1.cu +++ b/cpp/test/linalg/cholesky_r1.cu @@ -22,7 +22,7 @@ #include #include -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include namespace raft { diff --git a/cpp/test/linalg/coalesced_reduction.cu b/cpp/test/linalg/coalesced_reduction.cu index 791537b430..dc82ab9511 100644 --- a/cpp/test/linalg/coalesced_reduction.cu +++ b/cpp/test/linalg/coalesced_reduction.cu @@ -14,9 +14,10 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include "reduce.cuh" #include +#include #include #include #include @@ -47,8 +48,7 @@ void coalescedReductionLaunch( { auto dots_view = raft::make_device_vector_view(dots, rows); auto data_view = raft::make_device_matrix_view(data, rows, cols); - coalesced_reduction( - handle, data_view, dots_view, (T)0, inplace, [] __device__(T in, int i) { return in * in; }); + coalesced_reduction(handle, data_view, dots_view, (T)0, inplace, raft::sq_op{}); } template @@ -80,9 +80,9 @@ class coalescedReductionTest : public ::testing::TestWithParam{}, - raft::Sum{}, - raft::Nop{}); + raft::sq_op{}, + raft::add_op{}, + raft::identity_op{}); naiveCoalescedReduction(dots_exp.data(), data.data(), cols, @@ -90,9 +90,9 @@ class coalescedReductionTest : public ::testing::TestWithParam{}, - raft::Sum{}, - raft::Nop{}); + raft::sq_op{}, + raft::add_op{}, + raft::identity_op{}); coalescedReductionLaunch(handle, dots_act.data(), data.data(), cols, rows); coalescedReductionLaunch(handle, dots_act.data(), data.data(), cols, rows, true); diff --git a/cpp/test/linalg/divide.cu b/cpp/test/linalg/divide.cu index 4e2e5cdba7..4b5ea0a2dc 100644 --- a/cpp/test/linalg/divide.cu +++ b/cpp/test/linalg/divide.cu @@ -14,7 +14,7 @@ * limitations under the License. 
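The binary_op.cu hunks above show the pattern most of the remaining test changes repeat: a one-off extended lambda such as [] __device__(T a, T b) { return a + b; } becomes the shared raft::add_op functor, avoiding a fresh lambda instantiation at every call site. A sketch of the resulting call shape (assuming device buffers of length n and the raw-pointer binaryOp overload used above):

#include <raft/core/operators.hpp>
#include <raft/linalg/binary_op.cuh>

template <typename T>
void elementwise_sum(const T* d_in1, const T* d_in2, T* d_out, int n, cudaStream_t stream)
{
  // add_op is a plain struct, so every file instantiates the same operator type
  raft::linalg::binaryOp(d_out, d_in1, d_in2, n, raft::add_op{}, stream);
}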
*/ -#include "../test_utils.h" +#include "../test_utils.cuh" #include "unary_op.cuh" #include #include diff --git a/cpp/test/linalg/dot.cu b/cpp/test/linalg/dot.cu index b5007aea32..80a9f24aba 100644 --- a/cpp/test/linalg/dot.cu +++ b/cpp/test/linalg/dot.cu @@ -15,7 +15,7 @@ */ #include -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/linalg/eig.cu b/cpp/test/linalg/eig.cu index a913b14fcb..4b834c1aa8 100644 --- a/cpp/test/linalg/eig.cu +++ b/cpp/test/linalg/eig.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/linalg/eig_sel.cu b/cpp/test/linalg/eig_sel.cu index 9d57c4fa0a..c2b12e5d4a 100644 --- a/cpp/test/linalg/eig_sel.cu +++ b/cpp/test/linalg/eig_sel.cu @@ -16,7 +16,7 @@ #if CUDART_VERSION >= 10010 -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/linalg/eltwise.cu b/cpp/test/linalg/eltwise.cu index 07ded5ec79..d9ab7e0984 100644 --- a/cpp/test/linalg/eltwise.cu +++ b/cpp/test/linalg/eltwise.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/linalg/gemm_layout.cu b/cpp/test/linalg/gemm_layout.cu index dbe10ab4cc..a992a32304 100644 --- a/cpp/test/linalg/gemm_layout.cu +++ b/cpp/test/linalg/gemm_layout.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/linalg/gemv.cu b/cpp/test/linalg/gemv.cu index 2bd9abc200..594810bab2 100644 --- a/cpp/test/linalg/gemv.cu +++ b/cpp/test/linalg/gemv.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/linalg/map.cu b/cpp/test/linalg/map.cu index 95a2aff130..7e3a1562d9 100644 --- a/cpp/test/linalg/map.cu +++ b/cpp/test/linalg/map.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/linalg/map_then_reduce.cu b/cpp/test/linalg/map_then_reduce.cu index adf784f601..1e7f58ec38 100644 --- a/cpp/test/linalg/map_then_reduce.cu +++ b/cpp/test/linalg/map_then_reduce.cu @@ -14,12 +14,14 @@ * limitations under the License. 
*/ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include +#include #include #include #include +#include #include #include #include @@ -63,9 +65,8 @@ template void mapReduceLaunch( OutType* out_ref, OutType* out, const InType* in, size_t len, cudaStream_t stream) { - auto op = [] __device__(InType in) { return in; }; - naiveMapReduce(out_ref, in, len, op, stream); - mapThenSumReduce(out, len, op, 0, in); + naiveMapReduce(out_ref, in, len, raft::identity_op{}, stream); + mapThenSumReduce(out, len, raft::identity_op{}, 0, in); } template @@ -150,23 +151,21 @@ class MapGenericReduceTest : public ::testing::Test { void testMin() { - auto op = [] __device__(InType in) { return in; }; OutType neutral = std::numeric_limits::max(); auto output_view = raft::make_device_scalar_view(output.data()); auto input_view = raft::make_device_vector_view( input.data(), static_cast(input.size())); - map_reduce(handle, input_view, output_view, neutral, op, cub::Min()); + map_reduce(handle, input_view, output_view, neutral, raft::identity_op{}, cub::Min()); EXPECT_TRUE(raft::devArrMatch( OutType(1), output.data(), 1, raft::Compare(), handle.get_stream())); } void testMax() { - auto op = [] __device__(InType in) { return in; }; OutType neutral = std::numeric_limits::min(); auto output_view = raft::make_device_scalar_view(output.data()); auto input_view = raft::make_device_vector_view( input.data(), static_cast(input.size())); - map_reduce(handle, input_view, output_view, neutral, op, cub::Max()); + map_reduce(handle, input_view, output_view, neutral, raft::identity_op{}, cub::Max()); EXPECT_TRUE(raft::devArrMatch( OutType(5), output.data(), 1, raft::Compare(), handle.get_stream())); } diff --git a/cpp/test/linalg/matrix_vector.cu b/cpp/test/linalg/matrix_vector.cu index f103b5918b..7018e1da96 100644 --- a/cpp/test/linalg/matrix_vector.cu +++ b/cpp/test/linalg/matrix_vector.cu @@ -14,12 +14,14 @@ * limitations under the License. 
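In map_then_reduce.cu, raft::identity_op stands in for the removed pass-through lambda while the reduction itself is supplied by a cub functor. A device-wide minimum in that style would look roughly as follows (a sketch only: the header path and exact map_reduce overload are assumed from this test's usage):

#include <cub/cub.cuh>
#include <limits>
#include <raft/core/operators.hpp>
#include <raft/linalg/map_reduce.cuh>

template <typename T>
void device_min(const raft::handle_t& handle, const T* d_in, int n, T* d_out)
{
  auto in  = raft::make_device_vector_view<const T>(d_in, n);
  auto out = raft::make_device_scalar_view<T>(d_out);
  // the neutral element of a min-reduction is the largest representable value
  raft::linalg::map_reduce(
    handle, in, out, std::numeric_limits<T>::max(), raft::identity_op{}, cub::Min());
}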
*/ -#include "../test_utils.h" +#include "../test_utils.cuh" #include "matrix_vector_op.cuh" #include #include +#include #include #include +#include #include namespace raft { @@ -113,34 +115,25 @@ void naive_matrix_vector_op_launch(const raft::handle_t& handle, return mat_element; } }; - auto operation_div = [] __device__(T mat_element, T vec_element) { - return mat_element / vec_element; - }; auto operation_bin_div_skip_zero = [] __device__(T mat_element, T vec_element) { if (raft::myAbs(vec_element) < T(1e-10)) return T(0); else return mat_element / vec_element; }; - auto operation_bin_add = [] __device__(T mat_element, T vec_element) { - return mat_element + vec_element; - }; - auto operation_bin_sub = [] __device__(T mat_element, T vec_element) { - return mat_element - vec_element; - }; if (operation_type == 0) { naiveMatVec( in, in, vec1, D, N, row_major, bcast_along_rows, operation_bin_mult_skip_zero, stream); } else if (operation_type == 1) { - naiveMatVec(in, in, vec1, D, N, row_major, bcast_along_rows, operation_div, stream); + naiveMatVec(in, in, vec1, D, N, row_major, bcast_along_rows, raft::div_op{}, stream); } else if (operation_type == 2) { naiveMatVec( in, in, vec1, D, N, row_major, bcast_along_rows, operation_bin_div_skip_zero, stream); } else if (operation_type == 3) { - naiveMatVec(in, in, vec1, D, N, row_major, bcast_along_rows, operation_bin_add, stream); + naiveMatVec(in, in, vec1, D, N, row_major, bcast_along_rows, raft::add_op{}, stream); } else if (operation_type == 4) { - naiveMatVec(in, in, vec1, D, N, row_major, bcast_along_rows, operation_bin_sub, stream); + naiveMatVec(in, in, vec1, D, N, row_major, bcast_along_rows, raft::sub_op{}, stream); } else { THROW("Unknown operation type '%d'!", (int)operation_type); } diff --git a/cpp/test/linalg/matrix_vector_op.cu b/cpp/test/linalg/matrix_vector_op.cu index 1c96c3fc74..e2775c168d 100644 --- a/cpp/test/linalg/matrix_vector_op.cu +++ b/cpp/test/linalg/matrix_vector_op.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include "matrix_vector_op.cuh" #include #include diff --git a/cpp/test/linalg/matrix_vector_op.cuh b/cpp/test/linalg/matrix_vector_op.cuh index 602d05d153..cf316ef111 100644 --- a/cpp/test/linalg/matrix_vector_op.cuh +++ b/cpp/test/linalg/matrix_vector_op.cuh @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include diff --git a/cpp/test/linalg/mean_squared_error.cu b/cpp/test/linalg/mean_squared_error.cu index 795f831417..18e7debcb1 100644 --- a/cpp/test/linalg/mean_squared_error.cu +++ b/cpp/test/linalg/mean_squared_error.cu @@ -15,7 +15,7 @@ */ #include -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/linalg/multiply.cu b/cpp/test/linalg/multiply.cu index 1d6446c5c0..c90fb93fd0 100644 --- a/cpp/test/linalg/multiply.cu +++ b/cpp/test/linalg/multiply.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include "unary_op.cuh" #include #include diff --git a/cpp/test/linalg/norm.cu b/cpp/test/linalg/norm.cu index f0b8d3bb55..94540b9ff6 100644 --- a/cpp/test/linalg/norm.cu +++ b/cpp/test/linalg/norm.cu @@ -14,10 +14,12 @@ * limitations under the License. 
*/ -#include "../test_utils.h" +#include "../test_utils.cuh" #include +#include #include #include +#include #include #include @@ -95,11 +97,10 @@ class RowNormTest : public ::testing::TestWithParam> { auto input_col_major = raft::make_device_matrix_view( data.data(), params.rows, params.cols); if (params.do_sqrt) { - auto fin_op = [] __device__(const T in) { return raft::mySqrt(in); }; if (params.rowMajor) { - norm(handle, input_row_major, output_view, params.type, Apply::ALONG_ROWS, fin_op); + norm(handle, input_row_major, output_view, params.type, Apply::ALONG_ROWS, raft::sqrt_op{}); } else { - norm(handle, input_col_major, output_view, params.type, Apply::ALONG_ROWS, fin_op); + norm(handle, input_col_major, output_view, params.type, Apply::ALONG_ROWS, raft::sqrt_op{}); } } else { if (params.rowMajor) { @@ -171,11 +172,12 @@ class ColNormTest : public ::testing::TestWithParam> { auto input_col_major = raft::make_device_matrix_view( data.data(), params.rows, params.cols); if (params.do_sqrt) { - auto fin_op = [] __device__(const T in) { return raft::mySqrt(in); }; if (params.rowMajor) { - norm(handle, input_row_major, output_view, params.type, Apply::ALONG_COLUMNS, fin_op); + norm( + handle, input_row_major, output_view, params.type, Apply::ALONG_COLUMNS, raft::sqrt_op{}); } else { - norm(handle, input_col_major, output_view, params.type, Apply::ALONG_COLUMNS, fin_op); + norm( + handle, input_col_major, output_view, params.type, Apply::ALONG_COLUMNS, raft::sqrt_op{}); } } else { if (params.rowMajor) { diff --git a/cpp/test/linalg/normalize.cu b/cpp/test/linalg/normalize.cu index cb949b6a5d..0a6786b1ee 100644 --- a/cpp/test/linalg/normalize.cu +++ b/cpp/test/linalg/normalize.cu @@ -14,12 +14,14 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include +#include #include #include #include #include +#include #include #include @@ -48,20 +50,13 @@ void rowNormalizeRef( { rmm::device_uvector norm(rows, stream); if (norm_type == raft::linalg::L2Norm) { - raft::linalg::rowNorm(norm.data(), in, cols, rows, norm_type, true, stream, raft::SqrtOp()); + raft::linalg::rowNorm(norm.data(), in, cols, rows, norm_type, true, stream, raft::sqrt_op()); } else { - raft::linalg::rowNorm(norm.data(), in, cols, rows, norm_type, true, stream, raft::Nop()); + raft::linalg::rowNorm( + norm.data(), in, cols, rows, norm_type, true, stream, raft::identity_op()); } raft::linalg::matrixVectorOp( - out, - in, - norm.data(), - cols, - rows, - true, - false, - [] __device__(T a, T b) { return a / b; }, - stream); + out, in, norm.data(), cols, rows, true, false, raft::div_op{}, stream); } template diff --git a/cpp/test/linalg/power.cu b/cpp/test/linalg/power.cu index bdab49d5c8..54c2e2a7aa 100644 --- a/cpp/test/linalg/power.cu +++ b/cpp/test/linalg/power.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/linalg/reduce.cu b/cpp/test/linalg/reduce.cu index 00f3810d28..4ad382c4f7 100644 --- a/cpp/test/linalg/reduce.cu +++ b/cpp/test/linalg/reduce.cu @@ -14,10 +14,11 @@ * limitations under the License. 
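The rowNormalizeRef helper above also documents what the fused normalize kernel computes: row norms first (with sqrt_op as the final transform), then an element-by-row-norm division via div_op. The same two-step form, as a sketch built from the raw-pointer calls in these hunks (row-major input assumed):

#include <raft/core/operators.hpp>
#include <raft/linalg/matrix_vector_op.cuh>
#include <raft/linalg/norm.cuh>
#include <rmm/device_uvector.hpp>

template <typename T>
void l2_normalize_rows(const T* in, T* out, int rows, int cols, cudaStream_t stream)
{
  rmm::device_uvector<T> norms(rows, stream);
  // L2 row norms; sqrt_op turns the summed squares into actual norms
  raft::linalg::rowNorm(
    norms.data(), in, cols, rows, raft::linalg::L2Norm, true, stream, raft::sqrt_op());
  // divide every element by its row's norm
  raft::linalg::matrixVectorOp(out, in, norms.data(), cols, rows, true, false, raft::div_op{}, stream);
}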
*/ -#include "../test_utils.h" +#include "../test_utils.cuh" #include "reduce.cuh" #include #include +#include #include #include #include @@ -101,9 +102,9 @@ void reduceLaunch(OutType* dots, template , - typename ReduceLambda = raft::Sum, - typename FinalLambda = raft::SqrtOp> + typename MainLambda = raft::sq_op, + typename ReduceLambda = raft::add_op, + typename FinalLambda = raft::sqrt_op> class ReduceTest : public ::testing::TestWithParam> { public: ReduceTest() @@ -301,7 +302,7 @@ REDUCE_TEST((ReduceTest, ArgMaxOp, - raft::Nop, int>>), + raft::identity_op>), ReduceTestKVPISI32, inputs_kvpis_i32); REDUCE_TEST((ReduceTest, ArgMaxOp, - raft::Nop, int>>), + raft::identity_op>), ReduceTestKVPIFI32, inputs_kvpif_i32); REDUCE_TEST((ReduceTest, ArgMaxOp, - raft::Nop, int>>), + raft::identity_op>), ReduceTestKVPIDI32, inputs_kvpid_i32); diff --git a/cpp/test/linalg/reduce.cuh b/cpp/test/linalg/reduce.cuh index 0dcffd3f41..17e91ce202 100644 --- a/cpp/test/linalg/reduce.cuh +++ b/cpp/test/linalg/reduce.cuh @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include #include @@ -61,9 +62,9 @@ __global__ void naiveCoalescedReductionKernel(OutType* dots, template , - typename ReduceLambda = raft::Sum, - typename FinalLambda = raft::Nop> + typename MainLambda = raft::identity_op, + typename ReduceLambda = raft::add_op, + typename FinalLambda = raft::identity_op> void naiveCoalescedReduction(OutType* dots, const InType* data, IdxType D, @@ -71,9 +72,9 @@ void naiveCoalescedReduction(OutType* dots, cudaStream_t stream, OutType init, bool inplace = false, - MainLambda main_op = raft::Nop(), - ReduceLambda reduce_op = raft::Sum(), - FinalLambda fin_op = raft::Nop()) + MainLambda main_op = raft::identity_op(), + ReduceLambda reduce_op = raft::add_op(), + FinalLambda fin_op = raft::identity_op()) { static const IdxType TPB = 64; IdxType nblks = raft::ceildiv(N, TPB); @@ -115,9 +116,9 @@ __global__ void naiveStridedReductionKernel(OutType* dots, template , - typename ReduceLambda = raft::Sum, - typename FinalLambda = raft::Nop> + typename MainLambda = raft::identity_op, + typename ReduceLambda = raft::add_op, + typename FinalLambda = raft::identity_op> void naiveStridedReduction(OutType* dots, const InType* data, IdxType D, @@ -125,9 +126,9 @@ void naiveStridedReduction(OutType* dots, cudaStream_t stream, OutType init, bool inplace = false, - MainLambda main_op = raft::Nop(), - ReduceLambda reduce_op = raft::Sum(), - FinalLambda fin_op = raft::Nop()) + MainLambda main_op = raft::identity_op(), + ReduceLambda reduce_op = raft::add_op(), + FinalLambda fin_op = raft::identity_op()) { static const IdxType TPB = 64; IdxType nblks = raft::ceildiv(D, TPB); @@ -139,9 +140,9 @@ void naiveStridedReduction(OutType* dots, template , - typename ReduceLambda = raft::Sum, - typename FinalLambda = raft::Nop> + typename MainLambda = raft::identity_op, + typename ReduceLambda = raft::add_op, + typename FinalLambda = raft::identity_op> void naiveReduction(OutType* dots, const InType* data, IdxType D, @@ -151,9 +152,9 @@ void naiveReduction(OutType* dots, cudaStream_t stream, OutType init, bool inplace = false, - MainLambda main_op = raft::Nop(), - ReduceLambda reduce_op = raft::Sum(), - FinalLambda fin_op = raft::Nop()) + MainLambda main_op = raft::identity_op(), + ReduceLambda reduce_op = raft::add_op(), + FinalLambda fin_op = raft::identity_op()) { if (rowMajor && alongRows) { naiveCoalescedReduction(dots, data, D, N, stream, init, inplace, main_op, reduce_op, fin_op); diff --git 
a/cpp/test/linalg/reduce_cols_by_key.cu b/cpp/test/linalg/reduce_cols_by_key.cu index 6fa616ba8c..3870bfb830 100644 --- a/cpp/test/linalg/reduce_cols_by_key.cu +++ b/cpp/test/linalg/reduce_cols_by_key.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/linalg/reduce_rows_by_key.cu b/cpp/test/linalg/reduce_rows_by_key.cu index 7b124cb7bb..22229d2224 100644 --- a/cpp/test/linalg/reduce_rows_by_key.cu +++ b/cpp/test/linalg/reduce_rows_by_key.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/linalg/rsvd.cu b/cpp/test/linalg/rsvd.cu index f774d59631..04e17468c3 100644 --- a/cpp/test/linalg/rsvd.cu +++ b/cpp/test/linalg/rsvd.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/linalg/sqrt.cu b/cpp/test/linalg/sqrt.cu index ed57e94914..9008313b58 100644 --- a/cpp/test/linalg/sqrt.cu +++ b/cpp/test/linalg/sqrt.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/linalg/strided_reduction.cu b/cpp/test/linalg/strided_reduction.cu index 77ca585ea5..6a8c43ad52 100644 --- a/cpp/test/linalg/strided_reduction.cu +++ b/cpp/test/linalg/strided_reduction.cu @@ -14,11 +14,13 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include "reduce.cuh" #include +#include #include #include +#include #include namespace raft { @@ -38,7 +40,7 @@ void stridedReductionLaunch( raft::handle_t handle{stream}; auto dots_view = raft::make_device_vector_view(dots, cols); auto data_view = raft::make_device_matrix_view(data, rows, cols); - strided_reduction(handle, data_view, dots_view, (T)0, inplace, raft::L2Op{}); + strided_reduction(handle, data_view, dots_view, (T)0, inplace, raft::sq_op{}); } template @@ -70,9 +72,9 @@ class stridedReductionTest : public ::testing::TestWithParam{}, - raft::Sum{}, - raft::Nop{}); + raft::sq_op{}, + raft::add_op{}, + raft::identity_op{}); naiveStridedReduction(dots_exp.data(), data.data(), cols, @@ -80,9 +82,9 @@ class stridedReductionTest : public ::testing::TestWithParam{}, - raft::Sum{}, - raft::Nop{}); + raft::sq_op{}, + raft::add_op{}, + raft::identity_op{}); stridedReductionLaunch(dots_act.data(), data.data(), cols, rows, false, stream); stridedReductionLaunch(dots_act.data(), data.data(), cols, rows, true, stream); handle.sync_stream(stream); diff --git a/cpp/test/linalg/subtract.cu b/cpp/test/linalg/subtract.cu index 3904f9f33f..426fc98f9f 100644 --- a/cpp/test/linalg/subtract.cu +++ b/cpp/test/linalg/subtract.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/linalg/svd.cu b/cpp/test/linalg/svd.cu index c18417dc9e..7918d481db 100644 --- a/cpp/test/linalg/svd.cu +++ b/cpp/test/linalg/svd.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/linalg/ternary_op.cu b/cpp/test/linalg/ternary_op.cu index e172d771cd..c78df08820 100644 --- a/cpp/test/linalg/ternary_op.cu +++ b/cpp/test/linalg/ternary_op.cu @@ -14,7 +14,7 @@ * limitations under the License. 
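Across these reduction tests, the three functor slots compose as fin_op(reduce_op(..., main_op(x, i))): (sq_op, add_op, sqrt_op) yields an L2 norm, while the (identity_op, add_op, identity_op) defaults yield a plain sum. A sketch against the mdspan overload used in coalesced_reduction.cu (the trailing reduce_op/fin_op arguments are assumed to follow main_op, as in the naive helpers above):

#include <raft/core/operators.hpp>
#include <raft/linalg/coalesced_reduction.cuh>

template <typename T>
void row_l2_norms(const raft::handle_t& handle, const T* d_data, T* d_norms, int rows, int cols)
{
  auto data  = raft::make_device_matrix_view<const T, int, raft::row_major>(d_data, rows, cols);
  auto norms = raft::make_device_vector_view<T, int>(d_norms, rows);
  // norm_i = sqrt(sum_j data_ij^2)
  raft::linalg::coalesced_reduction(
    handle, data, norms, T(0), false, raft::sq_op{}, raft::add_op{}, raft::sqrt_op{});
}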
*/ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/linalg/transpose.cu b/cpp/test/linalg/transpose.cu index 6a05317f49..110dc527d3 100644 --- a/cpp/test/linalg/transpose.cu +++ b/cpp/test/linalg/transpose.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include diff --git a/cpp/test/linalg/unary_op.cu b/cpp/test/linalg/unary_op.cu index 57b009a0ac..3ebf70e69f 100644 --- a/cpp/test/linalg/unary_op.cu +++ b/cpp/test/linalg/unary_op.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include "unary_op.cuh" #include #include diff --git a/cpp/test/linalg/unary_op.cuh b/cpp/test/linalg/unary_op.cuh index 190d531a9f..28bcc004a4 100644 --- a/cpp/test/linalg/unary_op.cuh +++ b/cpp/test/linalg/unary_op.cuh @@ -16,7 +16,7 @@ #pragma once -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include diff --git a/cpp/test/matrix/argmax.cu b/cpp/test/matrix/argmax.cu index 0219eb1aff..33af0ce5a4 100644 --- a/cpp/test/matrix/argmax.cu +++ b/cpp/test/matrix/argmax.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/matrix/argmin.cu b/cpp/test/matrix/argmin.cu index bdf178cd8a..22f0a6cac0 100644 --- a/cpp/test/matrix/argmin.cu +++ b/cpp/test/matrix/argmin.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/matrix/columnSort.cu b/cpp/test/matrix/columnSort.cu index aba1c4e1f0..000a911efd 100644 --- a/cpp/test/matrix/columnSort.cu +++ b/cpp/test/matrix/columnSort.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/matrix/diagonal.cu b/cpp/test/matrix/diagonal.cu index e1ad9e144b..f6cd178b23 100644 --- a/cpp/test/matrix/diagonal.cu +++ b/cpp/test/matrix/diagonal.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/matrix/gather.cu b/cpp/test/matrix/gather.cu index 4b3244913b..0bea62e9cf 100644 --- a/cpp/test/matrix/gather.cu +++ b/cpp/test/matrix/gather.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/matrix/linewise_op.cu b/cpp/test/matrix/linewise_op.cu index 2e3d54dcf5..9ce1371944 100644 --- a/cpp/test/matrix/linewise_op.cu +++ b/cpp/test/matrix/linewise_op.cu @@ -15,11 +15,12 @@ */ #include "../linalg/matrix_vector_op.cuh" -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include #include +#include #include #include #include @@ -58,7 +59,6 @@ struct LinewiseTest : public ::testing::TestWithParam void runLinewiseSum(T* out, const T* in, const I lineLen, const I nLines, const T* vec) { - auto f = [] __device__(T a, T b) -> T { return a + b; }; constexpr auto rowmajor = std::is_same_v; I m = rowmajor ? 
lineLen : nLines; @@ -68,7 +68,8 @@ struct LinewiseTest : public ::testing::TestWithParam(out, n, m); auto vec_view = raft::make_device_vector_view(vec, lineLen); - matrix::linewise_op(handle, in_view, out_view, raft::is_row_major(in_view), f, vec_view); + matrix::linewise_op( + handle, in_view, out_view, raft::is_row_major(in_view), raft::add_op{}, vec_view); } template @@ -107,9 +108,8 @@ struct LinewiseTest : public ::testing::TestWithParam T { return a + b; }; auto vec_view = raft::make_device_vector_view(vec, alongLines ? lineLen : nLines); - matrix::linewise_op(handle, in, out, alongLines, f, vec_view); + matrix::linewise_op(handle, in, out, alongLines, raft::add_op{}, vec_view); } /** diff --git a/cpp/test/matrix/math.cu b/cpp/test/matrix/math.cu index 684b550dfc..f2c1a6249c 100644 --- a/cpp/test/matrix/math.cu +++ b/cpp/test/matrix/math.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include diff --git a/cpp/test/matrix/matrix.cu b/cpp/test/matrix/matrix.cu index 78391d5ff2..8cfbdac32b 100644 --- a/cpp/test/matrix/matrix.cu +++ b/cpp/test/matrix/matrix.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include diff --git a/cpp/test/matrix/norm.cu b/cpp/test/matrix/norm.cu index 38fdd409eb..b1e10c9047 100644 --- a/cpp/test/matrix/norm.cu +++ b/cpp/test/matrix/norm.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/matrix/reverse.cu b/cpp/test/matrix/reverse.cu index c905b8711e..49d501b6d0 100644 --- a/cpp/test/matrix/reverse.cu +++ b/cpp/test/matrix/reverse.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/matrix/slice.cu b/cpp/test/matrix/slice.cu index 5faf672d13..9060357b3f 100644 --- a/cpp/test/matrix/slice.cu +++ b/cpp/test/matrix/slice.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/matrix/triangular.cu b/cpp/test/matrix/triangular.cu index 9af3defb5d..9c6c49066b 100644 --- a/cpp/test/matrix/triangular.cu +++ b/cpp/test/matrix/triangular.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/mst.cu b/cpp/test/mst.cu index 544ca80a46..d11f0b5842 100644 --- a/cpp/test/mst.cu +++ b/cpp/test/mst.cu @@ -16,7 +16,7 @@ #include -#include "test_utils.h" +#include "test_utils.cuh" #include #include #include diff --git a/cpp/test/neighbors/ann_ivf_flat.cu b/cpp/test/neighbors/ann_ivf_flat.cu index 735d569318..1207b75a4a 100644 --- a/cpp/test/neighbors/ann_ivf_flat.cu +++ b/cpp/test/neighbors/ann_ivf_flat.cu @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "../test_utils.h" +#include "../test_utils.cuh" #include "ann_utils.cuh" #include diff --git a/cpp/test/neighbors/ann_ivf_pq.cuh b/cpp/test/neighbors/ann_ivf_pq.cuh index 9d6ad11ccb..7c3ec044b1 100644 --- a/cpp/test/neighbors/ann_ivf_pq.cuh +++ b/cpp/test/neighbors/ann_ivf_pq.cuh @@ -15,7 +15,7 @@ */ #pragma once -#include "../test_utils.h" +#include "../test_utils.cuh" #include "ann_utils.cuh" #include diff --git a/cpp/test/neighbors/ann_utils.cuh b/cpp/test/neighbors/ann_utils.cuh index 07ef410d36..05fe6ab92d 100644 --- a/cpp/test/neighbors/ann_utils.cuh +++ b/cpp/test/neighbors/ann_utils.cuh @@ -25,7 +25,7 @@ #include #include -#include "../test_utils.h" +#include "../test_utils.cuh" #include namespace raft::neighbors { diff --git a/cpp/test/neighbors/ball_cover.cu b/cpp/test/neighbors/ball_cover.cu index 47030b0d62..7405863b9f 100644 --- a/cpp/test/neighbors/ball_cover.cu +++ b/cpp/test/neighbors/ball_cover.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include "spatial_data.h" #include #include diff --git a/cpp/test/neighbors/epsilon_neighborhood.cu b/cpp/test/neighbors/epsilon_neighborhood.cu index c83817f6f8..4f33db489e 100644 --- a/cpp/test/neighbors/epsilon_neighborhood.cu +++ b/cpp/test/neighbors/epsilon_neighborhood.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/neighbors/faiss_mr.cu b/cpp/test/neighbors/faiss_mr.cu index 91ba1cc94c..38e793d120 100644 --- a/cpp/test/neighbors/faiss_mr.cu +++ b/cpp/test/neighbors/faiss_mr.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include diff --git a/cpp/test/neighbors/fused_l2_knn.cu b/cpp/test/neighbors/fused_l2_knn.cu index 8df193d53d..d57f99da50 100644 --- a/cpp/test/neighbors/fused_l2_knn.cu +++ b/cpp/test/neighbors/fused_l2_knn.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include diff --git a/cpp/test/neighbors/haversine.cu b/cpp/test/neighbors/haversine.cu index 78bd377156..91a2ca07df 100644 --- a/cpp/test/neighbors/haversine.cu +++ b/cpp/test/neighbors/haversine.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/neighbors/knn.cu b/cpp/test/neighbors/knn.cu index eb5ecf663f..ff3a6a80b4 100644 --- a/cpp/test/neighbors/knn.cu +++ b/cpp/test/neighbors/knn.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include diff --git a/cpp/test/neighbors/refine.cu b/cpp/test/neighbors/refine.cu index 06c1317b1e..674171e030 100644 --- a/cpp/test/neighbors/refine.cu +++ b/cpp/test/neighbors/refine.cu @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "../test_utils.h" +#include "../test_utils.cuh" #include "ann_utils.cuh" #include "refine_helper.cuh" diff --git a/cpp/test/neighbors/selection.cu b/cpp/test/neighbors/selection.cu index bfcfca5ead..d793ea46ee 100644 --- a/cpp/test/neighbors/selection.cu +++ b/cpp/test/neighbors/selection.cu @@ -20,7 +20,7 @@ #include #include -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include diff --git a/cpp/test/random/make_blobs.cu b/cpp/test/random/make_blobs.cu index 1f14fd23f7..741b374c8c 100644 --- a/cpp/test/random/make_blobs.cu +++ b/cpp/test/random/make_blobs.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/random/make_regression.cu b/cpp/test/random/make_regression.cu index 65d4c4cb31..c2e447adf8 100644 --- a/cpp/test/random/make_regression.cu +++ b/cpp/test/random/make_regression.cu @@ -19,7 +19,7 @@ #include #include -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/random/multi_variable_gaussian.cu b/cpp/test/random/multi_variable_gaussian.cu index f9f79e6845..04626a53c7 100644 --- a/cpp/test/random/multi_variable_gaussian.cu +++ b/cpp/test/random/multi_variable_gaussian.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/random/permute.cu b/cpp/test/random/permute.cu index 32e5540d51..be4f2a005f 100644 --- a/cpp/test/random/permute.cu +++ b/cpp/test/random/permute.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/random/rmat_rectangular_generator.cu b/cpp/test/random/rmat_rectangular_generator.cu index 0baaaf28cf..c1c4752453 100644 --- a/cpp/test/random/rmat_rectangular_generator.cu +++ b/cpp/test/random/rmat_rectangular_generator.cu @@ -19,7 +19,7 @@ #include #include -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include diff --git a/cpp/test/random/rng.cu b/cpp/test/random/rng.cu index 82f6e0e247..bdce79b76e 100644 --- a/cpp/test/random/rng.cu +++ b/cpp/test/random/rng.cu @@ -17,7 +17,7 @@ #include #include -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/random/rng_discrete.cu b/cpp/test/random/rng_discrete.cu index b7aef51af5..1dee281527 100644 --- a/cpp/test/random/rng_discrete.cu +++ b/cpp/test/random/rng_discrete.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/random/rng_int.cu b/cpp/test/random/rng_int.cu index d5270c456e..89d6d208a5 100644 --- a/cpp/test/random/rng_int.cu +++ b/cpp/test/random/rng_int.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/random/sample_without_replacement.cu b/cpp/test/random/sample_without_replacement.cu index 355df1fcc1..a6cf3569e6 100644 --- a/cpp/test/random/sample_without_replacement.cu +++ b/cpp/test/random/sample_without_replacement.cu @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/sparse/add.cu b/cpp/test/sparse/add.cu index 862cbffdc7..692094a861 100644 --- a/cpp/test/sparse/add.cu +++ b/cpp/test/sparse/add.cu @@ -20,7 +20,7 @@ #include #include -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include diff --git a/cpp/test/sparse/convert_coo.cu b/cpp/test/sparse/convert_coo.cu index 1142c6f3f2..21f81bfa6a 100644 --- a/cpp/test/sparse/convert_coo.cu +++ b/cpp/test/sparse/convert_coo.cu @@ -22,7 +22,7 @@ #include -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include diff --git a/cpp/test/sparse/convert_csr.cu b/cpp/test/sparse/convert_csr.cu index 007cbd7fdb..bc81dcaba5 100644 --- a/cpp/test/sparse/convert_csr.cu +++ b/cpp/test/sparse/convert_csr.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include diff --git a/cpp/test/sparse/csr_row_slice.cu b/cpp/test/sparse/csr_row_slice.cu index 39b235d5f1..6b10f8d798 100644 --- a/cpp/test/sparse/csr_row_slice.cu +++ b/cpp/test/sparse/csr_row_slice.cu @@ -24,7 +24,7 @@ #include -#include "../test_utils.h" +#include "../test_utils.cuh" namespace raft { namespace sparse { diff --git a/cpp/test/sparse/csr_to_dense.cu b/cpp/test/sparse/csr_to_dense.cu index 5811c5c22b..b99d6fa7fd 100644 --- a/cpp/test/sparse/csr_to_dense.cu +++ b/cpp/test/sparse/csr_to_dense.cu @@ -24,7 +24,7 @@ #include -#include "../test_utils.h" +#include "../test_utils.cuh" namespace raft { namespace sparse { diff --git a/cpp/test/sparse/csr_transpose.cu b/cpp/test/sparse/csr_transpose.cu index 108d38a8b4..2342f6e7ef 100644 --- a/cpp/test/sparse/csr_transpose.cu +++ b/cpp/test/sparse/csr_transpose.cu @@ -23,7 +23,7 @@ #include #include -#include "../test_utils.h" +#include "../test_utils.cuh" namespace raft { namespace sparse { diff --git a/cpp/test/sparse/degree.cu b/cpp/test/sparse/degree.cu index a4af021c05..b5b22ff15c 100644 --- a/cpp/test/sparse/degree.cu +++ b/cpp/test/sparse/degree.cu @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include diff --git a/cpp/test/sparse/dist_coo_spmv.cu b/cpp/test/sparse/dist_coo_spmv.cu index c004aeaef0..be73bd3130 100644 --- a/cpp/test/sparse/dist_coo_spmv.cu +++ b/cpp/test/sparse/dist_coo_spmv.cu @@ -16,6 +16,7 @@ #include +#include #include #include #include @@ -26,7 +27,7 @@ #include #include -#include "../test_utils.h" +#include "../test_utils.cuh" #include @@ -158,13 +159,12 @@ class SparseDistanceCOOSPMVTest case raft::distance::DistanceType::LpUnexpanded: { compute_dist( detail::PDiff(params.input_configuration.metric_arg), detail::Sum(), detail::AtomicAdd()); - float p = 1.0f / params.input_configuration.metric_arg; - raft::linalg::unaryOp( - out_dists.data(), - out_dists.data(), - dist_config.a_nrows * dist_config.b_nrows, - [=] __device__(value_t input) { return powf(input, p); }, - dist_config.handle.get_stream()); + value_t p = value_t{1} / params.input_configuration.metric_arg; + raft::linalg::unaryOp(out_dists.data(), + out_dists.data(), + dist_config.a_nrows * dist_config.b_nrows, + raft::pow_const_op{p}, + dist_config.handle.get_stream()); } break; default: throw raft::exception("Unknown distance"); diff --git a/cpp/test/sparse/distance.cu b/cpp/test/sparse/distance.cu index 4ce2f4cbde..367bfddad1 100644 --- a/cpp/test/sparse/distance.cu +++ b/cpp/test/sparse/distance.cu @@ -24,7 +24,7 @@ #include -#include "../test_utils.h" +#include "../test_utils.cuh" namespace raft { namespace sparse { diff --git a/cpp/test/sparse/filter.cu b/cpp/test/sparse/filter.cu index ba80c84fd5..6ada5ddcad 100644 --- a/cpp/test/sparse/filter.cu +++ b/cpp/test/sparse/filter.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include diff --git a/cpp/test/sparse/neighbors/brute_force.cu b/cpp/test/sparse/neighbors/brute_force.cu index 8fa5e8322d..96ba3bc48f 100644 --- a/cpp/test/sparse/neighbors/brute_force.cu +++ b/cpp/test/sparse/neighbors/brute_force.cu @@ -17,7 +17,7 @@ #include #include -#include "../../test_utils.h" +#include "../../test_utils.cuh" #include #include diff --git a/cpp/test/sparse/neighbors/connect_components.cu b/cpp/test/sparse/neighbors/connect_components.cu index fc4eecd4ee..f469cc1aa2 100644 --- a/cpp/test/sparse/neighbors/connect_components.cu +++ b/cpp/test/sparse/neighbors/connect_components.cu @@ -34,7 +34,7 @@ #include #include -#include "../../test_utils.h" +#include "../../test_utils.cuh" namespace raft { namespace sparse { diff --git a/cpp/test/sparse/neighbors/knn_graph.cu b/cpp/test/sparse/neighbors/knn_graph.cu index d6f9e8386f..c3700a536c 100644 --- a/cpp/test/sparse/neighbors/knn_graph.cu +++ b/cpp/test/sparse/neighbors/knn_graph.cu @@ -14,7 +14,7 @@ * limitations under the License. 
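The dist_coo_spmv.cu hunk above shows the plug-a-constant functors removing a capturing device lambda: the 1/p exponent of the unexpanded Minkowski finalization now travels inside pow_const_op. In isolation, that step looks roughly like this (buffer and function names are illustrative):

#include <raft/core/operators.hpp>
#include <raft/linalg/unary_op.cuh>

template <typename value_t>
void minkowski_finalize(value_t* d_dists, size_t n, value_t metric_arg, cudaStream_t stream)
{
  // dist <- dist^(1/p); the exponent is stored in the functor instead of a lambda capture
  value_t p = value_t{1} / metric_arg;
  raft::linalg::unaryOp(d_dists, d_dists, n, raft::pow_const_op<value_t>{p}, stream);
}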
*/ -#include "../../test_utils.h" +#include "../../test_utils.cuh" #include #include #include diff --git a/cpp/test/sparse/norm.cu b/cpp/test/sparse/norm.cu index 8e54edd6c9..1a69acc535 100644 --- a/cpp/test/sparse/norm.cu +++ b/cpp/test/sparse/norm.cu @@ -16,7 +16,7 @@ #include -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include diff --git a/cpp/test/sparse/reduce.cu b/cpp/test/sparse/reduce.cu index 4280192723..5752624435 100644 --- a/cpp/test/sparse/reduce.cu +++ b/cpp/test/sparse/reduce.cu @@ -16,7 +16,7 @@ #include -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/sparse/row_op.cu b/cpp/test/sparse/row_op.cu index 732bd06103..6393c5ee86 100644 --- a/cpp/test/sparse/row_op.cu +++ b/cpp/test/sparse/row_op.cu @@ -19,7 +19,7 @@ #include #include -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include diff --git a/cpp/test/sparse/sort.cu b/cpp/test/sparse/sort.cu index 9b75965498..23c2f5b67a 100644 --- a/cpp/test/sparse/sort.cu +++ b/cpp/test/sparse/sort.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/sparse/spgemmi.cu b/cpp/test/sparse/spgemmi.cu index a132c94fde..653c2fa29b 100644 --- a/cpp/test/sparse/spgemmi.cu +++ b/cpp/test/sparse/spgemmi.cu @@ -16,7 +16,7 @@ #include -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include diff --git a/cpp/test/sparse/symmetrize.cu b/cpp/test/sparse/symmetrize.cu index 7cf1a1e07d..6f2f877304 100644 --- a/cpp/test/sparse/symmetrize.cu +++ b/cpp/test/sparse/symmetrize.cu @@ -22,7 +22,7 @@ #include #include -#include "../test_utils.h" +#include "../test_utils.cuh" #include diff --git a/cpp/test/stats/accuracy.cu b/cpp/test/stats/accuracy.cu index 192c187794..eaccdecab4 100644 --- a/cpp/test/stats/accuracy.cu +++ b/cpp/test/stats/accuracy.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/stats/adjusted_rand_index.cu b/cpp/test/stats/adjusted_rand_index.cu index f113af821d..52bc72174a 100644 --- a/cpp/test/stats/adjusted_rand_index.cu +++ b/cpp/test/stats/adjusted_rand_index.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/stats/completeness_score.cu b/cpp/test/stats/completeness_score.cu index 2f8a40afdc..a9d1748f88 100644 --- a/cpp/test/stats/completeness_score.cu +++ b/cpp/test/stats/completeness_score.cu @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/stats/contingencyMatrix.cu b/cpp/test/stats/contingencyMatrix.cu index 7943610689..d27114388e 100644 --- a/cpp/test/stats/contingencyMatrix.cu +++ b/cpp/test/stats/contingencyMatrix.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/stats/cov.cu b/cpp/test/stats/cov.cu index 890c5b7826..59a2c6e081 100644 --- a/cpp/test/stats/cov.cu +++ b/cpp/test/stats/cov.cu @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/stats/dispersion.cu b/cpp/test/stats/dispersion.cu index 4f18c9fb54..e414fcf5f4 100644 --- a/cpp/test/stats/dispersion.cu +++ b/cpp/test/stats/dispersion.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/stats/entropy.cu b/cpp/test/stats/entropy.cu index 04aa9f7a80..96b2b9f590 100644 --- a/cpp/test/stats/entropy.cu +++ b/cpp/test/stats/entropy.cu @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/stats/histogram.cu b/cpp/test/stats/histogram.cu index d9793a57df..76677ac27c 100644 --- a/cpp/test/stats/histogram.cu +++ b/cpp/test/stats/histogram.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/stats/homogeneity_score.cu b/cpp/test/stats/homogeneity_score.cu index 9bd6d9266b..ecbf160770 100644 --- a/cpp/test/stats/homogeneity_score.cu +++ b/cpp/test/stats/homogeneity_score.cu @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/stats/information_criterion.cu b/cpp/test/stats/information_criterion.cu index 4a9a2128c6..2cfbd787c6 100644 --- a/cpp/test/stats/information_criterion.cu +++ b/cpp/test/stats/information_criterion.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include diff --git a/cpp/test/stats/kl_divergence.cu b/cpp/test/stats/kl_divergence.cu index 58a64f7199..b5a6c393f3 100644 --- a/cpp/test/stats/kl_divergence.cu +++ b/cpp/test/stats/kl_divergence.cu @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/stats/mean.cu b/cpp/test/stats/mean.cu index b299f81f68..19398d6d8e 100644 --- a/cpp/test/stats/mean.cu +++ b/cpp/test/stats/mean.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/stats/mean_center.cu b/cpp/test/stats/mean_center.cu index 30dcdd475b..31947ef527 100644 --- a/cpp/test/stats/mean_center.cu +++ b/cpp/test/stats/mean_center.cu @@ -15,7 +15,7 @@ */ #include "../linalg/matrix_vector_op.cuh" -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/stats/meanvar.cu b/cpp/test/stats/meanvar.cu index 424395c5e8..fb9fc13dec 100644 --- a/cpp/test/stats/meanvar.cu +++ b/cpp/test/stats/meanvar.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/stats/minmax.cu b/cpp/test/stats/minmax.cu index a2ba6bfc9e..1171995d5c 100644 --- a/cpp/test/stats/minmax.cu +++ b/cpp/test/stats/minmax.cu @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/stats/mutual_info_score.cu b/cpp/test/stats/mutual_info_score.cu index fb9362df52..8b6e7b2095 100644 --- a/cpp/test/stats/mutual_info_score.cu +++ b/cpp/test/stats/mutual_info_score.cu @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/stats/r2_score.cu b/cpp/test/stats/r2_score.cu index d77daacb04..7fb15505ab 100644 --- a/cpp/test/stats/r2_score.cu +++ b/cpp/test/stats/r2_score.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/stats/rand_index.cu b/cpp/test/stats/rand_index.cu index 67e4ab5517..0010f3cbcd 100644 --- a/cpp/test/stats/rand_index.cu +++ b/cpp/test/stats/rand_index.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include diff --git a/cpp/test/stats/regression_metrics.cu b/cpp/test/stats/regression_metrics.cu index effc3d04dd..86ac03c8b3 100644 --- a/cpp/test/stats/regression_metrics.cu +++ b/cpp/test/stats/regression_metrics.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/stats/silhouette_score.cu b/cpp/test/stats/silhouette_score.cu index 37a6fff786..876926b71a 100644 --- a/cpp/test/stats/silhouette_score.cu +++ b/cpp/test/stats/silhouette_score.cu @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/stats/stddev.cu b/cpp/test/stats/stddev.cu index 73f30f17e9..7f54eee2ab 100644 --- a/cpp/test/stats/stddev.cu +++ b/cpp/test/stats/stddev.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/stats/sum.cu b/cpp/test/stats/sum.cu index e67988abb0..c69bd04c6e 100644 --- a/cpp/test/stats/sum.cu +++ b/cpp/test/stats/sum.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include diff --git a/cpp/test/stats/trustworthiness.cu b/cpp/test/stats/trustworthiness.cu index cbb8228f8f..a95cddf5aa 100644 --- a/cpp/test/stats/trustworthiness.cu +++ b/cpp/test/stats/trustworthiness.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/stats/v_measure.cu b/cpp/test/stats/v_measure.cu index 0cbc2da7d9..79899c1d75 100644 --- a/cpp/test/stats/v_measure.cu +++ b/cpp/test/stats/v_measure.cu @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/stats/weighted_mean.cu b/cpp/test/stats/weighted_mean.cu index 9f33855572..8b4af07898 100644 --- a/cpp/test/stats/weighted_mean.cu +++ b/cpp/test/stats/weighted_mean.cu @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "../test_utils.h" +#include "../test_utils.cuh" #include #include #include diff --git a/cpp/test/test_utils.cuh b/cpp/test/test_utils.cuh new file mode 100644 index 0000000000..5704eefae3 --- /dev/null +++ b/cpp/test/test_utils.cuh @@ -0,0 +1,329 @@ +/* + * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "test_utils.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace raft { + +/* + * @brief Helper function to compare 2 device n-D arrays with custom comparison + * @tparam T the data type of the arrays + * @tparam L the comparator lambda or object function + * @param expected expected value(s) + * @param actual actual values + * @param eq_compare the comparator + * @param stream cuda stream + * @return the testing assertion to be later used by ASSERT_TRUE/EXPECT_TRUE + * @{ + */ +template <typename T, typename L> +testing::AssertionResult devArrMatch( + const T* expected, const T* actual, size_t size, L eq_compare, cudaStream_t stream = 0) +{ + std::unique_ptr<T[]> exp_h(new T[size]); + std::unique_ptr<T[]> act_h(new T[size]); + raft::update_host(exp_h.get(), expected, size, stream); + raft::update_host(act_h.get(), actual, size, stream); + RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); + for (size_t i(0); i < size; ++i) { + auto exp = exp_h.get()[i]; + auto act = act_h.get()[i]; + if (!eq_compare(exp, act)) { + return testing::AssertionFailure() << "actual=" << act << " != expected=" << exp << " @" << i; + } + } + return testing::AssertionSuccess(); +} + +template <typename T, typename L> +testing::AssertionResult devArrMatch( + T expected, const T* actual, size_t size, L eq_compare, cudaStream_t stream = 0) +{ + std::unique_ptr<T[]> act_h(new T[size]); + raft::update_host(act_h.get(), actual, size, stream); + RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); + for (size_t i(0); i < size; ++i) { + auto act = act_h.get()[i]; + if (!eq_compare(expected, act)) { + return testing::AssertionFailure() + << "actual=" << act << " != expected=" << expected << " @" << i; + } + } + return testing::AssertionSuccess(); +} + +template <typename T, typename L> +testing::AssertionResult devArrMatch(const T* expected, + const T* actual, + size_t rows, + size_t cols, + L eq_compare, + cudaStream_t stream = 0) +{ + size_t size = rows * cols; + std::unique_ptr<T[]> exp_h(new T[size]); + std::unique_ptr<T[]> act_h(new T[size]); + raft::update_host(exp_h.get(), expected, size, stream); + raft::update_host(act_h.get(), actual, size, stream); + RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); + for (size_t i(0); i < rows; ++i) { + for (size_t j(0); j < cols; ++j) { + auto idx = i * cols + j; // row major assumption!
+      auto exp = exp_h.get()[idx];
+      auto act = act_h.get()[idx];
+      if (!eq_compare(exp, act)) {
+        return testing::AssertionFailure()
+               << "actual=" << act << " != expected=" << exp << " @" << i << "," << j;
+      }
+    }
+  }
+  return testing::AssertionSuccess();
+}
+
+template <typename T, typename L>
+testing::AssertionResult devArrMatch(
+  T expected, const T* actual, size_t rows, size_t cols, L eq_compare, cudaStream_t stream = 0)
+{
+  size_t size = rows * cols;
+  std::unique_ptr<T[]> act_h(new T[size]);
+  raft::update_host(act_h.get(), actual, size, stream);
+  RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
+  for (size_t i(0); i < rows; ++i) {
+    for (size_t j(0); j < cols; ++j) {
+      auto idx = i * cols + j;  // row major assumption!
+      auto act = act_h.get()[idx];
+      if (!eq_compare(expected, act)) {
+        return testing::AssertionFailure()
+               << "actual=" << act << " != expected=" << expected << " @" << i << "," << j;
+      }
+    }
+  }
+  return testing::AssertionSuccess();
+}
+
+/*
+ * @brief Helper function to compare a device n-D array with an expected array
+ *        on the host, using a custom comparison
+ * @tparam T the data type of the arrays
+ * @tparam L the comparator lambda or function object
+ * @param expected_h host array of expected value(s)
+ * @param actual_d device array of actual values
+ * @param eq_compare the comparator
+ * @param stream cuda stream
+ * @return the testing assertion to be later used by ASSERT_TRUE/EXPECT_TRUE
+ */
+template <typename T, typename L>
+testing::AssertionResult devArrMatchHost(
+  const T* expected_h, const T* actual_d, size_t size, L eq_compare, cudaStream_t stream = 0)
+{
+  std::unique_ptr<T[]> act_h(new T[size]);
+  raft::update_host(act_h.get(), actual_d, size, stream);
+  RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
+  bool ok   = true;
+  auto fail = testing::AssertionFailure();
+  for (size_t i(0); i < size; ++i) {
+    auto exp = expected_h[i];
+    auto act = act_h.get()[i];
+    if (!eq_compare(exp, act)) {
+      ok = false;
+      fail << "actual=" << act << " != expected=" << exp << " @" << i << "; ";
+    }
+  }
+  if (!ok) return fail;
+  return testing::AssertionSuccess();
+}
+
+/**
+ * @brief Helper function to compare host vectors using a custom comparison
+ * @tparam T the element type
+ * @tparam L the comparator lambda or function object
+ * @param expected_h host vector of expected value(s)
+ * @param actual_h host vector of actual values
+ * @param eq_compare the comparator
+ * @return the testing assertion to be later used by ASSERT_TRUE/EXPECT_TRUE
+ */
+template <typename T, typename L>
+testing::AssertionResult hostVecMatch(const std::vector<T>& expected_h,
+                                      const std::vector<T>& actual_h,
+                                      L eq_compare)
+{
+  auto n = actual_h.size();
+  if (n != expected_h.size())
+    return testing::AssertionFailure()
+           << "vector size mismatch: "
+           << "actual=" << n << " != expected=" << expected_h.size() << "; ";
+  for (size_t i = 0; i < n; ++i) {
+    auto exp = expected_h[i];
+    auto act = actual_h[i];
+    if (!eq_compare(exp, act)) {
+      return testing::AssertionFailure()
+             << "actual=" << act << " != expected=" << exp << " @" << i << "; ";
+    }
+  }
+  return testing::AssertionSuccess();
+}
+
+/*
+ * @brief Helper function to compare diagonal values of a 2D matrix
+ * @tparam T the data type of the arrays
+ * @tparam L the comparator lambda or function object
+ * @param expected expected value along diagonal
+ * @param actual actual matrix
+ * @param eq_compare the comparator
+ * @param stream cuda stream
+ * @return the testing assertion to be later used by ASSERT_TRUE/EXPECT_TRUE
+ */
+template <typename T, typename L>
+testing::AssertionResult diagonalMatch(
+  T expected, const T* actual, size_t rows, size_t cols, L eq_compare, cudaStream_t stream = 0)
+{
+  size_t size = rows * cols;
+  std::unique_ptr<T[]> act_h(new T[size]);
+  raft::update_host(act_h.get(), actual, size, stream);
+  RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
+  for (size_t i(0); i < rows; ++i) {
+    for (size_t j(0); j < cols; ++j) {
+      if (i != j) continue;
+      auto idx = i * cols + j;  // row major assumption!
+      auto act = act_h.get()[idx];
+      if (!eq_compare(expected, act)) {
+        return testing::AssertionFailure()
+               << "actual=" << act << " != expected=" << expected << " @" << i << "," << j;
+      }
+    }
+  }
+  return testing::AssertionSuccess();
+}
+
+template <typename T, typename IdxT>
+typename std::enable_if_t<std::is_floating_point_v<T>> gen_uniform(T* out,
+                                                                   raft::random::RngState& rng,
+                                                                   IdxT len,
+                                                                   cudaStream_t stream,
+                                                                   T range_min = T(-1),
+                                                                   T range_max = T(1))
+{
+  raft::random::uniform(rng, out, len, range_min, range_max, stream);
+}
+
+template <typename T, typename IdxT>
+typename std::enable_if_t<std::is_integral_v<T>> gen_uniform(T* out,
+                                                             raft::random::RngState& rng,
+                                                             IdxT len,
+                                                             cudaStream_t stream,
+                                                             T range_min = T(0),
+                                                             T range_max = T(100))
+{
+  raft::random::uniformInt(rng, out, len, range_min, range_max, stream);
+}
+
+template <typename T1, typename T2, typename IdxT>
+void gen_uniform(raft::KeyValuePair<T1, T2>* out,
+                 raft::random::RngState& rng,
+                 IdxT len,
+                 cudaStream_t stream)
+{
+  rmm::device_uvector<T1> keys(len, stream);
+  rmm::device_uvector<T2> values(len, stream);
+
+  gen_uniform(keys.data(), rng, len, stream);
+  gen_uniform(values.data(), rng, len, stream);
+
+  const T1* d_keys   = keys.data();
+  const T2* d_values = values.data();
+  auto counting      = thrust::make_counting_iterator(0);
+  thrust::for_each(rmm::exec_policy(stream),
+                   counting,
+                   counting + len,
+                   [out, d_keys, d_values] __device__(int idx) {
+                     out[idx].key   = d_keys[idx];
+                     out[idx].value = d_values[idx];
+                   });
+}
+
+/** @} */
+
+/** time the function call 'func' using cuda events */
+#define TIMEIT_LOOP(ms, count, func)                       \
+  do {                                                     \
+    cudaEvent_t start, stop;                               \
+    RAFT_CUDA_TRY(cudaEventCreate(&start));                \
+    RAFT_CUDA_TRY(cudaEventCreate(&stop));                 \
+    RAFT_CUDA_TRY(cudaEventRecord(start));                 \
+    for (int i = 0; i < count; ++i) {                      \
+      func;                                                \
+    }                                                      \
+    RAFT_CUDA_TRY(cudaEventRecord(stop));                  \
+    RAFT_CUDA_TRY(cudaEventSynchronize(stop));             \
+    ms = 0.f;                                              \
+    RAFT_CUDA_TRY(cudaEventElapsedTime(&ms, start, stop)); \
+    ms /= count; /* mean time per timed iteration */       \
+    RAFT_CUDA_TRY(cudaEventDestroy(start));                \
+    RAFT_CUDA_TRY(cudaEventDestroy(stop));                 \
+  } while (0)
+
+inline std::vector<int> read_csv(std::string filename, int skip_first_n_columns = 1)
+{
+  std::vector<int> result;
+  std::ifstream myFile(filename);
+  if (!myFile.is_open()) throw std::runtime_error("Could not open file");
+
+  std::string line, colname;
+  int val;
+
+  // Skip the header line.
+  if (myFile.good()) {
+    std::getline(myFile, line);
+    std::stringstream ss(line);
+    while (std::getline(ss, colname, ',')) {}
+  }
+
+  int n_lines = 0;
+  while (std::getline(myFile, line)) {
+    std::stringstream ss(line);
+    int colIdx = 0;
+    while (ss >> val) {
+      if (colIdx >= skip_first_n_columns) { result.push_back(val); }
+      // Consume the separator even for skipped columns, so parsing can continue.
+      if (ss.peek() == ',') ss.ignore();
+      colIdx++;
+    }
+    n_lines++;
+  }
+
+  printf("lines read: %d\n", n_lines);
+  myFile.close();
+  return result;
+}
+
+};  // end namespace raft
\ No newline at end of file
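
As a quick illustration of the device-side helpers collected above, a minimal test could look as follows. This is a sketch only: it assumes gtest's TEST macro, rmm's device_uvector, and the raft/random/rng_state.hpp header; the test name, seed, and buffer size are illustrative, not part of the patch.

    #include "test_utils.cuh"

    #include <gtest/gtest.h>

    #include <raft/random/rng_state.hpp>
    #include <rmm/device_uvector.hpp>

    TEST(TestUtils, DevArrMatchSketch)
    {
      cudaStream_t stream;
      RAFT_CUDA_TRY(cudaStreamCreate(&stream));

      raft::random::RngState rng(42ULL);
      rmm::device_uvector<float> buf(100, stream);
      raft::gen_uniform(buf.data(), rng, buf.size(), stream);  // U(-1, 1) by default

      // An array compared against itself passes both exact and approximate checks.
      ASSERT_TRUE(
        raft::devArrMatch(buf.data(), buf.data(), buf.size(), raft::Compare<float>(), stream));
      ASSERT_TRUE(raft::devArrMatch(
        buf.data(), buf.data(), buf.size(), raft::CompareApprox<float>(1e-6f), stream));

      RAFT_CUDA_TRY(cudaStreamDestroy(stream));
    }
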
diff --git a/cpp/test/test_utils.h b/cpp/test/test_utils.h
index 26483e6b2d..75590463b0 100644
--- a/cpp/test/test_utils.h
+++ b/cpp/test/test_utils.h
@@ -15,23 +15,13 @@
  */
 
 #pragma once
-#include
+
+#include
 
 #include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
+#include
+
+#include
 
 namespace raft {
 
@@ -40,13 +30,22 @@ struct Compare {
   bool operator()(const T& a, const T& b) const { return a == b; }
 };
 
+template <typename K, typename V>
+struct Compare<raft::KeyValuePair<K, V>> {
+  bool operator()(const raft::KeyValuePair<K, V>& a,
+                  const raft::KeyValuePair<K, V>& b) const
+  {
+    return a.key == b.key && a.value == b.value;
+  }
+};
+
 template <typename T>
 struct CompareApprox {
   CompareApprox(T eps_) : eps(eps_) {}
   bool operator()(const T& a, const T& b) const
   {
-    T diff  = abs(a - b);
-    T m     = std::max(abs(a), abs(b));
+    T diff  = std::abs(a - b);
+    T m     = std::max(std::abs(a), std::abs(b));
     T ratio = diff > eps ? diff / m : diff;
 
     return (ratio <= eps);
@@ -85,8 +84,8 @@ struct CompareApproxAbs {
   CompareApproxAbs(T eps_) : eps(eps_) {}
   bool operator()(const T& a, const T& b) const
   {
-    T diff  = abs(abs(a) - abs(b));
-    T m     = std::max(abs(a), abs(b));
+    T diff  = std::abs(std::abs(a) - std::abs(b));
+    T m     = std::max(std::abs(a), std::abs(b));
     T ratio = diff >= eps ? diff / m : diff;
     return (ratio <= eps);
   }
@@ -98,210 +97,14 @@
 template <typename T>
 struct CompareApproxNoScaling {
   CompareApproxNoScaling(T eps_) : eps(eps_) {}
-  bool operator()(const T& a, const T& b) const { return (abs(a - b) <= eps); }
+  bool operator()(const T& a, const T& b) const { return (std::abs(a - b) <= eps); }
 
  private:
   T eps;
 };
 
-template <typename T>
-__host__ __device__ T abs(const T& a)
-{
-  return a > T(0) ? a : -a;
-}
-
-/*
- * @brief Helper function to compare 2 device n-D arrays with custom comparison
- * @tparam T the data type of the arrays
- * @tparam L the comparator lambda or object function
- * @param expected expected value(s)
- * @param actual actual values
- * @param eq_compare the comparator
- * @param stream cuda stream
- * @return the testing assertion to be later used by ASSERT_TRUE/EXPECT_TRUE
- * @{
- */
-template <typename T, typename L>
-testing::AssertionResult devArrMatch(
-  const T* expected, const T* actual, size_t size, L eq_compare, cudaStream_t stream = 0)
-{
-  std::unique_ptr<T[]> exp_h(new T[size]);
-  std::unique_ptr<T[]> act_h(new T[size]);
-  raft::update_host(exp_h.get(), expected, size, stream);
-  raft::update_host(act_h.get(), actual, size, stream);
-  RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
-  for (size_t i(0); i < size; ++i) {
-    auto exp = exp_h.get()[i];
-    auto act = act_h.get()[i];
-    if (!eq_compare(exp, act)) {
-      return testing::AssertionFailure() << "actual=" << act << " != expected=" << exp << " @" << i;
-    }
-  }
-  return testing::AssertionSuccess();
-}
-
-template <typename T, typename L>
-testing::AssertionResult devArrMatch(
-  T expected, const T* actual, size_t size, L eq_compare, cudaStream_t stream = 0)
-{
-  std::unique_ptr<T[]> act_h(new T[size]);
-  raft::update_host(act_h.get(), actual, size, stream);
-  RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
-  for (size_t i(0); i < size; ++i) {
-    auto act = act_h.get()[i];
-    if (!eq_compare(expected, act)) {
-      return testing::AssertionFailure()
-             << "actual=" << act << " != expected=" << expected << " @" << i;
-    }
-  }
-  return testing::AssertionSuccess();
-}
-
-template <typename T, typename L>
-testing::AssertionResult devArrMatch(const T* expected,
-                                     const T* actual,
-                                     size_t rows,
-                                     size_t cols,
-                                     L eq_compare,
-                                     cudaStream_t stream = 0)
-{
-  size_t size = rows * cols;
-  std::unique_ptr<T[]> exp_h(new T[size]);
-  std::unique_ptr<T[]> act_h(new T[size]);
-  raft::update_host(exp_h.get(), expected, size, stream);
-  raft::update_host(act_h.get(), actual, size, stream);
-  RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
-  for (size_t i(0); i < rows; ++i) {
-    for (size_t j(0); j < cols; ++j) {
-      auto idx = i * cols + j;  // row major assumption!
-      auto exp = exp_h.get()[idx];
-      auto act = act_h.get()[idx];
-      if (!eq_compare(exp, act)) {
-        return testing::AssertionFailure()
-               << "actual=" << act << " != expected=" << exp << " @" << i << "," << j;
-      }
-    }
-  }
-  return testing::AssertionSuccess();
-}
-
-template <typename T, typename L>
-testing::AssertionResult devArrMatch(
-  T expected, const T* actual, size_t rows, size_t cols, L eq_compare, cudaStream_t stream = 0)
-{
-  size_t size = rows * cols;
-  std::unique_ptr<T[]> act_h(new T[size]);
-  raft::update_host(act_h.get(), actual, size, stream);
-  RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
-  for (size_t i(0); i < rows; ++i) {
-    for (size_t j(0); j < cols; ++j) {
-      auto idx = i * cols + j;  // row major assumption!
-      auto act = act_h.get()[idx];
-      if (!eq_compare(expected, act)) {
-        return testing::AssertionFailure()
-               << "actual=" << act << " != expected=" << expected << " @" << i << "," << j;
-      }
-    }
-  }
-  return testing::AssertionSuccess();
-}
-
-/*
- * @brief Helper function to compare a device n-D arrays with an expected array
- *        on the host, using a custom comparison
- * @tparam T the data type of the arrays
- * @tparam L the comparator lambda or object function
- * @param expected_h host array of expected value(s)
- * @param actual_d device array actual values
- * @param eq_compare the comparator
- * @param stream cuda stream
- * @return the testing assertion to be later used by ASSERT_TRUE/EXPECT_TRUE
- */
-template <typename T, typename L>
-testing::AssertionResult devArrMatchHost(
-  const T* expected_h, const T* actual_d, size_t size, L eq_compare, cudaStream_t stream = 0)
-{
-  std::unique_ptr<T[]> act_h(new T[size]);
-  raft::update_host(act_h.get(), actual_d, size, stream);
-  RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
-  bool ok   = true;
-  auto fail = testing::AssertionFailure();
-  for (size_t i(0); i < size; ++i) {
-    auto exp = expected_h[i];
-    auto act = act_h.get()[i];
-    if (!eq_compare(exp, act)) {
-      ok = false;
-      fail << "actual=" << act << " != expected=" << exp << " @" << i << "; ";
-    }
-  }
-  if (!ok) return fail;
-  return testing::AssertionSuccess();
-}
-
-/**
- * @brief Helper function to compare host vectors using a custom comparison
- * @tparam T the element type
- * @tparam L the comparator lambda or object function
- * @param expected_h host vector of expected value(s)
- * @param actual_h host vector actual values
- * @param eq_compare the comparator
- * @return the testing assertion to be later used by ASSERT_TRUE/EXPECT_TRUE
- */
-template <typename T, typename L>
-testing::AssertionResult hostVecMatch(const std::vector<T>& expected_h,
-                                      const std::vector<T>& actual_h,
-                                      L eq_compare)
-{
-  auto n = actual_h.size();
-  if (n != expected_h.size())
-    return testing::AssertionFailure()
-           << "vector sizez mismatch: "
-           << "actual=" << n << " != expected=" << expected_h.size() << "; ";
-  for (size_t i = 0; i < n; ++i) {
-    auto exp = expected_h[i];
-    auto act = actual_h[i];
-    if (!eq_compare(exp, act)) {
-      return testing::AssertionFailure()
-             << "actual=" << act << " != expected=" << exp << " @" << i << "; ";
-    }
-  }
-  return testing::AssertionSuccess();
-}
-
-/*
- * @brief Helper function to compare diagonal values of a 2D matrix
- * @tparam T the data type of the arrays
- * @tparam L the comparator lambda or object function
- * @param expected expected value along diagonal
- * @param actual actual matrix
- * @param eq_compare the comparator
- * @param stream cuda stream
- * @return the testing assertion to be later used by ASSERT_TRUE/EXPECT_TRUE
- */
-template <typename T, typename L>
-testing::AssertionResult diagonalMatch(
-  T expected, const T* actual, size_t rows, size_t cols, L eq_compare, cudaStream_t stream = 0)
-{
-  size_t size = rows * cols;
-  std::unique_ptr<T[]> act_h(new T[size]);
-  raft::update_host(act_h.get(), actual, size, stream);
-  RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
-  for (size_t i(0); i < rows; ++i) {
-    for (size_t j(0); j < cols; ++j) {
-      if (i != j) continue;
-      auto idx = i * cols + j;  // row major assumption!
-      auto act = act_h.get()[idx];
-      if (!eq_compare(expected, act)) {
-        return testing::AssertionFailure()
-               << "actual=" << act << " != expected=" << expected << " @" << i << "," << j;
-      }
-    }
-  }
-  return testing::AssertionSuccess();
-}
-
 template <typename T, typename L>
-testing::AssertionResult match(const T expected, T actual, L eq_compare)
+testing::AssertionResult match(const T& expected, const T& actual, L eq_compare)
 {
   if (!eq_compare(expected, actual)) {
     return testing::AssertionFailure() << "actual=" << actual << " != expected=" << expected;
@@ -309,103 +112,4 @@ testing::AssertionResult match(const T expected, T actual, L eq_compare)
   return testing::AssertionSuccess();
 }
 
-template <typename T, typename IdxT>
-typename std::enable_if_t<std::is_floating_point_v<T>> gen_uniform(T* out,
-                                                                   raft::random::RngState& rng,
-                                                                   IdxT len,
-                                                                   cudaStream_t stream,
-                                                                   T range_min = T(-1),
-                                                                   T range_max = T(1))
-{
-  raft::random::uniform(rng, out, len, range_min, range_max, stream);
-}
-
-template <typename T, typename IdxT>
-typename std::enable_if_t<std::is_integral_v<T>> gen_uniform(T* out,
-                                                             raft::random::RngState& rng,
-                                                             IdxT len,
-                                                             cudaStream_t stream,
-                                                             T range_min = T(0),
-                                                             T range_max = T(100))
-{
-  raft::random::uniformInt(rng, out, len, range_min, range_max, stream);
-}
-
-template <typename T1, typename T2, typename IdxT>
-void gen_uniform(raft::KeyValuePair<T1, T2>* out,
-                 raft::random::RngState& rng,
-                 IdxT len,
-                 cudaStream_t stream)
-{
-  rmm::device_uvector<T1> keys(len, stream);
-  rmm::device_uvector<T2> values(len, stream);
-
-  gen_uniform(keys.data(), rng, len, stream);
-  gen_uniform(values.data(), rng, len, stream);
-
-  const T1* d_keys   = keys.data();
-  const T2* d_values = values.data();
-  auto counting      = thrust::make_counting_iterator(0);
-  thrust::for_each(rmm::exec_policy(stream),
-                   counting,
-                   counting + len,
-                   [out, d_keys, d_values] __device__(int idx) {
-                     out[idx].key   = d_keys[idx];
-                     out[idx].value = d_values[idx];
-                   });
-}
-
-/** @} */
-
-/** time the function call 'func' using cuda events */
-#define TIMEIT_LOOP(ms, count, func)                       \
-  do {                                                     \
-    cudaEvent_t start, stop;                               \
-    RAFT_CUDA_TRY(cudaEventCreate(&start));                \
-    RAFT_CUDA_TRY(cudaEventCreate(&stop));                 \
-    RAFT_CUDA_TRY(cudaEventRecord(start));                 \
-    for (int i = 0; i < count; ++i) {                      \
-      func;                                                \
-    }                                                      \
-    RAFT_CUDA_TRY(cudaEventRecord(stop));                  \
-    RAFT_CUDA_TRY(cudaEventSynchronize(stop));             \
-    ms = 0.f;                                              \
-    RAFT_CUDA_TRY(cudaEventElapsedTime(&ms, start, stop)); \
-    ms /= args.runs;                                       \
-  } while (0)
-
-inline std::vector<int> read_csv(std::string filename, bool skip_first_n_columns = 1)
-{
-  std::vector<int> result;
-  std::ifstream myFile(filename);
-  if (!myFile.is_open()) throw std::runtime_error("Could not open file");
-
-  std::string line, colname;
-  int val;
-
-  if (myFile.good()) {
-    std::getline(myFile, line);
-    std::stringstream ss(line);
-    while (std::getline(ss, colname, ',')) {}
-  }
-
-  int n_lines = 0;
-  while (std::getline(myFile, line)) {
-    std::stringstream ss(line);
-    int colIdx = 0;
-    while (ss >> val) {
-      if (colIdx >= skip_first_n_columns) {
-        result.push_back(val);
-        if (ss.peek() == ',') ss.ignore();
-      }
-      colIdx++;
-    }
-    n_lines++;
-  }
-
-  printf("lines read: %d\n", n_lines);
-  myFile.close();
-  return result;
-}
-
 };  // end namespace raft
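
The comparators that remain in test_utils.h are host-only. A minimal sketch of their intended use, again assuming gtest; raft::KeyValuePair is assumed to be pulled in through test_utils.h's own includes, and the test name and tolerances are illustrative:

    #include "test_utils.h"

    #include <gtest/gtest.h>

    TEST(TestUtils, HostComparatorSketch)
    {
      // CompareApprox uses a relative tolerance: |a - b| / max(|a|, |b|) <= eps.
      ASSERT_TRUE(raft::match(1.0f, 1.0f + 1e-7f, raft::CompareApprox<float>(1e-6f)));

      // CompareApproxNoScaling uses an absolute tolerance instead.
      ASSERT_TRUE(raft::match(100.0f, 100.5f, raft::CompareApproxNoScaling<float>(1.0f)));

      // The new Compare specialization compares KeyValuePair memberwise.
      raft::KeyValuePair<int, float> kv{1, 2.0f};
      raft::Compare<raft::KeyValuePair<int, float>> kv_eq;
      ASSERT_TRUE(kv_eq(kv, kv));
    }
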