diff --git a/build.sh b/build.sh
index a31d97c22c..d1dd8bdde1 100755
--- a/build.sh
+++ b/build.sh
@@ -357,7 +357,8 @@ if (( ${NUMARGS} == 0 )) || hasArg pylibraft; then
 fi
 
 if hasArg docs; then
-    cmake --build ${LIBRAFT_BUILD_DIR} --target docs_raft
+    set -x
+    cmake --build ${LIBRAFT_BUILD_DIR} -v --target docs_raft
     cd ${SPHINX_BUILD_DIR}
     make html
 fi
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index ce6eb00bc1..32fe654965 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -569,7 +569,6 @@ endif()
 
 ##############################################################################
 # - doxygen targets ----------------------------------------------------------
-
 include(cmake/doxygen.cmake)
 add_doxygen_target(IN_DOXYFILE doxygen/Doxyfile.in
         OUT_DOXYFILE ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
diff --git a/cpp/cmake/doxygen.cmake b/cpp/cmake/doxygen.cmake
index 5b2da57eb5..7d06ec194c 100644
--- a/cpp/cmake/doxygen.cmake
+++ b/cpp/cmake/doxygen.cmake
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -22,6 +22,8 @@ function(add_doxygen_target)
     set(multiValueArgs "")
     cmake_parse_arguments(dox "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
     configure_file(${dox_IN_DOXYFILE} ${dox_OUT_DOXYFILE} @ONLY)
+
+    message("Command: ${DOXYGEN_EXECUTABLE} ${dox_OUT_DOXYFILE}")
     add_custom_target(docs_raft
       ${DOXYGEN_EXECUTABLE} ${dox_OUT_DOXYFILE}
       WORKING_DIRECTORY ${dox_CWD}
diff --git a/cpp/doxygen/Doxyfile.in b/cpp/doxygen/Doxyfile.in
index 549862600a..5517562a9f 100644
--- a/cpp/doxygen/Doxyfile.in
+++ b/cpp/doxygen/Doxyfile.in
@@ -459,7 +459,7 @@ LOOKUP_CACHE_SIZE      = 0
 # DOT_NUM_THREADS setting.
 # Minimum value: 0, maximum value: 32, default value: 1.
 
-NUM_PROC_THREADS       = 1
+NUM_PROC_THREADS       = 0
 
 #---------------------------------------------------------------------------
 # Build related configuration options
@@ -2495,7 +2495,7 @@ PLANTUML_INCLUDE_PATH  =
 # Minimum value: 0, maximum value: 10000, default value: 50.
 # This tag requires that the tag HAVE_DOT is set to YES.
 
-DOT_GRAPH_MAX_NODES    = 50
+DOT_GRAPH_MAX_NODES    = 100
 
 # The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the graphs
 # generated by dot. A depth value of 3 means that only nodes reachable from the
diff --git a/cpp/include/raft/stats/accuracy.cuh b/cpp/include/raft/stats/accuracy.cuh
index 250ce579e5..37cdc280f9 100644
--- a/cpp/include/raft/stats/accuracy.cuh
+++ b/cpp/include/raft/stats/accuracy.cuh
@@ -19,6 +19,7 @@
 
 #pragma once
 
+#include <raft/core/device_mdspan.hpp>
 #include <raft/stats/detail/scores.cuh>
 
 namespace raft {
@@ -39,6 +40,29 @@ float accuracy(const math_t* predictions, const math_t* ref_predictions, int n,
   return detail::accuracy_score(predictions, ref_predictions, n, stream);
 }
 
+/**
+ * @brief Compute accuracy of predictions. Useful for classification.
+ * @tparam value_t: data type for predictions (e.g., int for classification)
+ * @tparam idx_t Index type of matrix extent.
+ * @param[in] handle: the raft handle.
+ * @param[in] predictions: array of predictions (GPU pointer).
+ * @param[in] ref_predictions: array of reference (ground-truth) predictions (GPU pointer).
+ * @return: Accuracy score in [0, 1]; higher is better.
+ */
+template <typename value_t, typename idx_t>
+float accuracy(const raft::handle_t& handle,
+               raft::device_vector_view<const value_t, idx_t> predictions,
+               raft::device_vector_view<const value_t, idx_t> ref_predictions)
+{
+  RAFT_EXPECTS(predictions.size() == ref_predictions.size(), "Size mismatch");
+  RAFT_EXPECTS(predictions.is_exhaustive(), "predictions must be contiguous");
+  RAFT_EXPECTS(ref_predictions.is_exhaustive(), "ref_predictions must be contiguous");
+
+  return detail::accuracy_score(predictions.data_handle(),
+                                ref_predictions.data_handle(),
+                                predictions.extent(0),
+                                handle.get_stream());
+}
 }  // namespace stats
 }  // namespace raft
 
diff --git a/cpp/include/raft/stats/adjusted_rand_index.cuh b/cpp/include/raft/stats/adjusted_rand_index.cuh
index a59d7b4c81..e1b6a241c4 100644
--- a/cpp/include/raft/stats/adjusted_rand_index.cuh
+++ b/cpp/include/raft/stats/adjusted_rand_index.cuh
@@ -24,6 +24,7 @@
 
 #pragma once
 
+#include <raft/core/device_mdspan.hpp>
 #include <raft/stats/detail/adjusted_rand_index.cuh>
 
 namespace raft {
@@ -48,6 +49,32 @@ double adjusted_rand_index(const T* firstClusterArray,
   return detail::compute_adjusted_rand_index(firstClusterArray, secondClusterArray, size, stream);
 }
 
+/**
+ * @brief Function to calculate Adjusted RandIndex as described
+ *        <a href="https://en.wikipedia.org/wiki/Rand_index">here</a>
+ * @tparam value_t data-type for input label arrays
+ * @tparam math_t integral data-type used for computing n-choose-r
+ * @tparam idx_t Index type of matrix extent.
+ * @param[in] handle: the raft handle.
+ * @param[in] first_cluster_array: the array of classes
+ * @param[in] second_cluster_array: the array of classes
+ * @return the Adjusted RandIndex
+ */
+template <typename value_t, typename math_t, typename idx_t>
+double adjusted_rand_index(const raft::handle_t& handle,
+                           raft::device_vector_view<const value_t, idx_t> first_cluster_array,
+                           raft::device_vector_view<const value_t, idx_t> second_cluster_array)
+{
+  RAFT_EXPECTS(first_cluster_array.size() == second_cluster_array.size(), "Size mismatch");
+  RAFT_EXPECTS(first_cluster_array.is_exhaustive(), "first_cluster_array must be contiguous");
+  RAFT_EXPECTS(second_cluster_array.is_exhaustive(), "second_cluster_array must be contiguous");
+
+  return detail::compute_adjusted_rand_index<value_t, math_t>(first_cluster_array.data_handle(),
+                                                              second_cluster_array.data_handle(),
+                                                              first_cluster_array.extent(0),
+                                                              handle.get_stream());
+}
+
 };  // end namespace stats
 };  // end namespace raft
 
diff --git a/cpp/include/raft/stats/completeness_score.cuh b/cpp/include/raft/stats/completeness_score.cuh
index 407986de05..fd535e77d5 100644
--- a/cpp/include/raft/stats/completeness_score.cuh
+++ b/cpp/include/raft/stats/completeness_score.cuh
@@ -19,6 +19,7 @@
 
 #pragma once
 
+#include <raft/core/device_mdspan.hpp>
 #include <raft/stats/detail/homogeneity_score.cuh>
 
 namespace raft {
@@ -30,20 +31,50 @@ namespace stats {
  * @param truthClusterArray: the array of truth classes of type T
  * @param predClusterArray: the array of predicted classes of type T
  * @param size: the size of the data points of type int
- * @param lowerLabelRange: the lower bound of the range of labels
- * @param upperLabelRange: the upper bound of the range of labels
+ * @param lower_label_range: the lower bound of the range of labels
+ * @param upper_label_range: the upper bound of the range of labels
  * @param stream: the cudaStream object
  */
 template <typename T>
 double completeness_score(const T* truthClusterArray,
                           const T* predClusterArray,
                           int size,
-                          T lowerLabelRange,
-                          T upperLabelRange,
+                          T lower_label_range,
+                          T upper_label_range,
                           cudaStream_t stream)
 {
   return detail::homogeneity_score(
-    predClusterArray, truthClusterArray, size, lowerLabelRange, upperLabelRange, stream);
+    predClusterArray, truthClusterArray, size, lower_label_range, upper_label_range, stream);
+}
+
+/**
+ * @brief Function to calculate the completeness score between two clusters
+ *
+ * @tparam value_t the data type
+ * @tparam idx_t Index type of matrix extent.
+ * @param[in] handle: the raft handle.
+ * @param[in] truth_cluster_array: the array of truth classes of type value_t
+ * @param[in] pred_cluster_array: the array of predicted classes of type value_t
+ * @param[in] lower_label_range: the lower bound of the range of labels
+ * @param[in] upper_label_range: the upper bound of the range of labels
+ * @return the cluster completeness score
+ */
+template <typename value_t, typename idx_t>
+double completeness_score(const raft::handle_t& handle,
+                          raft::device_vector_view<const value_t, idx_t> truth_cluster_array,
+                          raft::device_vector_view<const value_t, idx_t> pred_cluster_array,
+                          value_t lower_label_range,
+                          value_t upper_label_range)
+{
+  RAFT_EXPECTS(truth_cluster_array.size() == pred_cluster_array.size(), "Size mismatch");
+  RAFT_EXPECTS(truth_cluster_array.is_exhaustive(), "truth_cluster_array must be contiguous");
+  RAFT_EXPECTS(pred_cluster_array.is_exhaustive(), "pred_cluster_array must be contiguous");
+  return detail::homogeneity_score(pred_cluster_array.data_handle(),
+                                   truth_cluster_array.data_handle(),
+                                   truth_cluster_array.extent(0),
+                                   lower_label_range,
+                                   upper_label_range,
+                                   handle.get_stream());
 }
 
 };  // end namespace stats
diff --git a/cpp/include/raft/stats/contingency_matrix.cuh b/cpp/include/raft/stats/contingency_matrix.cuh
index 081782432c..10dedc44eb 100644
--- a/cpp/include/raft/stats/contingency_matrix.cuh
+++ b/cpp/include/raft/stats/contingency_matrix.cuh
@@ -19,6 +19,10 @@
 
 #pragma once
 
+#include <raft/core/device_mdarray.hpp>
+#include <raft/core/device_mdspan.hpp>
+#include <raft/core/handle.hpp>
+#include <raft/core/host_mdspan.hpp>
 #include <raft/stats/detail/contingencyMatrix.cuh>
 
 namespace raft {
@@ -40,6 +44,31 @@ void getInputClassCardinality(
   detail::getInputClassCardinality(groundTruth, nSamples, stream, minLabel, maxLabel);
 }
 
+/**
+ * @brief use this to allocate output matrix size
+ * size of matrix = (maxLabel - minLabel + 1)^2 * sizeof(int)
+ * @tparam value_t label type
+ * @tparam idx_t Index type of matrix extent.
+ * @param[in]  handle: the raft handle.
+ * @param[in]  groundTruth: device 1-d array for ground truth (num of rows)
+ * @param[out] minLabel: calculated min value in input array
+ * @param[out] maxLabel: calculated max value in input array
+ */
+template <typename value_t, typename idx_t>
+void get_input_class_cardinality(const raft::handle_t& handle,
+                                 raft::device_vector_view<const value_t, idx_t> groundTruth,
+                                 raft::host_scalar_view<value_t> minLabel,
+                                 raft::host_scalar_view<value_t> maxLabel)
+{
+  RAFT_EXPECTS(minLabel.data_handle() != nullptr, "Invalid minLabel pointer");
+  RAFT_EXPECTS(maxLabel.data_handle() != nullptr, "Invalid maxLabel pointer");
+  detail::getInputClassCardinality(groundTruth.data_handle(),
+                                   groundTruth.extent(0),
+                                   handle.get_stream(),
+                                   *minLabel.data_handle(),
+                                   *maxLabel.data_handle());
+}
+
 /**
  * @brief Calculate workspace size for running contingency matrix calculations
  * @tparam T label type
@@ -71,7 +100,7 @@ size_t getContingencyMatrixWorkspaceSize(int nSamples,
  * @param groundTruth: device 1-d array for ground truth (num of rows)
  * @param predictedLabel: device 1-d array for prediction (num of columns)
  * @param nSamples: number of elements in input array
- * @param outMat: output buffer for contingecy matrix
+ * @param outMat: output buffer for contingency matrix
  * @param stream: cuda stream for execution
  * @param workspace: Optional, workspace memory allocation
  * @param workspaceSize: Optional, size of workspace memory
@@ -100,6 +129,82 @@ void contingencyMatrix(const T* groundTruth,
                                      maxLabel);
 }
 
+/**
+ * @brief construct contingency matrix given input ground truth and prediction
+ *        labels. Users should call function get_input_class_cardinality to find
+ *        and allocate memory for output. The workspace is sized and allocated
+ *        internally.
+ * @tparam value_t label type
+ * @tparam out_t output matrix type
+ * @tparam idx_t Index type of matrix extent.
+ * @tparam layout_t Layout type of the input data.
+ * @param[in]  handle: the raft handle.
+ * @param[in]  ground_truth: device 1-d array for ground truth (num of rows)
+ * @param[in]  predicted_label: device 1-d array for prediction (num of columns)
+ * @param[out] out_mat: output buffer for contingency matrix
+ * @param[in]  min_label: Optional, min value in input ground truth array
+ * @param[in]  max_label: Optional, max value in input ground truth array
+ */
+template <typename value_t, typename out_t, typename idx_t, typename layout_t>
+void contingency_matrix(const raft::handle_t& handle,
+                        raft::device_vector_view<const value_t, idx_t> ground_truth,
+                        raft::device_vector_view<const value_t, idx_t> predicted_label,
+                        raft::device_matrix_view<out_t, idx_t, layout_t> out_mat,
+                        std::optional<value_t> min_label = std::nullopt,
+                        std::optional<value_t> max_label = std::nullopt)
+{
+  RAFT_EXPECTS(ground_truth.size() == predicted_label.size(), "Size mismatch");
+  RAFT_EXPECTS(ground_truth.is_exhaustive(), "ground_truth must be contiguous");
+  RAFT_EXPECTS(predicted_label.is_exhaustive(), "predicted_label must be contiguous");
+  RAFT_EXPECTS(out_mat.is_exhaustive(), "out_mat must be contiguous");
+  // NOTE(review): max() for both bounds is presumably detail's "not provided" sentinel - confirm.
+  value_t min_label_value = std::numeric_limits<value_t>::max();
+  value_t max_label_value = std::numeric_limits<value_t>::max();
+  if (min_label.has_value()) { min_label_value = min_label.value(); }
+  if (max_label.has_value()) { max_label_value = max_label.value(); }
+
+  auto workspace_sz = detail::getContingencyMatrixWorkspaceSize(ground_truth.extent(0),
+                                                                ground_truth.data_handle(),
+                                                                handle.get_stream(),
+                                                                min_label_value,
+                                                                max_label_value);
+  auto workspace    = raft::make_device_vector<char>(handle, workspace_sz);
+
+  detail::contingencyMatrix<value_t, out_t>(ground_truth.data_handle(),
+                                            predicted_label.data_handle(),
+                                            ground_truth.extent(0),
+                                            out_mat.data_handle(),
+                                            handle.get_stream(),
+                                            workspace.data_handle(),
+                                            workspace_sz,
+                                            min_label_value,
+                                            max_label_value);
+}
+
+/**
+ * @brief Overload of `contingency_matrix` to help the
+ *   compiler find the above overload, in case users pass in
+ *   `std::nullopt` for the optional arguments.
+ *
+ * Please see above for documentation of `contingency_matrix`.
+ */
+template <typename value_t,
+          typename out_t,
+          typename idx_t,
+          typename layout_t,
+          typename opt_min_label_t,
+          typename opt_max_label_t>
+void contingency_matrix(const raft::handle_t& handle,
+                        raft::device_vector_view<const value_t, idx_t> ground_truth,
+                        raft::device_vector_view<const value_t, idx_t> predicted_label,
+                        raft::device_matrix_view<out_t, idx_t, layout_t> out_mat,
+                        opt_min_label_t&& min_label = std::nullopt,
+                        opt_max_label_t&& max_label = std::nullopt)
+{
+  std::optional<value_t> opt_min_label = std::forward<opt_min_label_t>(min_label);
+  std::optional<value_t> opt_max_label = std::forward<opt_max_label_t>(max_label);
+  contingency_matrix(handle, ground_truth, predicted_label, out_mat, opt_min_label, opt_max_label);
+}
 };  // namespace stats
 };  // namespace raft
 
diff --git a/cpp/include/raft/stats/cov.cuh b/cpp/include/raft/stats/cov.cuh
index 06e8ba0215..a0c2ed2090 100644
--- a/cpp/include/raft/stats/cov.cuh
+++ b/cpp/include/raft/stats/cov.cuh
@@ -19,6 +19,7 @@
 
 #pragma once
 
+#include <raft/core/device_mdspan.hpp>
 #include <raft/stats/detail/cov.cuh>
 namespace raft {
 namespace stats {
@@ -57,6 +58,55 @@ void cov(const raft::handle_t& handle,
 {
   detail::cov(handle, covar, data, mu, D, N, sample, rowMajor, stable, stream);
 }
+
+/**
+ * @brief Compute covariance of the input matrix
+ *
+ * Mean operation is assumed to be performed on a given column.
+ *
+ * @tparam value_t the data type
+ * @tparam idx_t the index type
+ * @tparam layout_t Layout type of the input data.
+ * @param[in]  handle the raft handle
+ * @param[in,out] data the input matrix (this will get mean-centered at the end!)
+ * (length = nrows * ncols)
+ * @param[in]  mu mean vector of the input matrix (length = ncols)
+ * @param[out] covar the output covariance matrix (length = ncols * ncols)
+ * @param[in]  sample whether to evaluate sample covariance or not. In other words,
+ * whether to normalize the output using N-1 or N, for true or false,
+ * respectively
+ * @param[in]  stable whether to run the slower-but-numerically-stable version or not
+ * @note if stable=true, then the input data will be mean centered after this
+ * function returns!
+ */
+template <typename value_t, typename idx_t, typename layout_t>
+void cov(const raft::handle_t& handle,
+         raft::device_matrix_view<value_t, idx_t, layout_t> data,
+         raft::device_vector_view<const value_t, idx_t> mu,
+         raft::device_matrix_view<value_t, idx_t, layout_t> covar,
+         bool sample,
+         bool stable)
+{
+  static_assert(
+    std::is_same_v<layout_t, raft::row_major> || std::is_same_v<layout_t, raft::col_major>,
+    "Data layout not supported");
+  RAFT_EXPECTS(data.extent(1) == covar.extent(0) && data.extent(1) == covar.extent(1),
+               "Size mismatch");
+  RAFT_EXPECTS(data.is_exhaustive(), "data must be contiguous");
+  RAFT_EXPECTS(covar.is_exhaustive(), "covar must be contiguous");
+  RAFT_EXPECTS(mu.is_exhaustive(), "mu must be contiguous");
+  RAFT_EXPECTS(mu.extent(0) == data.extent(1), "Size mismatch between data and mu");
+  detail::cov(handle,
+              covar.data_handle(),
+              data.data_handle(),
+              mu.data_handle(),
+              data.extent(1),  // D
+              data.extent(0),  // N
+              sample,          // detail::cov takes `sample` before `rowMajor`
+              std::is_same_v<layout_t, raft::row_major>,
+              stable,
+              handle.get_stream());
+}
 };  // end namespace stats
 };  // end namespace raft
 
diff --git a/cpp/include/raft/stats/detail/batched/silhouette_score.cuh b/cpp/include/raft/stats/detail/batched/silhouette_score.cuh
index e3b56d2183..25a3721af1 100644
--- a/cpp/include/raft/stats/detail/batched/silhouette_score.cuh
+++ b/cpp/include/raft/stats/detail/batched/silhouette_score.cuh
@@ -112,7 +112,7 @@ __global__ void compute_chunked_a_b_kernel(value_t* a,
 
 template <typename value_idx, typename label_idx>
 rmm::device_uvector<value_idx> get_cluster_counts(const raft::handle_t& handle,
-                                                  label_idx* y,
+                                                  const label_idx* y,
                                                   value_idx& n_rows,
                                                   label_idx& n_labels)
 {
@@ -129,8 +129,8 @@ rmm::device_uvector<value_idx> get_cluster_counts(const raft::handle_t& handle,
 
 template <typename value_t, typename value_idx>
 rmm::device_uvector<value_t> get_pairwise_distance(const raft::handle_t& handle,
-                                                   value_t* left_begin,
-                                                   value_t* right_begin,
+                                                   const value_t* left_begin,
+                                                   const value_t* right_begin,
                                                    value_idx& n_left_rows,
                                                    value_idx& n_right_rows,
                                                    value_idx& n_cols,
@@ -170,10 +170,10 @@ void compute_chunked_a_b(const raft::handle_t& handle,
 template <typename value_t, typename value_idx, typename label_idx>
 value_t silhouette_score(
   const raft::handle_t& handle,
-  value_t* X,
+  const value_t* X,
   value_idx n_rows,
   value_idx n_cols,
-  label_idx* y,
+  const label_idx* y,
   label_idx n_labels,
   value_t* scores,
   value_idx chunk,
@@ -221,8 +221,8 @@ value_t silhouette_score(
 
       auto chunk_stream = handle.get_next_usable_stream(i + chunk * j);
 
-      auto* left_begin  = X + (i * n_cols);
-      auto* right_begin = X + (j * n_cols);
+      const auto* left_begin  = X + (i * n_cols);
+      const auto* right_begin = X + (j * n_cols);
 
       auto n_left_rows  = (i + chunk) < n_rows ? chunk : (n_rows - i);
       auto n_right_rows = (j + chunk) < n_rows ? chunk : (n_rows - j);
diff --git a/cpp/include/raft/stats/detail/histogram.cuh b/cpp/include/raft/stats/detail/histogram.cuh
index 54fe683b77..777e0b7816 100644
--- a/cpp/include/raft/stats/detail/histogram.cuh
+++ b/cpp/include/raft/stats/detail/histogram.cuh
@@ -465,7 +465,7 @@ HistType selectBestHistAlgo(IdxT nbins)
  * @param nbins number of bins
  * @param data input data (length = ncols * nrows)
  * @param nrows data array length in each column (or batch)
- * @param ncols number of columsn (or batch size)
+ * @param ncols number of columns (or batch size)
  * @param stream cuda stream
  * @param binner the operation that computes the bin index of the input data
  *
diff --git a/cpp/include/raft/stats/detail/rand_index.cuh b/cpp/include/raft/stats/detail/rand_index.cuh
index a827427d8f..1e66216929 100644
--- a/cpp/include/raft/stats/detail/rand_index.cuh
+++ b/cpp/include/raft/stats/detail/rand_index.cuh
@@ -125,8 +125,8 @@ __global__ void computeTheNumerator(
  * @param stream: the cudaStream object
  */
 template <typename T>
-double compute_rand_index(T* firstClusterArray,
-                          T* secondClusterArray,
+double compute_rand_index(const T* firstClusterArray,
+                          const T* secondClusterArray,
                           uint64_t size,
                           cudaStream_t stream)
 {
diff --git a/cpp/include/raft/stats/detail/silhouette_score.cuh b/cpp/include/raft/stats/detail/silhouette_score.cuh
index f2e138ed6f..cfaff5fcce 100644
--- a/cpp/include/raft/stats/detail/silhouette_score.cuh
+++ b/cpp/include/raft/stats/detail/silhouette_score.cuh
@@ -56,7 +56,7 @@ template <typename DataT, typename LabelT>
 __global__ void populateAKernel(DataT* sampleToClusterSumOfDistances,
                                 DataT* binCountArray,
                                 DataT* d_aArray,
-                                LabelT* labels,
+                                const LabelT* labels,
                                 int nRows,
                                 int nLabels,
                                 const DataT MAX_VAL)
@@ -102,7 +102,7 @@ __global__ void populateAKernel(DataT* sampleToClusterSumOfDistances,
  * @param stream: the cuda stream where to launch this kernel
  */
 template <typename DataT, typename LabelT>
-void countLabels(LabelT* labels,
+void countLabels(const LabelT* labels,
                  DataT* binCountArray,
                  int nRows,
                  int nUniqueLabels,
@@ -205,10 +205,10 @@ struct MinOp {
 template <typename DataT, typename LabelT>
 DataT silhouette_score(
   const raft::handle_t& handle,
-  DataT* X_in,
+  const DataT* X_in,
   int nRows,
   int nCols,
-  LabelT* labels,
+  const LabelT* labels,
   int nLabels,
   DataT* silhouette_scorePerSample,
   cudaStream_t stream,
diff --git a/cpp/include/raft/stats/detail/weighted_mean.cuh b/cpp/include/raft/stats/detail/weighted_mean.cuh
index e8f85b4af3..43dbe4e7f1 100644
--- a/cpp/include/raft/stats/detail/weighted_mean.cuh
+++ b/cpp/include/raft/stats/detail/weighted_mean.cuh
@@ -71,4 +71,4 @@ void weightedMean(Type* mu,
 }
 };  // end namespace detail
 };  // end namespace stats
-};  // end namespace raft
+};  // end namespace raft
\ No newline at end of file
diff --git a/cpp/include/raft/stats/dispersion.cuh b/cpp/include/raft/stats/dispersion.cuh
index c868092517..9f995e4d5a 100644
--- a/cpp/include/raft/stats/dispersion.cuh
+++ b/cpp/include/raft/stats/dispersion.cuh
@@ -19,6 +19,8 @@
 
 #pragma once
 
+#include <optional>
+#include <raft/core/device_mdspan.hpp>
 #include <raft/stats/detail/dispersion.cuh>
 
 namespace raft {
@@ -51,10 +53,72 @@ DataT dispersion(const DataT* centroids,
                  IdxT dim,
                  cudaStream_t stream)
 {
-  return detail::dispersion(
+  return detail::dispersion<DataT, IdxT, TPB>(
     centroids, clusterSizes, globalCentroid, nClusters, nPoints, dim, stream);
 }
 
+/**
+ * @brief Compute cluster dispersion metric. This is very useful for
+ * automatically finding the 'k' (in kmeans) that improves this metric.
+ * The cluster dispersion metric is defined as the square root of the sum of the
+ * squared distances between the cluster centroids and the global centroid
+ * @tparam value_t data type
+ * @tparam idx_t index type
+ * @param[in]  handle the raft handle
+ * @param[in]  centroids the cluster centroids. This is assumed to be row-major
+ *   and of dimension (n_clusters x dim)
+ * @param[in]  cluster_sizes number of points in the dataset which belong to each
+ *   cluster. This is of length n_clusters
+ * @param[out] global_centroid compute the global weighted centroid of all cluster
+ *   centroids. This is of length dim. Use std::nullopt to not return it.
+ * @param[in]  n_points number of points in the dataset
+ * @return the cluster dispersion value
+ */
+template <typename value_t, typename idx_t>
+value_t cluster_dispersion(
+  const raft::handle_t& handle,
+  raft::device_matrix_view<const value_t, idx_t, raft::row_major> centroids,
+  raft::device_vector_view<const idx_t, idx_t> cluster_sizes,
+  std::optional<raft::device_vector_view<value_t, idx_t>> global_centroid,
+  const idx_t n_points)
+{
+  RAFT_EXPECTS(cluster_sizes.extent(0) == centroids.extent(0), "Size mismatch");
+  RAFT_EXPECTS(cluster_sizes.is_exhaustive(), "cluster_sizes must be contiguous");
+
+  value_t* global_centroid_ptr = nullptr;
+  if (global_centroid.has_value()) {
+    RAFT_EXPECTS(global_centroid.value().extent(0) == centroids.extent(1),
+                 "Size mismatch between global_centroid and centroids");
+    RAFT_EXPECTS(global_centroid.value().is_exhaustive(), "global_centroid must be contiguous");
+    global_centroid_ptr = global_centroid.value().data_handle();
+  }
+  return detail::dispersion<value_t, idx_t>(centroids.data_handle(),
+                                            cluster_sizes.data_handle(),
+                                            global_centroid_ptr,
+                                            centroids.extent(0),
+                                            n_points,
+                                            centroids.extent(1),
+                                            handle.get_stream());
+}
+
+/**
+ * @brief Overload of `cluster_dispersion` to help the
+ *   compiler find the above overload, in case users pass in
+ *   `std::nullopt` for the optional arguments.
+ *
+ * Please see above for documentation of `cluster_dispersion`.
+ */
+template <typename value_t, typename idx_t>
+value_t cluster_dispersion(
+  const raft::handle_t& handle,
+  raft::device_matrix_view<const value_t, idx_t, raft::row_major> centroids,
+  raft::device_vector_view<const idx_t, idx_t> cluster_sizes,
+  std::nullopt_t global_centroid,
+  const idx_t n_points)
+{
+  std::optional<raft::device_vector_view<value_t, idx_t>> opt_centroid = global_centroid;
+  return cluster_dispersion(handle, centroids, cluster_sizes, opt_centroid, n_points);
+}
 }  // end namespace stats
 }  // end namespace raft
 
diff --git a/cpp/include/raft/stats/entropy.cuh b/cpp/include/raft/stats/entropy.cuh
index 59cbbd368f..8a98a03c6b 100644
--- a/cpp/include/raft/stats/entropy.cuh
+++ b/cpp/include/raft/stats/entropy.cuh
@@ -18,6 +18,7 @@
 #define __ENTROPY_H
 
 #pragma once
+#include <raft/core/device_mdspan.hpp>
 #include <raft/stats/detail/entropy.cuh>
 
 namespace raft {
@@ -27,6 +28,7 @@ namespace stats {
  * @brief Function to calculate entropy
  * <a href="https://en.wikipedia.org/wiki/Entropy_(information_theory)">more info on entropy</a>
  *
+ * @tparam T data type
  * @param clusterArray: the array of classes of type T
  * @param size: the size of the data points of type int
  * @param lowerLabelRange: the lower bound of the range of labels
@@ -44,6 +46,31 @@ double entropy(const T* clusterArray,
   return detail::entropy(clusterArray, size, lowerLabelRange, upperLabelRange, stream);
 }
 
+/**
+ * @brief Function to calculate entropy
+ * <a href="https://en.wikipedia.org/wiki/Entropy_(information_theory)">more info on entropy</a>
+ *
+ * @tparam value_t data type
+ * @tparam idx_t index type
+ * @param[in] handle the raft handle
+ * @param[in] cluster_array: the array of classes of type value_t
+ * @param[in] lower_label_range: the lower bound of the range of labels
+ * @param[in] upper_label_range: the upper bound of the range of labels
+ * @return the entropy score
+ */
+template <typename value_t, typename idx_t>
+double entropy(const raft::handle_t& handle,
+               raft::device_vector_view<const value_t, idx_t> cluster_array,
+               const value_t lower_label_range,
+               const value_t upper_label_range)
+{
+  RAFT_EXPECTS(cluster_array.is_exhaustive(), "cluster_array must be contiguous");
+  return detail::entropy(cluster_array.data_handle(),
+                         cluster_array.extent(0),
+                         lower_label_range,
+                         upper_label_range,
+                         handle.get_stream());
+}
 };  // end namespace stats
 };  // end namespace raft
 
diff --git a/cpp/include/raft/stats/histogram.cuh b/cpp/include/raft/stats/histogram.cuh
index e8176ebc92..df1c2772f1 100644
--- a/cpp/include/raft/stats/histogram.cuh
+++ b/cpp/include/raft/stats/histogram.cuh
@@ -19,6 +19,7 @@
 
 #pragma once
 
+#include <raft/core/device_mdspan.hpp>
 #include <raft/stats/common.hpp>
 #include <raft/stats/detail/histogram.cuh>
 
@@ -42,7 +43,7 @@ namespace stats {
  * @param nbins number of bins
  * @param data input data (length = ncols * nrows)
  * @param nrows data array length in each column (or batch)
- * @param ncols number of columsn (or batch size)
+ * @param ncols number of columns (or batch size)
  * @param stream cuda stream
  * @param binner the operation that computes the bin index of the input data
  *
@@ -61,6 +62,42 @@ void histogram(HistType type,
   detail::histogram<DataT, IdxT, BinnerOp>(type, bins, nbins, data, nrows, ncols, stream, binner);
 }
 
+/**
+ * @brief Compute the histogram of the input data. The load size is picked
+ * from the length of the input data vector, and large-bin cases are handled
+ * with a specialized smem-based hashing technique.
+ * @tparam value_t input data type
+ * @tparam idx_t data type used to compute indices
+ * @tparam binner_op takes the input data and computes its bin index
+ * @param[in]  handle the raft handle
+ * @param[in]  type histogram implementation type to choose
+ * @param[in]  data input data col-major (length = nrows * ncols)
+ * @param[out] bins the output bins col-major (length = nbins * ncols)
+ * @param[in]  binner the operation that computes the bin index of the input data
+ *
+ * @note signature of binner_op is `int func(value_t, idx_t);`
+ */
+template <typename value_t, typename idx_t, typename binner_op = IdentityBinner<value_t, idx_t>>
+void histogram(const raft::handle_t& handle,
+               HistType type,
+               raft::device_matrix_view<const value_t, idx_t, raft::col_major> data,
+               raft::device_matrix_view<int, idx_t, raft::col_major> bins,
+               binner_op binner = IdentityBinner<value_t, idx_t>())
+{
+  RAFT_EXPECTS(std::is_integral_v<idx_t> && data.extent(0) <= std::numeric_limits<int>::max(),
+               "Index type not supported");
+  RAFT_EXPECTS(bins.extent(1) == data.extent(1), "Size mismatch");
+  RAFT_EXPECTS(bins.is_exhaustive(), "bins must be contiguous");
+  RAFT_EXPECTS(data.is_exhaustive(), "data must be contiguous");
+  // Flatten both views into the pointer/extent arguments of the detail implementation.
+  auto* bins_ptr       = bins.data_handle();
+  const auto* data_ptr = data.data_handle();
+  const auto n_bins    = bins.extent(0);
+  const auto n_rows    = data.extent(0);
+  const auto n_cols    = data.extent(1);
+  detail::histogram<value_t, idx_t, binner_op>(
+    type, bins_ptr, n_bins, data_ptr, n_rows, n_cols, handle.get_stream(), binner);
+}
 };  // end namespace stats
 };  // end namespace raft
 
diff --git a/cpp/include/raft/stats/homogeneity_score.cuh b/cpp/include/raft/stats/homogeneity_score.cuh
index 5fe92db78a..91c479bc99 100644
--- a/cpp/include/raft/stats/homogeneity_score.cuh
+++ b/cpp/include/raft/stats/homogeneity_score.cuh
@@ -19,6 +19,7 @@
 
 #pragma once
 
+#include <raft/core/device_mdspan.hpp>
 #include <raft/stats/detail/homogeneity_score.cuh>
 
 namespace raft {
@@ -47,6 +48,37 @@ double homogeneity_score(const T* truthClusterArray,
     truthClusterArray, predClusterArray, size, lowerLabelRange, upperLabelRange, stream);
 }
 
+/**
+ * @brief Function to calculate the homogeneity score between two clusters
+ * <a href="https://en.wikipedia.org/wiki/Homogeneity_(statistics)">more info on
+ * homogeneity</a>
+ *
+ * @tparam value_t data type
+ * @tparam idx_t index type
+ * @param[in] handle the raft handle
+ * @param[in] truth_cluster_array: the array of truth classes of type value_t
+ * @param[in] pred_cluster_array: the array of predicted classes of type value_t
+ * @param[in] lower_label_range: the lower bound of the range of labels
+ * @param[in] upper_label_range: the upper bound of the range of labels
+ * @return the homogeneity score
+ */
+template <typename value_t, typename idx_t>
+double homogeneity_score(const raft::handle_t& handle,
+                         raft::device_vector_view<const value_t, idx_t> truth_cluster_array,
+                         raft::device_vector_view<const value_t, idx_t> pred_cluster_array,
+                         value_t lower_label_range,
+                         value_t upper_label_range)
+{
+  RAFT_EXPECTS(truth_cluster_array.size() == pred_cluster_array.size(), "Size mismatch");
+  RAFT_EXPECTS(truth_cluster_array.is_exhaustive(), "truth_cluster_array must be contiguous");
+  RAFT_EXPECTS(pred_cluster_array.is_exhaustive(), "pred_cluster_array must be contiguous");
+  // Both arrays have the same length (checked above), so the truth extent is used for both.
+  const auto* truth_labels = truth_cluster_array.data_handle();
+  const auto* pred_labels  = pred_cluster_array.data_handle();
+  const auto n_labels      = truth_cluster_array.extent(0);
+  return detail::homogeneity_score(
+    truth_labels, pred_labels, n_labels, lower_label_range, upper_label_range, handle.get_stream());
+}
 };  // end namespace stats
 };  // end namespace raft
 
diff --git a/cpp/include/raft/stats/information_criterion.cuh b/cpp/include/raft/stats/information_criterion.cuh
index 0744dcdffe..8ab4723d01 100644
--- a/cpp/include/raft/stats/information_criterion.cuh
+++ b/cpp/include/raft/stats/information_criterion.cuh
@@ -29,6 +29,8 @@
 
 #pragma once
 
+#include <raft/core/device_mdspan.hpp>
+#include <raft/core/handle.hpp>
 #include <raft/stats/common.hpp>
 #include <raft/stats/detail/batched/information_criterion.cuh>
 
@@ -63,6 +65,42 @@ void information_criterion_batched(ScalarT* d_ic,
     d_ic, d_loglikelihood, ic_type, n_params, batch_size, n_samples, stream);
 }
 
+/**
+ * Compute the requested information criterion for a batch of series
+ *
+ * @note: it is safe to do the computation in-place (i.e. use the same pointer
+ *        as input and output)
+ *
+ * @tparam value_t data type
+ * @tparam idx_t index type
+ * @param[in]  handle           the raft handle
+ * @param[in]  d_loglikelihood  Log-likelihood for each series (device) length: batch_size
+ * @param[out] d_ic             Information criterion to be returned for each
+ *                              series (device) length: batch_size
+ * @param[in]  ic_type          Type of criterion to compute. See IC_Type
+ * @param[in]  n_params         Number of parameters in the model
+ * @param[in]  n_samples        Number of samples in each series
+ */
+template <typename value_t, typename idx_t>
+void information_criterion_batched(const raft::handle_t& handle,
+                                   raft::device_vector_view<const value_t, idx_t> d_loglikelihood,
+                                   raft::device_vector_view<value_t, idx_t> d_ic,
+                                   IC_Type ic_type,
+                                   idx_t n_params,
+                                   idx_t n_samples)
+{
+  RAFT_EXPECTS(d_ic.size() == d_loglikelihood.size(), "Size mismatch");
+  RAFT_EXPECTS(d_ic.is_exhaustive(), "d_ic must be contiguous");
+  RAFT_EXPECTS(d_loglikelihood.is_exhaustive(), "d_loglikelihood must be contiguous");
+  // The batch size handed to the implementation is the length of the output vector.
+  auto* ic_ptr          = d_ic.data_handle();
+  const auto* loglik    = d_loglikelihood.data_handle();
+  const auto batch_size = d_ic.extent(0);
+  auto stream           = handle.get_stream();
+  batched::detail::information_criterion(
+    ic_ptr, loglik, ic_type, n_params, batch_size, n_samples, stream);
+}
+
 }  // namespace stats
 }  // namespace raft
 #endif
diff --git a/cpp/include/raft/stats/kl_divergence.cuh b/cpp/include/raft/stats/kl_divergence.cuh
index b29f277b4a..265e87dc68 100644
--- a/cpp/include/raft/stats/kl_divergence.cuh
+++ b/cpp/include/raft/stats/kl_divergence.cuh
@@ -19,6 +19,7 @@
 
 #pragma once
 
+#include <raft/core/device_mdspan.hpp>
 #include <raft/stats/detail/kl_divergence.cuh>
 
 namespace raft {
@@ -41,6 +42,30 @@ DataT kl_divergence(const DataT* modelPDF, const DataT* candidatePDF, int size,
   return detail::kl_divergence(modelPDF, candidatePDF, size, stream);
 }
 
+/**
+ * @brief Compute the KL Divergence between a model and a candidate distribution
+ * <a href="https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence">more info on KL
+ * Divergence</a>
+ * @tparam value_t: Data type of the input array
+ * @tparam idx_t index type
+ * @param[in] handle the raft handle
+ * @param[in] modelPDF: the model array of probability density functions of type value_t
+ * @param[in] candidatePDF: the candidate array of probability density functions of type value_t
+ * @return the KL Divergence value
+ */
+template <typename value_t, typename idx_t>
+value_t kl_divergence(const raft::handle_t& handle,
+                      raft::device_vector_view<const value_t, idx_t> modelPDF,
+                      raft::device_vector_view<const value_t, idx_t> candidatePDF)
+{
+  RAFT_EXPECTS(modelPDF.size() == candidatePDF.size(), "Size mismatch");
+  RAFT_EXPECTS(modelPDF.is_exhaustive(), "modelPDF must be contiguous");
+  RAFT_EXPECTS(candidatePDF.is_exhaustive(), "candidatePDF must be contiguous");
+  const auto n_elements = modelPDF.extent(0);
+  return detail::kl_divergence(
+    modelPDF.data_handle(), candidatePDF.data_handle(), n_elements, handle.get_stream());
+}
+
 };  // end namespace stats
 };  // end namespace raft
 
diff --git a/cpp/include/raft/stats/mean.cuh b/cpp/include/raft/stats/mean.cuh
index 976b58c048..d5913e6176 100644
--- a/cpp/include/raft/stats/mean.cuh
+++ b/cpp/include/raft/stats/mean.cuh
@@ -19,9 +19,9 @@
 
 #pragma once
 
-#include "detail/mean.cuh"
-
+#include <raft/core/device_mdspan.hpp>
 #include <raft/core/handle.hpp>
+#include <raft/stats/detail/mean.cuh>
 
 namespace raft {
 namespace stats {
@@ -50,6 +50,41 @@ void mean(
   detail::mean(mu, data, D, N, sample, rowMajor, stream);
 }
 
+/**
+ * @brief Compute mean of the input matrix
+ *
+ * Mean operation is assumed to be performed on a given column.
+ *
+ * @tparam value_t the data type
+ * @tparam idx_t index type
+ * @tparam layout_t Layout type of the input matrix (row_major or col_major).
+ * @param[in]  handle the raft handle
+ * @param[in]  data: the input matrix (must be contiguous)
+ * @param[out] mu: the output mean vector (one entry per column of data)
+ * @param[in]  sample: whether to evaluate sample mean or not. In other words, whether
+ *   to normalize the output using N-1 or N, for true or false, respectively
+ */
+template <typename value_t, typename idx_t, typename layout_t>
+void mean(const raft::handle_t& handle,
+          raft::device_matrix_view<const value_t, idx_t, layout_t> data,
+          raft::device_vector_view<value_t, idx_t> mu,
+          bool sample)
+{
+  static_assert(
+    std::is_same_v<layout_t, raft::row_major> || std::is_same_v<layout_t, raft::col_major>,
+    "Data layout not supported");
+  RAFT_EXPECTS(data.extent(1) == mu.extent(0), "Size mismatch between data and mu");
+  RAFT_EXPECTS(mu.is_exhaustive(), "mu must be contiguous");
+  RAFT_EXPECTS(data.is_exhaustive(), "data must be contiguous");
+  detail::mean(mu.data_handle(),
+               data.data_handle(),
+               data.extent(1),
+               data.extent(0),
+               sample,
+               std::is_same_v<layout_t, raft::row_major>,
+               handle.get_stream());
+}
+
 };  // namespace stats
 };  // namespace raft
 
diff --git a/cpp/include/raft/stats/mean_center.cuh b/cpp/include/raft/stats/mean_center.cuh
index 3b2222ef52..fba2aa5b5a 100644
--- a/cpp/include/raft/stats/mean_center.cuh
+++ b/cpp/include/raft/stats/mean_center.cuh
@@ -19,7 +19,8 @@
 
 #pragma once
 
-#include "detail/mean_center.cuh"
+#include <raft/core/device_mdspan.hpp>
+#include <raft/stats/detail/mean_center.cuh>
 
 namespace raft {
 namespace stats {
@@ -51,6 +52,42 @@ void meanCenter(Type* out,
   detail::meanCenter<Type, IdxType, TPB>(out, data, mu, D, N, rowMajor, bcastAlongRows, stream);
 }
 
+/**
+ * @brief Center the input matrix wrt its mean
+ * @tparam value_t the data type
+ * @tparam idx_t index type
+ * @tparam layout_t Layout type of the input matrix (row_major or col_major).
+ * @param[in]  handle the raft handle
+ * @param[in]  data input matrix of size nrows * ncols
+ * @param[in]  mu the mean vector of size ncols if bcast_along_rows else nrows
+ * @param[out] out the output mean-centered matrix, with the same extents as data
+ * @param[in]  bcast_along_rows whether to broadcast vector along rows or columns
+ */
+template <typename value_t, typename idx_t, typename layout_t>
+void mean_center(const raft::handle_t& handle,
+                 raft::device_matrix_view<const value_t, idx_t, layout_t> data,
+                 raft::device_vector_view<const value_t, idx_t> mu,
+                 raft::device_matrix_view<value_t, idx_t, layout_t> out,
+                 bool bcast_along_rows)
+{
+  static_assert(
+    std::is_same_v<layout_t, raft::row_major> || std::is_same_v<layout_t, raft::col_major>,
+    "Data layout not supported");
+  auto mean_vec_size = bcast_along_rows ? data.extent(1) : data.extent(0);
+  RAFT_EXPECTS(out.extents() == data.extents(), "Size mismatch");
+  RAFT_EXPECTS(mean_vec_size == mu.extent(0), "Size mismatch between data and mu");
+  RAFT_EXPECTS(out.is_exhaustive(), "out must be contiguous");
+  RAFT_EXPECTS(data.is_exhaustive(), "data must be contiguous");
+  detail::meanCenter<value_t, idx_t>(out.data_handle(),
+                                     data.data_handle(),
+                                     mu.data_handle(),
+                                     data.extent(1),
+                                     data.extent(0),
+                                     std::is_same_v<layout_t, raft::row_major>,
+                                     bcast_along_rows,
+                                     handle.get_stream());
+}
+
 /**
  * @brief Add the input matrix wrt its mean
  * @tparam Type the data type
@@ -78,6 +115,42 @@ void meanAdd(Type* out,
   detail::meanAdd<Type, IdxType, TPB>(out, data, mu, D, N, rowMajor, bcastAlongRows, stream);
 }
 
+/**
+ * @brief Add the input matrix wrt its mean
+ * @tparam value_t the data type
+ * @tparam idx_t index type
+ * @tparam layout_t Layout type of the input matrix (row_major or col_major).
+ * @param[in]  handle the raft handle
+ * @param[in]  data input matrix of size nrows * ncols
+ * @param[in]  mu the mean vector of size ncols if bcast_along_rows else nrows
+ * @param[out] out the output matrix, with the same extents as data
+ * @param[in]  bcast_along_rows whether to broadcast vector along rows or columns
+ * @note data and out must be contiguous and share the same layout
+ */
+template <typename value_t, typename idx_t, typename layout_t>
+void mean_add(const raft::handle_t& handle,
+              raft::device_matrix_view<const value_t, idx_t, layout_t> data,
+              raft::device_vector_view<const value_t, idx_t> mu,
+              raft::device_matrix_view<value_t, idx_t, layout_t> out,
+              bool bcast_along_rows)
+{
+  static_assert(
+    std::is_same_v<layout_t, raft::row_major> || std::is_same_v<layout_t, raft::col_major>,
+    "Data layout not supported");
+  auto mean_vec_size = bcast_along_rows ? data.extent(1) : data.extent(0);
+  RAFT_EXPECTS(out.extents() == data.extents(), "Size mismatch");
+  RAFT_EXPECTS(mean_vec_size == mu.extent(0), "Size mismatch between data and mu");
+  RAFT_EXPECTS(out.is_exhaustive(), "out must be contiguous");
+  RAFT_EXPECTS(data.is_exhaustive(), "data must be contiguous");
+  detail::meanAdd<value_t, idx_t>(out.data_handle(),
+                                  data.data_handle(),
+                                  mu.data_handle(),
+                                  data.extent(1),
+                                  data.extent(0),
+                                  std::is_same_v<layout_t, raft::row_major>,
+                                  bcast_along_rows,
+                                  handle.get_stream());
+}
 };  // end namespace stats
 };  // end namespace raft
 
diff --git a/cpp/include/raft/stats/meanvar.cuh b/cpp/include/raft/stats/meanvar.cuh
index 0c3c423493..544aed092d 100644
--- a/cpp/include/raft/stats/meanvar.cuh
+++ b/cpp/include/raft/stats/meanvar.cuh
@@ -18,7 +18,8 @@
 
 #pragma once
 
-#include "detail/meanvar.cuh"
+#include <raft/core/device_mdspan.hpp>
+#include <raft/stats/detail/meanvar.cuh>
 
 namespace raft::stats {
 
@@ -55,6 +56,49 @@ void meanvar(Type* mean,
   detail::meanvar(mean, var, data, D, N, sample, rowMajor, stream);
 }
 
+/**
+ * @brief Compute mean and variance for each column of a given matrix.
+ *
+ * The operation is performed in a single sweep. Consider using it when you need to compute
+ * both mean and variance, or when you need to compute variance but don't have the mean.
+ * It's almost twice faster than running `mean` and `vars` sequentially, because all three
+ * kernels are memory-bound.
+ *
+ * @tparam value_t the data type
+ * @tparam idx_t Integer type used for addressing
+ * @tparam layout_t Layout type of the input matrix (row_major or col_major).
+ * @param[in]  handle the raft handle
+ * @param[in]  data the input matrix of size [N, D]
+ * @param[out] mean the output mean vector of size D
+ * @param[out] var the output variance vector of size D
+ * @param[in]  sample whether to evaluate sample variance or not. In other words, whether to
+ * normalize the variance using N-1 or N, for true or false respectively.
+ */
+template <typename value_t, typename idx_t, typename layout_t>
+void meanvar(const raft::handle_t& handle,
+             raft::device_matrix_view<const value_t, idx_t, layout_t> data,
+             raft::device_vector_view<value_t, idx_t> mean,
+             raft::device_vector_view<value_t, idx_t> var,
+             bool sample)
+{
+  static_assert(
+    std::is_same_v<layout_t, raft::row_major> || std::is_same_v<layout_t, raft::col_major>,
+    "Data layout not supported");
+  RAFT_EXPECTS(data.extent(1) == var.extent(0), "Size mismatch between data and var");
+  RAFT_EXPECTS(mean.size() == var.size(), "Size mismatch between mean and var");
+  RAFT_EXPECTS(mean.is_exhaustive(), "mean must be contiguous");
+  RAFT_EXPECTS(var.is_exhaustive(), "var must be contiguous");
+  RAFT_EXPECTS(data.is_exhaustive(), "data must be contiguous");
+  detail::meanvar(mean.data_handle(),
+                  var.data_handle(),
+                  data.data_handle(),
+                  data.extent(1),
+                  data.extent(0),
+                  sample,
+                  std::is_same_v<layout_t, raft::row_major>,
+                  handle.get_stream());
+}
+
 };  // namespace raft::stats
 
 #endif
diff --git a/cpp/include/raft/stats/minmax.cuh b/cpp/include/raft/stats/minmax.cuh
index 431d06ec6f..305e63cc10 100644
--- a/cpp/include/raft/stats/minmax.cuh
+++ b/cpp/include/raft/stats/minmax.cuh
@@ -18,6 +18,8 @@
 
 #pragma once
 
+#include <optional>
+#include <raft/core/device_mdspan.hpp>
 #include <raft/stats/detail/minmax.cuh>
 #include <raft/util/cuda_utils.cuh>
 #include <raft/util/cudart_utils.hpp>
@@ -68,6 +70,67 @@ void minmax(const T* data,
     data, rowids, colids, nrows, ncols, row_stride, globalmin, globalmax, sampledcols, stream);
 }
 
+/**
+ * @brief Computes min/max across every column of the input matrix, as well as
+ * optionally allow to subsample based on the given row/col ID mapping vectors
+ *
+ * @tparam value_t Data type of input matrix element.
+ * @tparam idx_t Index type of matrix extent.
+ * @param[in]  handle the raft handle
+ * @param[in]  data input data col-major of size [nrows, ncols], unless rowids or
+ * colids length is smaller
+ * @param[in]  rowids optional row ID mappings of length nrows. If you want to
+ * skip this index lookup entirely, pass std::nullopt
+ * @param[in]  colids optional col ID mappings of length ncols. If you want to
+ * skip this index lookup entirely, pass std::nullopt
+ * @param[out] globalmin final col-wise global minimum (size = ncols)
+ * @param[out] globalmax final col-wise global maximum (size = ncols)
+ * @param[out] sampledcols output sampled data. Pass std::nullopt if you don't need this
+ * @note This method makes the following assumptions:
+ * 1. input and output matrices are assumed to be col-major
+ * 2. ncols is small enough to fit the whole of min/max values across all cols
+ *    in shared memory
+ */
+template <typename value_t, typename idx_t>
+void minmax(const raft::handle_t& handle,
+            raft::device_matrix_view<const value_t, idx_t, raft::col_major> data,
+            std::optional<raft::device_vector_view<const unsigned, idx_t>> rowids,
+            std::optional<raft::device_vector_view<const unsigned, idx_t>> colids,
+            raft::device_vector_view<value_t, idx_t> globalmin,
+            raft::device_vector_view<value_t, idx_t> globalmax,
+            std::optional<raft::device_vector_view<value_t, idx_t>> sampledcols)
+{
+  const unsigned* rowids_ptr = nullptr;
+  const unsigned* colids_ptr = nullptr;
+  value_t* sampledcols_ptr   = nullptr;
+  auto nrows                 = data.extent(0);
+  auto ncols                 = data.extent(1);
+  auto row_stride            = data.stride(1);
+  if (rowids.has_value()) {
+    rowids_ptr = rowids.value().data_handle();
+    RAFT_EXPECTS(rowids.value().extent(0) <= nrows, "Rowids size is greater than nrows");
+    nrows = rowids.value().extent(0);
+  }
+  if (colids.has_value()) {
+    colids_ptr = colids.value().data_handle();
+    RAFT_EXPECTS(colids.value().extent(0) <= ncols, "Colids size is greater than ncols");
+    ncols = colids.value().extent(0);
+  }
+  if (sampledcols.has_value()) { sampledcols_ptr = sampledcols.value().data_handle(); }
+  RAFT_EXPECTS(globalmin.extent(0) == ncols, "Size mismatch between globalmin and ncols");
+  RAFT_EXPECTS(globalmax.extent(0) == ncols, "Size mismatch between globalmax and ncols");
+  detail::minmax<value_t>(data.data_handle(),
+                          rowids_ptr,
+                          colids_ptr,
+                          nrows,
+                          ncols,
+                          row_stride,
+                          globalmin.data_handle(),
+                          globalmax.data_handle(),
+                          sampledcols_ptr,
+                          handle.get_stream());
+}
+
 };  // namespace stats
 };  // namespace raft
 #endif
\ No newline at end of file
diff --git a/cpp/include/raft/stats/mutual_info_score.cuh b/cpp/include/raft/stats/mutual_info_score.cuh
index 9e48168e74..e953f12461 100644
--- a/cpp/include/raft/stats/mutual_info_score.cuh
+++ b/cpp/include/raft/stats/mutual_info_score.cuh
@@ -19,6 +19,7 @@
 
 #pragma once
 
+#include <raft/core/device_mdspan.hpp>
 #include <raft/stats/detail/mutual_info_score.cuh>
 
 namespace raft {
@@ -46,6 +47,36 @@ double mutual_info_score(const T* firstClusterArray,
     firstClusterArray, secondClusterArray, size, lowerLabelRange, upperLabelRange, stream);
 }
 
+/**
+ * @brief Function to calculate the mutual information between two clusters
+ * <a href="https://en.wikipedia.org/wiki/Mutual_information">more info on mutual information</a>
+ * @tparam value_t the data type
+ * @tparam idx_t index type
+ * @param[in] handle the raft handle
+ * @param[in] first_cluster_array: the array of classes of type value_t
+ * @param[in] second_cluster_array: the array of classes of type value_t
+ * @param[in] lower_label_range: the lower bound of the range of labels
+ * @param[in] upper_label_range: the upper bound of the range of labels
+ * @return the mutual information score
+ */
+template <typename value_t, typename idx_t>
+double mutual_info_score(const raft::handle_t& handle,
+                         raft::device_vector_view<const value_t, idx_t> first_cluster_array,
+                         raft::device_vector_view<const value_t, idx_t> second_cluster_array,
+                         value_t lower_label_range,
+                         value_t upper_label_range)
+{
+  RAFT_EXPECTS(first_cluster_array.extent(0) == second_cluster_array.extent(0),
+               "Size mismatch between first_cluster_array and second_cluster_array");
+  RAFT_EXPECTS(first_cluster_array.is_exhaustive(), "first_cluster_array must be contiguous");
+  RAFT_EXPECTS(second_cluster_array.is_exhaustive(), "second_cluster_array must be contiguous");
+  return detail::mutual_info_score(first_cluster_array.data_handle(),
+                                   second_cluster_array.data_handle(),
+                                   first_cluster_array.extent(0),
+                                   lower_label_range,
+                                   upper_label_range,
+                                   handle.get_stream());
+}
 };  // end namespace stats
 };  // end namespace raft
 
diff --git a/cpp/include/raft/stats/r2_score.cuh b/cpp/include/raft/stats/r2_score.cuh
index 88fac5aaa6..e7fcdb6a4e 100644
--- a/cpp/include/raft/stats/r2_score.cuh
+++ b/cpp/include/raft/stats/r2_score.cuh
@@ -19,6 +19,7 @@
 
 #pragma once
 
+#include <raft/core/device_mdspan.hpp>
 #include <raft/stats/detail/scores.cuh>
 
 namespace raft {
@@ -45,6 +46,39 @@ math_t r2_score(math_t* y, math_t* y_hat, int n, cudaStream_t stream)
   return detail::r2_score(y, y_hat, n, stream);
 }
 
+/**
+ * Calculates the "Coefficient of Determination" (R-Squared) score
+ * normalizing the sum of squared errors by the total sum of squares.
+ *
+ * This score indicates the proportionate amount of variation in an
+ * expected response variable is explained by the independent variables
+ * in a linear regression model. The larger the R-squared value, the
+ * more variability is explained by the linear regression model.
+ *
+ * @tparam value_t the data type
+ * @tparam idx_t index type
+ * @param[in] handle the raft handle
+ * @param[in] y: Array of ground-truth response variables
+ * @param[in] y_hat: Array of predicted response variables
+ * @return: The R-squared value.
+ * @note The constness of y and y_hat is currently cast away.
+ */
+template <typename value_t, typename idx_t>
+value_t r2_score(const raft::handle_t& handle,
+                 raft::device_vector_view<const value_t, idx_t> y,
+                 raft::device_vector_view<const value_t, idx_t> y_hat)
+{
+  RAFT_EXPECTS(y.extent(0) == y_hat.extent(0), "Size mismatch between y and y_hat");
+  RAFT_EXPECTS(y.is_exhaustive(), "y must be contiguous");
+  RAFT_EXPECTS(y_hat.is_exhaustive(), "y_hat must be contiguous");
+
+  // TODO: Change the underlying implementation to remove the need to const_cast
+  return detail::r2_score(const_cast<value_t*>(y.data_handle()),
+                          const_cast<value_t*>(y_hat.data_handle()),
+                          y.extent(0),
+                          handle.get_stream());
+}
+
 }  // namespace stats
 }  // namespace raft
 
diff --git a/cpp/include/raft/stats/rand_index.cuh b/cpp/include/raft/stats/rand_index.cuh
index 82bf046c4e..72ad53f5d9 100644
--- a/cpp/include/raft/stats/rand_index.cuh
+++ b/cpp/include/raft/stats/rand_index.cuh
@@ -18,6 +18,8 @@
 
 #pragma once
 
+#include <raft/core/device_mdspan.hpp>
+#include <raft/core/handle.hpp>
 #include <raft/stats/detail/rand_index.cuh>
 
 namespace raft {
@@ -37,6 +39,30 @@ double rand_index(T* firstClusterArray, T* secondClusterArray, uint64_t size, cu
   return detail::compute_rand_index(firstClusterArray, secondClusterArray, size, stream);
 }
 
+/**
+ * @brief Function to calculate RandIndex
+ * <a href="https://en.wikipedia.org/wiki/Rand_index">more info on rand index</a>
+ * @tparam value_t the data type
+ * @tparam idx_t index type
+ * @param[in] handle the raft handle
+ * @param[in] first_cluster_array: the array of classes of type value_t
+ * @param[in] second_cluster_array: the array of classes of type value_t
+ * @return: The RandIndex value.
+ */
+template <typename value_t, typename idx_t>
+double rand_index(const raft::handle_t& handle,
+                  raft::device_vector_view<const value_t, idx_t> first_cluster_array,
+                  raft::device_vector_view<const value_t, idx_t> second_cluster_array)
+{
+  RAFT_EXPECTS(first_cluster_array.extent(0) == second_cluster_array.extent(0),
+               "Size mismatch between first_cluster_array and second_cluster_array");
+  RAFT_EXPECTS(first_cluster_array.is_exhaustive(), "first_cluster_array must be contiguous");
+  RAFT_EXPECTS(second_cluster_array.is_exhaustive(), "second_cluster_array must be contiguous");
+  return detail::compute_rand_index(first_cluster_array.data_handle(),
+                                    second_cluster_array.data_handle(),
+                                    second_cluster_array.extent(0),
+                                    handle.get_stream());
+}
 };  // end namespace stats
 };  // end namespace raft
 
diff --git a/cpp/include/raft/stats/regression_metrics.cuh b/cpp/include/raft/stats/regression_metrics.cuh
index 0fb6d39967..fd33f2af49 100644
--- a/cpp/include/raft/stats/regression_metrics.cuh
+++ b/cpp/include/raft/stats/regression_metrics.cuh
@@ -18,6 +18,9 @@
 
 #pragma once
 
+#include <raft/core/device_mdspan.hpp>
+#include <raft/core/handle.hpp>
+#include <raft/core/host_mdspan.hpp>
 #include <raft/stats/detail/scores.cuh>
 
 namespace raft {
@@ -49,6 +52,46 @@ void regression_metrics(const T* predictions,
   detail::regression_metrics(
     predictions, ref_predictions, n, stream, mean_abs_error, mean_squared_error, median_abs_error);
 }
+
+/**
+ * @brief Compute regression metrics mean absolute error, mean squared error, median absolute error
+ * @tparam value_t the data type for predictions (e.g., float or double for regression).
+ * @tparam idx_t index type
+ * @param[in]  handle the raft handle
+ * @param[in]  predictions: array of predictions.
+ * @param[in]  ref_predictions: array of reference (ground-truth) predictions.
+ * @param[out] mean_abs_error: Mean Absolute Error. Sum over n of (|predictions[i] -
+ * ref_predictions[i]|) / n.
+ * @param[out] mean_squared_error: Mean Squared Error. Sum over n of ((predictions[i] -
+ * ref_predictions[i])^2) / n.
+ * @param[out] median_abs_error: Median Absolute Error. Median of |predictions[i] -
+ * ref_predictions[i]| for i in [0, n).
+ */
+template <typename value_t, typename idx_t>
+void regression_metrics(const raft::handle_t& handle,
+                        raft::device_vector_view<const value_t, idx_t> predictions,
+                        raft::device_vector_view<const value_t, idx_t> ref_predictions,
+                        raft::host_scalar_view<double> mean_abs_error,
+                        raft::host_scalar_view<double> mean_squared_error,
+                        raft::host_scalar_view<double> median_abs_error)
+{
+  RAFT_EXPECTS(predictions.extent(0) == ref_predictions.extent(0),
+               "Size mismatch between predictions and ref_predictions");
+  RAFT_EXPECTS(predictions.is_exhaustive(), "predictions must be contiguous");
+  RAFT_EXPECTS(ref_predictions.is_exhaustive(), "ref_predictions must be contiguous");
+  RAFT_EXPECTS(mean_abs_error.data_handle() != nullptr, "mean_abs_error view must not be empty");
+  RAFT_EXPECTS(mean_squared_error.data_handle() != nullptr,
+               "mean_squared_error view must not be empty");
+  RAFT_EXPECTS(median_abs_error.data_handle() != nullptr,
+               "median_abs_error view must not be empty");
+  detail::regression_metrics(predictions.data_handle(),
+                             ref_predictions.data_handle(),
+                             predictions.extent(0),
+                             handle.get_stream(),
+                             *mean_abs_error.data_handle(),
+                             *mean_squared_error.data_handle(),
+                             *median_abs_error.data_handle());
+}
 }  // namespace stats
 }  // namespace raft
 
diff --git a/cpp/include/raft/stats/silhouette_score.cuh b/cpp/include/raft/stats/silhouette_score.cuh
index 9f02cf6d74..0b7d6436dd 100644
--- a/cpp/include/raft/stats/silhouette_score.cuh
+++ b/cpp/include/raft/stats/silhouette_score.cuh
@@ -18,6 +18,7 @@
 
 #pragma once
 
+#include <raft/core/device_mdspan.hpp>
 #include <raft/stats/detail/batched/silhouette_score.cuh>
 #include <raft/stats/detail/silhouette_score.cuh>
 
@@ -73,6 +74,144 @@ value_t silhouette_score_batched(
     handle, X, n_rows, n_cols, y, n_labels, scores, chunk, metric);
 }
 
+/**
+ * @brief main function that returns the average silhouette score for a given set of data and its
+ * clusterings
+ * @tparam value_t: type of the data samples
+ * @tparam label_t: type of the labels
+ * @tparam idx_t index type
+ * @param[in]  handle: raft handle for managing expensive resources
+ * @param[in]  X_in: input matrix Data in row-major format (nRows x nCols)
+ * @param[in]  labels: device vector view holding the label of every data sample (length:
+ * nRows)
+ * @param[out] silhouette_score_per_sample: optional array populated with the silhouette score
+ * for every sample (length: nRows)
+ * @param[in]  n_unique_labels: number of unique labels in the labels array
+ * @param[in]  metric: the numerical value that maps to the type of distance metric to be used in
+ * the calculations
+ * @return: The silhouette score.
+ */
+template <typename value_t, typename label_t, typename idx_t>
+value_t silhouette_score(
+  const raft::handle_t& handle,
+  raft::device_matrix_view<const value_t, idx_t, raft::row_major> X_in,
+  raft::device_vector_view<const label_t, idx_t> labels,
+  std::optional<raft::device_vector_view<value_t, idx_t>> silhouette_score_per_sample,
+  idx_t n_unique_labels,
+  raft::distance::DistanceType metric = raft::distance::DistanceType::L2Unexpanded)
+{
+  RAFT_EXPECTS(labels.extent(0) == X_in.extent(0), "Size mismatch between labels and data");
+
+  value_t* silhouette_score_per_sample_ptr = nullptr;  // stays null when no view is given
+  if (silhouette_score_per_sample.has_value()) {
+    silhouette_score_per_sample_ptr = silhouette_score_per_sample.value().data_handle();
+    RAFT_EXPECTS(silhouette_score_per_sample.value().extent(0) == X_in.extent(0),
+                 "Size mismatch between silhouette_score_per_sample and data");
+  }
+  return detail::silhouette_score(handle,
+                                  X_in.data_handle(),
+                                  X_in.extent(0),
+                                  X_in.extent(1),
+                                  labels.data_handle(),
+                                  n_unique_labels,
+                                  silhouette_score_per_sample_ptr,
+                                  handle.get_stream(),
+                                  metric);
+}
+
+/**
+ * @brief Convenience overload of `silhouette_score` that takes a literal
+ *   `std::nullopt` for the per-sample scores and forwards an empty optional,
+ *   which helps the compiler select the overload above.
+ *
+ * Please see above for documentation of `silhouette_score`.
+ */
+template <typename value_t, typename label_t, typename idx_t>
+value_t silhouette_score(
+  const raft::handle_t& handle,
+  raft::device_matrix_view<const value_t, idx_t, raft::row_major> X_in,
+  raft::device_vector_view<const label_t, idx_t> labels,
+  std::nullopt_t silhouette_score_per_sample,
+  idx_t n_unique_labels,
+  raft::distance::DistanceType metric = raft::distance::DistanceType::L2Unexpanded)
+{
+  std::optional<raft::device_vector_view<value_t, idx_t>> no_scores = silhouette_score_per_sample;
+  return silhouette_score(handle, X_in, labels, no_scores, n_unique_labels, metric);
+}
+
+/**
+ * @brief function that returns the average silhouette score for a given set of data and its
+ * clusterings
+ * @tparam value_t: type of the data samples
+ * @tparam label_t: type of the labels
+ * @tparam idx_t index type
+ * @param[in]  handle: raft handle for managing expensive resources
+ * @param[in]  X: input matrix Data in row-major format (nRows x nCols)
+ * @param[in]  labels: device vector view holding the label of every data sample (length:
+ * nRows)
+ * @param[out] silhouette_score_per_sample: optional array populated with the silhouette score
+ * for every sample (length: nRows)
+ * @param[in]  n_unique_labels: number of unique labels in the labels array
+ * @param[in]  batch_size: number of samples per batch
+ * @param[in]  metric: the numerical value that maps to the type of distance metric to be used in
+ * the calculations
+ * @return: The silhouette score.
+ */
+template <typename value_t, typename label_t, typename idx_t>
+value_t silhouette_score_batched(
+  const raft::handle_t& handle,
+  raft::device_matrix_view<const value_t, idx_t, raft::row_major> X,
+  raft::device_vector_view<const label_t, idx_t> labels,
+  std::optional<raft::device_vector_view<value_t, idx_t>> silhouette_score_per_sample,
+  idx_t n_unique_labels,
+  idx_t batch_size,
+  raft::distance::DistanceType metric = raft::distance::DistanceType::L2Unexpanded)
+{
+  static_assert(std::is_integral_v<idx_t>,
+                "silhouette_score_batched: The index type "
+                "of each mdspan argument must be an integral type.");
+  static_assert(std::is_integral_v<label_t>,
+                "silhouette_score_batched: The label type must be an integral type.");
+  RAFT_EXPECTS(labels.extent(0) == X.extent(0), "Size mismatch between labels and data");
+
+  value_t* scores_ptr = nullptr;  // stays null when no view is given
+  if (silhouette_score_per_sample.has_value()) {
+    scores_ptr = silhouette_score_per_sample.value().data_handle();
+    RAFT_EXPECTS(silhouette_score_per_sample.value().extent(0) == X.extent(0),
+                 "Size mismatch between silhouette_score_per_sample and data");
+  }
+  return batched::detail::silhouette_score(handle,
+                                           X.data_handle(),
+                                           X.extent(0),
+                                           X.extent(1),
+                                           labels.data_handle(),
+                                           n_unique_labels,
+                                           scores_ptr,
+                                           batch_size,
+                                           metric);
+}
+
+/**
+ * @brief Convenience overload of `silhouette_score_batched` that takes a literal
+ *   `std::nullopt` for the per-sample scores and forwards an empty optional,
+ *   which helps the compiler select the overload above.
+ *
+ * Please see above for documentation of `silhouette_score_batched`.
+ */
+template <typename value_t, typename label_t, typename idx_t>
+value_t silhouette_score_batched(
+  const raft::handle_t& handle,
+  raft::device_matrix_view<const value_t, idx_t, raft::row_major> X,
+  raft::device_vector_view<const label_t, idx_t> labels,
+  std::nullopt_t silhouette_score_per_sample,
+  idx_t n_unique_labels,
+  idx_t batch_size,
+  raft::distance::DistanceType metric = raft::distance::DistanceType::L2Unexpanded)
+{
+  std::optional<raft::device_vector_view<value_t, idx_t>> no_scores = silhouette_score_per_sample;
+  return silhouette_score_batched(
+    handle, X, labels, no_scores, n_unique_labels, batch_size, metric);
+}
 };  // namespace stats
 };  // namespace raft
 
diff --git a/cpp/include/raft/stats/stddev.cuh b/cpp/include/raft/stats/stddev.cuh
index 3fc41ebc8c..2747029955 100644
--- a/cpp/include/raft/stats/stddev.cuh
+++ b/cpp/include/raft/stats/stddev.cuh
@@ -18,9 +18,9 @@
 
 #pragma once
 
-#include "detail/stddev.cuh"
-
+#include <raft/core/device_mdspan.hpp>
 #include <raft/core/handle.hpp>
+#include <raft/stats/detail/stddev.cuh>
 
 namespace raft {
 namespace stats {
@@ -87,6 +87,86 @@ void vars(Type* var,
   detail::vars(var, data, mu, D, N, sample, rowMajor, stream);
 }
 
+/**
+ * @brief Compute the standard deviation of the input matrix
+ *
+ * The stddev operation is assumed to be performed on a given column.
+ *
+ * @tparam value_t the data type
+ * @tparam idx_t Integer type used for addressing
+ * @tparam layout_t Layout type of the input matrix (raft::row_major or raft::col_major).
+ * @param[in]  handle the raft handle
+ * @param[in]  data the input matrix
+ * @param[in]  mu the mean vector; its length must equal the number of columns of data
+ * @param[out] std the output stddev vector; its size must equal that of mu
+ * @param[in]  sample whether to evaluate the sample stddev or not. In other words,
+ *  whether to normalize the output using N-1 or N, for true or false,
+ *  respectively
+ */
+template <typename value_t, typename idx_t, typename layout_t>
+void stddev(const raft::handle_t& handle,
+            raft::device_matrix_view<const value_t, idx_t, layout_t> data,
+            raft::device_vector_view<const value_t, idx_t> mu,
+            raft::device_vector_view<value_t, idx_t> std,
+            bool sample)
+{
+  constexpr bool row_major_input = std::is_same_v<layout_t, raft::row_major>;
+  static_assert(row_major_input || std::is_same_v<layout_t, raft::col_major>,
+                "stddev: Layout must be either "
+                "raft::row_major or raft::col_major (or one of their aliases)");
+  const idx_t n_cols = data.extent(1);
+  RAFT_EXPECTS(mu.size() == std.size(), "Size mismatch between mu and std");
+  RAFT_EXPECTS(mu.extent(0) == n_cols, "Size mismatch between data and mu");
+  detail::stddev(std.data_handle(),
+                 data.data_handle(),
+                 mu.data_handle(),
+                 n_cols,
+                 data.extent(0),
+                 sample,
+                 row_major_input,
+                 handle.get_stream());
+}
+
+/**
+ * @brief Compute variance of the input matrix
+ *
+ * Variance operation is assumed to be performed on a given column.
+ *
+ * @tparam value_t the data type
+ * @tparam idx_t Integer type used for addressing
+ * @tparam layout_t Layout type of the input matrix.
+ * @param[in]  handle the raft handle
+ * @param[in]  data the input matrix
+ * @param[in]  mu the mean vector
+ * @param[out] var the output variance vector
+ * @param[in]  sample whether to evaluate sample variance or not. In other words,
+ * whether
+ *  to normalize the output using N-1 or N, for true or false, respectively
+ */
+template <typename value_t, typename idx_t, typename layout_t>
+void vars(const raft::handle_t& handle,
+          raft::device_matrix_view<const value_t, idx_t, layout_t> data,
+          raft::device_vector_view<const value_t, idx_t> mu,
+          raft::device_vector_view<value_t, idx_t> var,
+          bool sample)
+{
+  constexpr bool is_row_major = std::is_same_v<layout_t, raft::row_major>;
+  constexpr bool is_col_major = std::is_same_v<layout_t, raft::col_major>;
+  static_assert(is_row_major || is_col_major,
+                "vars: Layout must be either "
+                "raft::row_major or raft::col_major (or one of their aliases)");
+  RAFT_EXPECTS(mu.size() == var.size(), "Size mismatch between mu and var");
+  RAFT_EXPECTS(mu.extent(0) == data.extent(1), "Size mismatch between data and mu");
+  detail::vars(var.data_handle(),
+               data.data_handle(),
+               mu.data_handle(),
+               data.extent(1),
+               data.extent(0),
+               sample,
+               is_row_major,
+               handle.get_stream());
+}
+
 };  // namespace stats
 };  // namespace raft
 
diff --git a/cpp/include/raft/stats/sum.cuh b/cpp/include/raft/stats/sum.cuh
index 89135dd076..18265c5e3a 100644
--- a/cpp/include/raft/stats/sum.cuh
+++ b/cpp/include/raft/stats/sum.cuh
@@ -19,8 +19,8 @@
 
 #pragma once
 
-#include "detail/sum.cuh"
-
+#include <raft/core/device_mdspan.hpp>
+#include <raft/stats/detail/sum.cuh>
 #include <raft/util/cudart_utils.hpp>
 
 namespace raft {
@@ -46,6 +46,37 @@ void sum(Type* output, const Type* input, IdxType D, IdxType N, bool rowMajor, c
   detail::sum(output, input, D, N, rowMajor, stream);
 }
 
+/**
+ * @brief Compute the sum of the input matrix
+ *
+ * The sum operation is assumed to be performed on a given column.
+ *
+ * @tparam value_t the data type
+ * @tparam idx_t Integer type used for addressing
+ * @tparam layout_t Layout type of the input matrix (raft::row_major or raft::col_major).
+ * @param[in]  handle the raft handle
+ * @param[in]  input the input matrix
+ * @param[out] output the output sum vector; its length must equal the number of input columns
+ */
+template <typename value_t, typename idx_t, typename layout_t>
+void sum(const raft::handle_t& handle,
+         raft::device_matrix_view<const value_t, idx_t, layout_t> input,
+         raft::device_vector_view<value_t, idx_t> output)
+{
+  constexpr bool row_major_input = std::is_same_v<layout_t, raft::row_major>;
+  static_assert(row_major_input || std::is_same_v<layout_t, raft::col_major>,
+                "sum: Layout must be either "
+                "raft::row_major or raft::col_major (or one of their aliases)");
+  const idx_t n_cols = input.extent(1);
+  RAFT_EXPECTS(n_cols == output.extent(0), "Size mismatch between input and output");
+  detail::sum(output.data_handle(),
+              input.data_handle(),
+              n_cols,
+              input.extent(0),
+              row_major_input,
+              handle.get_stream());
+}
+
 };  // end namespace stats
 };  // end namespace raft
 
diff --git a/cpp/include/raft/stats/trustworthiness_score.cuh b/cpp/include/raft/stats/trustworthiness_score.cuh
index c89eab8d2b..b7b3999f77 100644
--- a/cpp/include/raft/stats/trustworthiness_score.cuh
+++ b/cpp/include/raft/stats/trustworthiness_score.cuh
@@ -18,6 +18,8 @@
 #define __TRUSTWORTHINESS_SCORE_H
 
 #pragma once
+#include <raft/core/device_mdspan.hpp>
+#include <raft/core/handle.hpp>
 #include <raft/stats/detail/trustworthiness_score.cuh>
 
 namespace raft {
@@ -48,6 +50,43 @@ double trustworthiness_score(const raft::handle_t& h,
   return detail::trustworthiness_score<math_t, distance_type>(
     h, X, X_embedded, n, m, d, n_neighbors, batchSize);
 }
+
+/**
+ * @brief Compute the trustworthiness score
+ * @tparam distance_type Distance metric forwarded to the underlying implementation
+ * @tparam value_t the data type
+ * @tparam idx_t Integer type used for addressing
+ * @param[in] handle the raft handle
+ * @param[in] X: Data in original dimension
+ * @param[in] X_embedded: Data in target dimension (embedding)
+ * @param[in] n_neighbors Number of neighbors considered by trustworthiness score
+ * @param[in] batch_size Batch size
+ * @return Trustworthiness score
+ * @note The constness of X_embedded is currently cast away and its data is slightly modified.
+ */
+template <raft::distance::DistanceType distance_type, typename value_t, typename idx_t>
+double trustworthiness_score(
+  const raft::handle_t& handle,
+  raft::device_matrix_view<const value_t, idx_t, raft::row_major> X,
+  raft::device_matrix_view<const value_t, idx_t, raft::row_major> X_embedded,
+  int n_neighbors,
+  int batch_size = 512)
+{
+  const idx_t n_samples = X.extent(0);
+  RAFT_EXPECTS(n_samples == X_embedded.extent(0), "Size mismatch between X and X_embedded");
+  RAFT_EXPECTS(std::is_integral_v<idx_t> && n_samples <= std::numeric_limits<int>::max(),
+               "Index type not supported");
+  // TODO: Change the underlying implementation to remove the need to const_cast X_embedded.
+  return detail::trustworthiness_score<value_t, distance_type>(
+    handle,
+    X.data_handle(),
+    const_cast<value_t*>(X_embedded.data_handle()),
+    n_samples,
+    X.extent(1),
+    X_embedded.extent(1),
+    n_neighbors,
+    batch_size);
+}
 }  // namespace stats
 }  // namespace raft
 
diff --git a/cpp/include/raft/stats/v_measure.cuh b/cpp/include/raft/stats/v_measure.cuh
index dd6ebd9b15..b8f16695bc 100644
--- a/cpp/include/raft/stats/v_measure.cuh
+++ b/cpp/include/raft/stats/v_measure.cuh
@@ -18,6 +18,8 @@
 #define __V_MEASURE_H
 
 #pragma once
+#include <raft/core/device_mdspan.hpp>
+#include <raft/core/handle.hpp>
 #include <raft/stats/detail/v_measure.cuh>
 
 namespace raft {
@@ -47,6 +49,41 @@ double v_measure(const T* truthClusterArray,
     truthClusterArray, predClusterArray, size, lowerLabelRange, upperLabelRange, stream, beta);
 }
 
+/**
+ * @brief Function to calculate the v-measure between two clusters
+ *
+ * @tparam value_t the data type
+ * @tparam idx_t Integer type used for addressing
+ * @param[in] handle the raft handle
+ * @param[in] truth_cluster_array: the array of truth classes of type value_t
+ * @param[in] pred_cluster_array: the array of predicted classes of type value_t
+ * @param[in] lower_label_range: the lower bound of the range of labels
+ * @param[in] upper_label_range: the upper bound of the range of labels
+ * @param[in] beta: v_measure parameter
+ * @return the v-measure between the two clusters
+ */
+template <typename value_t, typename idx_t>
+double v_measure(const raft::handle_t& handle,
+                 raft::device_vector_view<const value_t, idx_t> truth_cluster_array,
+                 raft::device_vector_view<const value_t, idx_t> pred_cluster_array,
+                 value_t lower_label_range,
+                 value_t upper_label_range,
+                 double beta = 1.0)
+{
+  RAFT_EXPECTS(truth_cluster_array.extent(0) == pred_cluster_array.extent(0),
+               "Size mismatch between truth_cluster_array and pred_cluster_array");
+  RAFT_EXPECTS(truth_cluster_array.is_exhaustive(), "truth_cluster_array must be contiguous");
+  RAFT_EXPECTS(pred_cluster_array.is_exhaustive(), "pred_cluster_array must be contiguous");
+
+  return detail::v_measure(truth_cluster_array.data_handle(),
+                           pred_cluster_array.data_handle(),
+                           truth_cluster_array.extent(0),
+                           lower_label_range,
+                           upper_label_range,
+                           handle.get_stream(),
+                           beta);
+}
+
 };  // end namespace stats
 };  // end namespace raft
 
diff --git a/cpp/test/stats/adjusted_rand_index.cu b/cpp/test/stats/adjusted_rand_index.cu
index 473972ace4..f113af821d 100644
--- a/cpp/test/stats/adjusted_rand_index.cu
+++ b/cpp/test/stats/adjusted_rand_index.cu
@@ -18,8 +18,8 @@
 #include <algorithm>
 #include <gtest/gtest.h>
 #include <iostream>
+#include <raft/core/device_mdspan.hpp>
 #include <raft/stats/adjusted_rand_index.cuh>
-#include <raft/stats/contingency_matrix.cuh>
 #include <raft/util/cudart_utils.hpp>
 #include <random>
 
@@ -40,11 +40,13 @@ struct adjustedRandIndexParam {
 template <typename T, typename MathT = int>
 class adjustedRandIndexTest : public ::testing::TestWithParam<adjustedRandIndexParam> {
  protected:
-  adjustedRandIndexTest() : firstClusterArray(0, stream), secondClusterArray(0, stream) {}
+  adjustedRandIndexTest()
+    : stream(handle.get_stream()), firstClusterArray(0, stream), secondClusterArray(0, stream)
+  {
+  }
 
   void SetUp() override
   {
-    RAFT_CUDA_TRY(cudaStreamCreate(&stream));
     params    = ::testing::TestWithParam<adjustedRandIndexParam>::GetParam();
     nElements = params.nElements;
 
@@ -62,11 +64,11 @@ class adjustedRandIndexTest : public ::testing::TestWithParam<adjustedRandIndexP
     }
     // allocating and initializing memory to the GPU
     computed_adjusted_rand_index = adjusted_rand_index<T, MathT>(
-      firstClusterArray.data(), secondClusterArray.data(), nElements, stream);
+      handle,
+      raft::make_device_vector_view<const T>(firstClusterArray.data(), nElements),
+      raft::make_device_vector_view<const T>(secondClusterArray.data(), nElements));
   }
 
-  void TearDown() override { RAFT_CUDA_TRY(cudaStreamDestroy(stream)); }
-
   void SetUpDifferentArrays()
   {
     lowerLabelRange = params.lowerLabelRange;
@@ -135,6 +137,8 @@ class adjustedRandIndexTest : public ::testing::TestWithParam<adjustedRandIndexP
     truth_adjusted_rand_index = 1.0;
   }
 
+  raft::handle_t handle;
+  cudaStream_t stream = 0;
   adjustedRandIndexParam params;
   T lowerLabelRange, upperLabelRange;
   rmm::device_uvector<T> firstClusterArray;
@@ -142,7 +146,6 @@ class adjustedRandIndexTest : public ::testing::TestWithParam<adjustedRandIndexP
   int nElements                       = 0;
   double truth_adjusted_rand_index    = 0;
   double computed_adjusted_rand_index = 0;
-  cudaStream_t stream                 = 0;
 };
 
 const std::vector<adjustedRandIndexParam> inputs = {
diff --git a/cpp/test/stats/completeness_score.cu b/cpp/test/stats/completeness_score.cu
index 6f6b5a8afb..2f8a40afdc 100644
--- a/cpp/test/stats/completeness_score.cu
+++ b/cpp/test/stats/completeness_score.cu
@@ -40,6 +40,8 @@ template <typename T>
 class completenessTest : public ::testing::TestWithParam<completenessParam> {
  protected:
   // the constructor
+  completenessTest() : stream(handle.get_stream()) {}
+
   void SetUp() override
   {
     // getting the parameters
@@ -64,9 +66,6 @@ class completenessTest : public ::testing::TestWithParam<completenessParam> {
     }
 
     // allocating and initializing memory to the GPU
-
-    RAFT_CUDA_TRY(cudaStreamCreate(&stream));
-
     rmm::device_uvector<T> truthClusterArray(nElements, stream);
     rmm::device_uvector<T> predClusterArray(nElements, stream);
     raft::update_device(truthClusterArray.data(), arr1.data(), (int)nElements, stream);
@@ -92,18 +91,16 @@ class completenessTest : public ::testing::TestWithParam<completenessParam> {
     if (nElements == 0) truthCompleteness = 1.0;
 
     // calling the completeness CUDA implementation
-    computedCompleteness = raft::stats::completeness_score(truthClusterArray.data(),
-                                                           predClusterArray.data(),
-                                                           nElements,
-                                                           lowerLabelRange,
-                                                           upperLabelRange,
-                                                           stream);
+    computedCompleteness = raft::stats::completeness_score(
+      handle,
+      raft::make_device_vector_view<const T>(truthClusterArray.data(), nElements),
+      raft::make_device_vector_view<const T>(predClusterArray.data(), nElements),
+      lowerLabelRange,
+      upperLabelRange);
   }
 
-  // the destructor
-  void TearDown() override { RAFT_CUDA_TRY(cudaStreamDestroy(stream)); }
-
   // declaring the data values
+  raft::handle_t handle;
   completenessParam params;
   T lowerLabelRange, upperLabelRange;
   int nElements               = 0;
diff --git a/cpp/test/stats/contingencyMatrix.cu b/cpp/test/stats/contingencyMatrix.cu
index 4785c739ed..7943610689 100644
--- a/cpp/test/stats/contingencyMatrix.cu
+++ b/cpp/test/stats/contingencyMatrix.cu
@@ -40,7 +40,7 @@ template <typename T>
 class ContingencyMatrixTest : public ::testing::TestWithParam<ContingencyMatrixParam> {
  protected:
   ContingencyMatrixTest()
-    : pWorkspace(0, stream),
+    : stream(handle.get_stream()),
       dY(0, stream),
       dYHat(0, stream),
       dComputedOutput(0, stream),
@@ -80,7 +80,6 @@ class ContingencyMatrixTest : public ::testing::TestWithParam<ContingencyMatrixP
       std::replace(y_hat.begin(), y_hat.end(), y2, y2_R);
     }
 
-    RAFT_CUDA_TRY(cudaStreamCreate(&stream));
     dY.resize(numElements, stream);
     dYHat.resize(numElements, stream);
 
@@ -88,7 +87,11 @@ class ContingencyMatrixTest : public ::testing::TestWithParam<ContingencyMatrixP
     raft::update_device(dY.data(), &y[0], numElements, stream);
 
     if (params.calcCardinality) {
-      raft::stats::getInputClassCardinality(dY.data(), numElements, stream, minLabel, maxLabel);
+      raft::stats::get_input_class_cardinality(
+        handle,
+        raft::make_device_vector_view<const T>(dY.data(), numElements),
+        raft::make_host_scalar_view(&minLabel),
+        raft::make_host_scalar_view(&maxLabel));
     } else {
       minLabel = lowerLabelRange;
       maxLabel = upperLabelRange;
@@ -111,27 +114,19 @@ class ContingencyMatrixTest : public ::testing::TestWithParam<ContingencyMatrixP
 
     raft::update_device(
       dGoldenOutput.data(), hGoldenOutput.data(), numUniqueClasses * numUniqueClasses, stream);
-
-    workspaceSz = raft::stats::getContingencyMatrixWorkspaceSize(
-      numElements, dY.data(), stream, minLabel, maxLabel);
-    pWorkspace.resize(workspaceSz, stream);
     raft::interruptible::synchronize(stream);
   }
 
-  void TearDown() override { RAFT_CUDA_TRY(cudaStreamDestroy(stream)); }
-
   void RunTest()
   {
     int numElements = params.nElements;
-    raft::stats::contingencyMatrix(dY.data(),
-                                   dYHat.data(),
-                                   numElements,
-                                   dComputedOutput.data(),
-                                   stream,
-                                   (void*)pWorkspace.data(),
-                                   workspaceSz,
-                                   minLabel,
-                                   maxLabel);
+    raft::stats::contingency_matrix(
+      handle,
+      raft::make_device_vector_view<const T>(dY.data(), numElements),
+      raft::make_device_vector_view<const T>(dYHat.data(), numElements),
+      raft::make_device_matrix_view(dComputedOutput.data(), numUniqueClasses, numUniqueClasses),
+      std::make_optional(minLabel),
+      std::make_optional(maxLabel));
 
     raft::interruptible::synchronize(stream);
     ASSERT_TRUE(raft::devArrMatch(dComputedOutput.data(),
@@ -140,12 +135,11 @@ class ContingencyMatrixTest : public ::testing::TestWithParam<ContingencyMatrixP
                                   raft::Compare<T>()));
   }
 
+  raft::handle_t handle;
   ContingencyMatrixParam params;
   int numUniqueClasses = -1;
   T minLabel, maxLabel;
   cudaStream_t stream = 0;
-  size_t workspaceSz;
-  rmm::device_uvector<char> pWorkspace;
   rmm::device_uvector<T> dY, dYHat;
   rmm::device_uvector<int> dComputedOutput, dGoldenOutput;
 };
diff --git a/cpp/test/stats/cov.cu b/cpp/test/stats/cov.cu
index 4ed2215d91..890c5b7826 100644
--- a/cpp/test/stats/cov.cu
+++ b/cpp/test/stats/cov.cu
@@ -69,16 +69,23 @@ class CovTest : public ::testing::TestWithParam<CovInputs<T>> {
     normal(handle, r, data.data(), len, params.mean, var);
     raft::stats::mean(
       mean_act.data(), data.data(), cols, rows, params.sample, params.rowMajor, stream);
-    cov(handle,
-        cov_act.data(),
-        data.data(),
-        mean_act.data(),
-        cols,
-        rows,
-        params.sample,
-        params.rowMajor,
-        params.stable,
-        stream);
+    if (params.rowMajor) {
+      using layout = raft::row_major;
+      cov(handle,
+          raft::make_device_matrix_view<T, std::uint32_t, layout>(data.data(), rows, cols),
+          raft::make_device_vector_view<const T, std::uint32_t>(mean_act.data(), cols),
+          raft::make_device_matrix_view<T, std::uint32_t, layout>(cov_act.data(), cols, cols),
+          params.sample,
+          params.stable);
+    } else {
+      using layout = raft::col_major;
+      cov(handle,
+          raft::make_device_matrix_view<T, std::uint32_t, layout>(data.data(), rows, cols),
+          raft::make_device_vector_view<const T, std::uint32_t>(mean_act.data(), cols),
+          raft::make_device_matrix_view<T, std::uint32_t, layout>(cov_act.data(), cols, cols),
+          params.sample,
+          params.stable);
+    }
 
     T data_h[6]       = {1.0, 2.0, 5.0, 4.0, 2.0, 1.0};
     T cov_cm_ref_h[4] = {4.3333, -2.8333, -2.8333, 2.333};
diff --git a/cpp/test/stats/dispersion.cu b/cpp/test/stats/dispersion.cu
index afad286e98..4f18c9fb54 100644
--- a/cpp/test/stats/dispersion.cu
+++ b/cpp/test/stats/dispersion.cu
@@ -16,6 +16,7 @@
 
 #include "../test_utils.h"
 #include <gtest/gtest.h>
+#include <optional>
 #include <raft/interruptible.hpp>
 #include <raft/random/rng.cuh>
 #include <raft/stats/dispersion.cuh>
@@ -44,14 +45,13 @@ template <typename T>
 template <typename T>
 class DispersionTest : public ::testing::TestWithParam<DispersionInputs<T>> {
  protected:
-  DispersionTest() : exp_mean(0, stream), act_mean(0, stream) {}
+  DispersionTest() : stream(handle.get_stream()), exp_mean(0, stream), act_mean(0, stream) {}
 
   void SetUp() override
   {
     params = ::testing::TestWithParam<DispersionInputs<T>>::GetParam();
     raft::random::RngState r(params.seed);
     int len = params.clusters * params.dim;
-    stream  = handle.get_stream();
     rmm::device_uvector<T> data(len, stream);
     rmm::device_uvector<int> counts(params.clusters, stream);
     exp_mean.resize(params.dim, stream);
@@ -64,8 +64,12 @@ class DispersionTest : public ::testing::TestWithParam<DispersionInputs<T>> {
     for (const auto& val : h_counts) {
       npoints += val;
     }
-    actualVal = dispersion(
-      data.data(), counts.data(), act_mean.data(), params.clusters, npoints, params.dim, stream);
+    actualVal = cluster_dispersion(
+      handle,
+      raft::make_device_matrix_view<const T, int>(data.data(), params.clusters, params.dim),
+      raft::make_device_vector_view<const int, int>(counts.data(), params.clusters),
+      std::make_optional(raft::make_device_vector_view<T, int>(act_mean.data(), params.dim)),
+      npoints);
     expectedVal = T(0);
     std::vector<T> h_data(len, T(0));
     raft::update_host(&(h_data[0]), data.data(), len, stream);
diff --git a/cpp/test/stats/entropy.cu b/cpp/test/stats/entropy.cu
index a3703bdb14..04aa9f7a80 100644
--- a/cpp/test/stats/entropy.cu
+++ b/cpp/test/stats/entropy.cu
@@ -38,6 +38,8 @@ template <typename T>
 class entropyTest : public ::testing::TestWithParam<entropyParam> {
  protected:
   // the constructor
+  entropyTest() : stream(handle.get_stream()) {}
+
   void SetUp() override
   {
     // getting the parameters
@@ -74,17 +76,19 @@ class entropyTest : public ::testing::TestWithParam<entropyParam> {
     }
 
     // allocating and initializing memory to the GPU
-    RAFT_CUDA_TRY(cudaStreamCreate(&stream));
     rmm::device_uvector<T> clusterArray(nElements, stream);
     raft::update_device(clusterArray.data(), &arr1[0], (int)nElements, stream);
 
     raft::interruptible::synchronize(stream);
     // calling the entropy CUDA implementation
-    computedEntropy = raft::stats::entropy(
-      clusterArray.data(), nElements, lowerLabelRange, upperLabelRange, stream);
-    RAFT_CUDA_TRY(cudaStreamDestroy(stream));
+    computedEntropy =
+      raft::stats::entropy(handle,
+                           raft::make_device_vector_view<const T>(clusterArray.data(), nElements),
+                           lowerLabelRange,
+                           upperLabelRange);
   }
 
+  raft::handle_t handle;
   // declaring the data values
   entropyParam params;
   T lowerLabelRange, upperLabelRange;
diff --git a/cpp/test/stats/histogram.cu b/cpp/test/stats/histogram.cu
index 537bde2272..d9793a57df 100644
--- a/cpp/test/stats/histogram.cu
+++ b/cpp/test/stats/histogram.cu
@@ -84,8 +84,12 @@ class HistTest : public ::testing::TestWithParam<HistInputs> {
     RAFT_CUDA_TRY(
       cudaMemsetAsync(ref_bins.data(), 0, sizeof(int) * params.nbins * params.ncols, stream));
     naiveHist(ref_bins.data(), params.nbins, in.data(), params.nrows, params.ncols, stream);
-    histogram<int>(
-      params.type, bins.data(), params.nbins, in.data(), params.nrows, params.ncols, stream);
+    histogram(handle,
+              params.type,
+              raft::make_device_matrix_view<const int, int, raft::col_major>(
+                in.data(), params.nrows, params.ncols),
+              raft::make_device_matrix_view<int, int, raft::col_major>(
+                bins.data(), params.nbins, params.ncols));
     handle.sync_stream();
   }
 
diff --git a/cpp/test/stats/homogeneity_score.cu b/cpp/test/stats/homogeneity_score.cu
index 729863003d..9bd6d9266b 100644
--- a/cpp/test/stats/homogeneity_score.cu
+++ b/cpp/test/stats/homogeneity_score.cu
@@ -47,6 +47,7 @@ class homogeneityTest : public ::testing::TestWithParam<homogeneityParam> {
     nElements       = params.nElements;
     lowerLabelRange = params.lowerLabelRange;
     upperLabelRange = params.upperLabelRange;
+    stream          = handle.get_stream();
 
     // generating random value test input
     std::vector<int> arr1(nElements, 0);
@@ -63,9 +64,6 @@ class homogeneityTest : public ::testing::TestWithParam<homogeneityParam> {
     }
 
     // allocating and initializing memory to the GPU
-
-    RAFT_CUDA_TRY(cudaStreamCreate(&stream));
-
     rmm::device_uvector<T> truthClusterArray(nElements, stream);
     rmm::device_uvector<T> predClusterArray(nElements, stream);
     raft::update_device(truthClusterArray.data(), &arr1[0], (int)nElements, stream);
@@ -91,16 +89,16 @@ class homogeneityTest : public ::testing::TestWithParam<homogeneityParam> {
     if (nElements == 0) truthHomogeneity = 1.0;
 
     // calling the homogeneity CUDA implementation
-    computedHomogeneity = raft::stats::homogeneity_score(truthClusterArray.data(),
-                                                         predClusterArray.data(),
-                                                         nElements,
-                                                         lowerLabelRange,
-                                                         upperLabelRange,
-                                                         stream);
-    RAFT_CUDA_TRY(cudaStreamDestroy(stream));
+    computedHomogeneity = raft::stats::homogeneity_score(
+      handle,
+      raft::make_device_vector_view<const T>(truthClusterArray.data(), nElements),
+      raft::make_device_vector_view<const T>(predClusterArray.data(), nElements),
+      lowerLabelRange,
+      upperLabelRange);
   }
 
   // declaring the data values
+  raft::handle_t handle;
   homogeneityParam params;
   T lowerLabelRange, upperLabelRange;
   int nElements              = 0;
diff --git a/cpp/test/stats/information_criterion.cu b/cpp/test/stats/information_criterion.cu
index c4cf6e950a..4a9a2128c6 100644
--- a/cpp/test/stats/information_criterion.cu
+++ b/cpp/test/stats/information_criterion.cu
@@ -89,13 +89,13 @@ class BatchedICTest : public ::testing::TestWithParam<BatchedICInputs<T>> {
     raft::update_device(loglike_d.data(), loglike_h.data(), params.batch_size, stream);
 
     // Compute the tested results
-    information_criterion_batched(res_d.data(),
-                                  loglike_d.data(),
-                                  params.ic_type,
-                                  params.n_params,
-                                  params.batch_size,
-                                  params.n_samples,
-                                  stream);
+    information_criterion_batched(
+      handle,
+      raft::make_device_vector_view<const T>(loglike_d.data(), params.batch_size),
+      raft::make_device_vector_view(res_d.data(), params.batch_size),
+      params.ic_type,
+      params.n_params,
+      params.n_samples);
 
     // Compute the expected results
     naive_ic(res_h.data(),
diff --git a/cpp/test/stats/kl_divergence.cu b/cpp/test/stats/kl_divergence.cu
index e25f1c3bc5..58a64f7199 100644
--- a/cpp/test/stats/kl_divergence.cu
+++ b/cpp/test/stats/kl_divergence.cu
@@ -39,6 +39,7 @@ class klDivergenceTest : public ::testing::TestWithParam<klDivergenceParam> {
   {
     // getting the parameters
     params = ::testing::TestWithParam<klDivergenceParam>::GetParam();
+    stream = handle.get_stream();
 
     nElements = params.nElements;
 
@@ -54,8 +55,6 @@ class klDivergenceTest : public ::testing::TestWithParam<klDivergenceParam> {
       h_candidatePDF.begin(), h_candidatePDF.end(), [&]() { return realGenerator(dre); });
 
     // allocating and initializing memory to the GPU
-    RAFT_CUDA_TRY(cudaStreamCreate(&stream));
-
     rmm::device_uvector<DataT> d_modelPDF(nElements, stream);
     rmm::device_uvector<DataT> d_candidatePDF(nElements, stream);
     RAFT_CUDA_TRY(cudaMemset(d_modelPDF.data(), 0, d_modelPDF.size() * sizeof(DataT)));
@@ -74,12 +73,14 @@ class klDivergenceTest : public ::testing::TestWithParam<klDivergenceParam> {
     }
 
     // calling the kl_divergence CUDA implementation
-    computedklDivergence =
-      raft::stats::kl_divergence(d_modelPDF.data(), d_candidatePDF.data(), nElements, stream);
-    RAFT_CUDA_TRY(cudaStreamDestroy(stream));
+    computedklDivergence = raft::stats::kl_divergence(
+      handle,
+      raft::make_device_vector_view<const DataT>(d_modelPDF.data(), nElements),
+      raft::make_device_vector_view<const DataT>(d_candidatePDF.data(), nElements));
   }
 
   // declaring the data values
+  raft::handle_t handle;
   klDivergenceParam params;
   int nElements              = 0;
   DataT truthklDivergence    = 0;
diff --git a/cpp/test/stats/mean.cu b/cpp/test/stats/mean.cu
index bec7a3adce..b299f81f68 100644
--- a/cpp/test/stats/mean.cu
+++ b/cpp/test/stats/mean.cu
@@ -49,7 +49,7 @@ class MeanTest : public ::testing::TestWithParam<MeanInputs<T>> {
       rows(params.rows),
       cols(params.cols),
       data(rows * cols, stream),
-      mean_act(rows * cols, stream)
+      mean_act(cols, stream)
   {
   }
 
@@ -65,7 +65,19 @@ class MeanTest : public ::testing::TestWithParam<MeanInputs<T>> {
   void meanSGtest(T* data, cudaStream_t stream)
   {
     int rows = params.rows, cols = params.cols;
-    mean(mean_act.data(), data, cols, rows, params.sample, params.rowMajor, stream);
+    if (params.rowMajor) {
+      using layout = raft::row_major;
+      mean(handle,
+           raft::make_device_matrix_view<const T, int, layout>(data, rows, cols),
+           raft::make_device_vector_view<T, int>(mean_act.data(), cols),
+           params.sample);
+    } else {
+      using layout = raft::col_major;
+      mean(handle,
+           raft::make_device_matrix_view<const T, int, layout>(data, rows, cols),
+           raft::make_device_vector_view<T, int>(mean_act.data(), cols),
+           params.sample);
+    }
   }
 
  protected:
diff --git a/cpp/test/stats/mean_center.cu b/cpp/test/stats/mean_center.cu
index c4f979d82e..30dcdd475b 100644
--- a/cpp/test/stats/mean_center.cu
+++ b/cpp/test/stats/mean_center.cu
@@ -58,18 +58,26 @@ class MeanCenterTest : public ::testing::TestWithParam<MeanCenterInputs<T, IdxTy
   void SetUp() override
   {
     raft::random::RngState r(params.seed);
-    auto len = rows * cols;
+    auto len         = rows * cols;
+    auto meanVecSize = params.bcastAlongRows ? cols : rows;
     normal(handle, r, data.data(), len, params.mean, (T)1.0);
     raft::stats::mean(
       meanVec.data(), data.data(), cols, rows, params.sample, params.rowMajor, stream);
-    meanCenter(out.data(),
-               data.data(),
-               meanVec.data(),
-               cols,
-               rows,
-               params.rowMajor,
-               params.bcastAlongRows,
-               stream);
+    if (params.rowMajor) {
+      using layout = raft::row_major;
+      mean_center(handle,
+                  raft::make_device_matrix_view<const T, int, layout>(data.data(), rows, cols),
+                  raft::make_device_vector_view<const T, int>(meanVec.data(), meanVecSize),
+                  raft::make_device_matrix_view<T, int, layout>(out.data(), rows, cols),
+                  params.bcastAlongRows);
+    } else {
+      using layout = raft::col_major;
+      mean_center(handle,
+                  raft::make_device_matrix_view<const T, int, layout>(data.data(), rows, cols),
+                  raft::make_device_vector_view<const T, int>(meanVec.data(), meanVecSize),
+                  raft::make_device_matrix_view<T, int, layout>(out.data(), rows, cols),
+                  params.bcastAlongRows);
+    }
     raft::linalg::naiveMatVec(out_ref.data(),
                               data.data(),
                               meanVec.data(),
diff --git a/cpp/test/stats/meanvar.cu b/cpp/test/stats/meanvar.cu
index 74e52e670d..424395c5e8 100644
--- a/cpp/test/stats/meanvar.cu
+++ b/cpp/test/stats/meanvar.cu
@@ -67,14 +67,24 @@ class MeanVarTest : public ::testing::TestWithParam<MeanVarInputs<T>> {
   {
     random::RngState r(params.seed);
     normal(handle, r, data.data(), params.cols * params.rows, params.mean, params.stddev);
-    meanvar(mean_act.data(),
-            vars_act.data(),
-            data.data(),
-            params.cols,
-            params.rows,
-            params.sample,
-            params.rowMajor,
-            stream);
+
+    if (params.rowMajor) {
+      using layout = raft::row_major;
+      meanvar(
+        handle,
+        raft::make_device_matrix_view<const T, int, layout>(data.data(), params.rows, params.cols),
+        raft::make_device_vector_view<T, int>(mean_act.data(), params.cols),
+        raft::make_device_vector_view<T, int>(vars_act.data(), params.cols),
+        params.sample);
+    } else {
+      using layout = raft::col_major;
+      meanvar(
+        handle,
+        raft::make_device_matrix_view<const T, int, layout>(data.data(), params.rows, params.cols),
+        raft::make_device_vector_view<T, int>(mean_act.data(), params.cols),
+        raft::make_device_vector_view<T, int>(vars_act.data(), params.cols),
+        params.sample);
+    }
     RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
   }
 
diff --git a/cpp/test/stats/minmax.cu b/cpp/test/stats/minmax.cu
index 0468ebb177..a2ba6bfc9e 100644
--- a/cpp/test/stats/minmax.cu
+++ b/cpp/test/stats/minmax.cu
@@ -17,6 +17,7 @@
 #include "../test_utils.h"
 #include <gtest/gtest.h>
 #include <limits>
+#include <raft/core/device_mdspan.hpp>
 #include <raft/random/rng.cuh>
 #include <raft/stats/minmax.cuh>
 #include <raft/util/cuda_utils.cuh>
@@ -117,16 +118,15 @@ class MinMaxTest : public ::testing::TestWithParam<MinMaxInputs<T>> {
                 minmax_ref.data(),
                 minmax_ref.data() + params.cols,
                 stream);
-    minmax<T, 512>(data.data(),
-                   nullptr,
-                   nullptr,
-                   params.rows,
-                   params.cols,
-                   params.rows,
-                   minmax_act.data(),
-                   minmax_act.data() + params.cols,
-                   nullptr,
-                   stream);
+    raft::stats::minmax<T, int>(
+      handle,
+      raft::make_device_matrix_view<const T, int, raft::layout_f_contiguous>(
+        data.data(), params.rows, params.cols),
+      std::nullopt,
+      std::nullopt,
+      raft::make_device_vector_view<T, int>(minmax_act.data(), params.cols),
+      raft::make_device_vector_view<T, int>(minmax_act.data() + params.cols, params.cols),
+      std::nullopt);
   }
 
  protected:
diff --git a/cpp/test/stats/mutual_info_score.cu b/cpp/test/stats/mutual_info_score.cu
index 6bf3e6623f..fb9362df52 100644
--- a/cpp/test/stats/mutual_info_score.cu
+++ b/cpp/test/stats/mutual_info_score.cu
@@ -17,7 +17,7 @@
 #include <algorithm>
 #include <gtest/gtest.h>
 #include <iostream>
-#include <raft/stats/contingency_matrix.cuh>
+#include <raft/core/handle.hpp>
 #include <raft/stats/mutual_info_score.cuh>
 #include <raft/util/cudart_utils.hpp>
 #include <random>
@@ -104,7 +104,7 @@ class mutualInfoTest : public ::testing::TestWithParam<mutualInfoParam> {
     truthmutualInfo /= nElements;
 
     // allocating and initializing memory to the GPU
-    RAFT_CUDA_TRY(cudaStreamCreate(&stream));
+    stream = handle.get_stream();
 
     rmm::device_uvector<T> firstClusterArray(nElements, stream);
     rmm::device_uvector<T> secondClusterArray(nElements, stream);
@@ -117,18 +117,16 @@ class mutualInfoTest : public ::testing::TestWithParam<mutualInfoParam> {
     raft::update_device(secondClusterArray.data(), &arr2[0], (int)nElements, stream);
 
     // calling the mutualInfo CUDA implementation
-    computedmutualInfo = raft::stats::mutual_info_score(firstClusterArray.data(),
-                                                        secondClusterArray.data(),
-                                                        nElements,
-                                                        lowerLabelRange,
-                                                        upperLabelRange,
-                                                        stream);
+    computedmutualInfo = raft::stats::mutual_info_score(
+      handle,
+      raft::make_device_vector_view<const T>(firstClusterArray.data(), nElements),
+      raft::make_device_vector_view<const T>(secondClusterArray.data(), nElements),
+      lowerLabelRange,
+      upperLabelRange);
   }
 
-  // the destructor
-  void TearDown() override { RAFT_CUDA_TRY(cudaStreamDestroy(stream)); }
-
   // declaring the data values
+  raft::handle_t handle;
   mutualInfoParam params;
   T lowerLabelRange, upperLabelRange;
   int nElements             = 0;
diff --git a/cpp/test/stats/rand_index.cu b/cpp/test/stats/rand_index.cu
index ca1c4dd5e8..67e4ab5517 100644
--- a/cpp/test/stats/rand_index.cu
+++ b/cpp/test/stats/rand_index.cu
@@ -22,6 +22,7 @@
 
 #include <algorithm>
 #include <iostream>
+#include <raft/core/handle.hpp>
 #include <raft/stats/rand_index.cuh>
 #include <random>
 
@@ -77,7 +78,7 @@ class randIndexTest : public ::testing::TestWithParam<randIndexParam> {
     truthRandIndex      = (double)(((double)(a_truth + b_truth)) / (double)nChooseTwo);
 
     // allocating and initializing memory to the GPU
-    RAFT_CUDA_TRY(cudaStreamCreate(&stream));
+    stream = handle.get_stream();
 
     rmm::device_uvector<T> firstClusterArray(size, stream);
     rmm::device_uvector<T> secondClusterArray(size, stream);
@@ -90,14 +91,14 @@ class randIndexTest : public ::testing::TestWithParam<randIndexParam> {
     raft::update_device(secondClusterArray.data(), &arr2[0], (int)size, stream);
 
     // calling the rand_index CUDA implementation
-    computedRandIndex =
-      raft::stats::rand_index(firstClusterArray.data(), secondClusterArray.data(), size, stream);
+    computedRandIndex = raft::stats::rand_index(
+      handle,
+      raft::make_device_vector_view<const T>(firstClusterArray.data(), size),
+      raft::make_device_vector_view<const T>(secondClusterArray.data(), size));
   }
 
-  // the destructor
-  void TearDown() override { RAFT_CUDA_TRY(cudaStreamDestroy(stream)); }
-
   // declaring the data values
+  raft::handle_t handle;
   randIndexParam params;
   int lowerLabelRange = 0, upperLabelRange = 2;
   uint64_t size            = 0;
diff --git a/cpp/test/stats/silhouette_score.cu b/cpp/test/stats/silhouette_score.cu
index f885c1034f..37a6fff786 100644
--- a/cpp/test/stats/silhouette_score.cu
+++ b/cpp/test/stats/silhouette_score.cu
@@ -173,25 +173,22 @@ class silhouetteScoreTest : public ::testing::TestWithParam<silhouetteScoreParam
     host_silhouette_score();
 
     // calling the silhouette_score CUDA implementation
-    computedSilhouetteScore = raft::stats::silhouette_score(handle,
-                                                            d_X.data(),
-                                                            nRows,
-                                                            nCols,
-                                                            d_labels.data(),
-                                                            nLabels,
-                                                            sampleSilScore.data(),
-                                                            handle.get_stream(),
-                                                            params.metric);
-
-    batchedSilhouetteScore = raft::stats::silhouette_score_batched(handle,
-                                                                   d_X.data(),
-                                                                   nRows,
-                                                                   nCols,
-                                                                   d_labels.data(),
-                                                                   nLabels,
-                                                                   sampleSilScore.data(),
-                                                                   chunk,
-                                                                   params.metric);
+    computedSilhouetteScore = raft::stats::silhouette_score(
+      handle,
+      raft::make_device_matrix_view<const DataT>(d_X.data(), nRows, nCols),
+      raft::make_device_vector_view<const LabelT>(d_labels.data(), nRows),
+      std::make_optional(raft::make_device_vector_view(sampleSilScore.data(), nRows)),
+      nLabels,
+      params.metric);
+
+    batchedSilhouetteScore = raft::stats::silhouette_score_batched(
+      handle,
+      raft::make_device_matrix_view<const DataT>(d_X.data(), nRows, nCols),
+      raft::make_device_vector_view<const LabelT>(d_labels.data(), nRows),
+      std::make_optional(raft::make_device_vector_view(sampleSilScore.data(), nRows)),
+      nLabels,
+      chunk,
+      params.metric);
   }
 
   // declaring the data values
diff --git a/cpp/test/stats/stddev.cu b/cpp/test/stats/stddev.cu
index 70d99c2aeb..73f30f17e9 100644
--- a/cpp/test/stats/stddev.cu
+++ b/cpp/test/stats/stddev.cu
@@ -73,14 +73,43 @@ class StdDevTest : public ::testing::TestWithParam<StdDevInputs<T>> {
   {
     int rows = params.rows, cols = params.cols;
 
-    mean(mean_act.data(), data, cols, rows, params.sample, params.rowMajor, stream);
-
-    stddev(
-      stddev_act.data(), data, mean_act.data(), cols, rows, params.sample, params.rowMajor, stream);
-
-    vars(
-      vars_act.data(), data, mean_act.data(), cols, rows, params.sample, params.rowMajor, stream);
-
+    if (params.rowMajor) {
+      using layout_t = raft::row_major;
+      mean(handle,
+           raft::make_device_matrix_view<const T, int, layout_t>(data, rows, cols),
+           raft::make_device_vector_view<T, int>(mean_act.data(), cols),
+           params.sample);
+
+      stddev(handle,
+             raft::make_device_matrix_view<const T, int, layout_t>(data, rows, cols),
+             raft::make_device_vector_view<const T, int>(mean_act.data(), cols),
+             raft::make_device_vector_view<T, int>(stddev_act.data(), cols),
+             params.sample);
+
+      vars(handle,
+           raft::make_device_matrix_view<const T, int, layout_t>(data, rows, cols),
+           raft::make_device_vector_view<const T, int>(mean_act.data(), cols),
+           raft::make_device_vector_view<T, int>(vars_act.data(), cols),
+           params.sample);
+    } else {
+      using layout_t = raft::col_major;
+      mean(handle,
+           raft::make_device_matrix_view<const T, int, layout_t>(data, rows, cols),
+           raft::make_device_vector_view<T, int>(mean_act.data(), cols),
+           params.sample);
+
+      stddev(handle,
+             raft::make_device_matrix_view<const T, int, layout_t>(data, rows, cols),
+             raft::make_device_vector_view<const T, int>(mean_act.data(), cols),
+             raft::make_device_vector_view<T, int>(stddev_act.data(), cols),
+             params.sample);
+
+      vars(handle,
+           raft::make_device_matrix_view<const T, int, layout_t>(data, rows, cols),
+           raft::make_device_vector_view<const T, int>(mean_act.data(), cols),
+           raft::make_device_vector_view<T, int>(vars_act.data(), cols),
+           params.sample);
+    }
     raft::matrix::seqRoot(vars_act.data(), T(1), cols, stream);
   }
 
diff --git a/cpp/test/stats/sum.cu b/cpp/test/stats/sum.cu
index 7a16dbde4a..e67988abb0 100644
--- a/cpp/test/stats/sum.cu
+++ b/cpp/test/stats/sum.cu
@@ -65,7 +65,9 @@ class SumTest : public ::testing::TestWithParam<SumInputs<T>> {
     }
 
     raft::update_device(data.data(), data_h, len, stream);
-    sum(sum_act.data(), data.data(), cols, rows, false, stream);
+    sum(handle,
+        raft::make_device_matrix_view<const T>(data.data(), rows, cols),
+        raft::make_device_vector_view(sum_act.data(), cols));
     handle.sync_stream(stream);
   }
 
diff --git a/cpp/test/stats/trustworthiness.cu b/cpp/test/stats/trustworthiness.cu
index ae596d0535..cbb8228f8f 100644
--- a/cpp/test/stats/trustworthiness.cu
+++ b/cpp/test/stats/trustworthiness.cu
@@ -320,10 +320,17 @@ class TrustworthinessScoreTest : public ::testing::Test {
 
     raft::update_device(d_X.data(), X.data(), X.size(), stream);
     raft::update_device(d_X_embedded.data(), X_embedded.data(), X_embedded.size(), stream);
+    auto n_sample            = 50;
+    auto n_features_origin   = 30;
+    auto n_features_embedded = 8;
 
     // euclidean test
-    score = trustworthiness_score<float, raft::distance::DistanceType::L2SqrtUnexpanded>(
-      handle, d_X.data(), d_X_embedded.data(), 50, 30, 8, 5);
+    score = trustworthiness_score<raft::distance::DistanceType::L2SqrtUnexpanded, float>(
+      handle,
+      raft::make_device_matrix_view<const float>(d_X.data(), n_sample, n_features_origin),
+      raft::make_device_matrix_view<const float>(
+        d_X_embedded.data(), n_sample, n_features_embedded),
+      5);
   }
 
   void SetUp() override { basicTest(); }
diff --git a/cpp/test/stats/v_measure.cu b/cpp/test/stats/v_measure.cu
index 22dcefba0c..0cbc2da7d9 100644
--- a/cpp/test/stats/v_measure.cu
+++ b/cpp/test/stats/v_measure.cu
@@ -65,7 +65,7 @@ class vMeasureTest : public ::testing::TestWithParam<vMeasureParam> {
 
     // allocating and initializing memory to the GPU
 
-    RAFT_CUDA_TRY(cudaStreamCreate(&stream));
+    stream = handle.get_stream();
     rmm::device_uvector<T> truthClusterArray(nElements, stream);
     rmm::device_uvector<T> predClusterArray(nElements, stream);
     raft::update_device(truthClusterArray.data(), &arr1[0], (int)nElements, stream);
@@ -93,19 +93,17 @@ class vMeasureTest : public ::testing::TestWithParam<vMeasureParam> {
       truthVMeasure = ((1 + params.beta) * truthHomogeity * truthCompleteness /
                        (params.beta * truthHomogeity + truthCompleteness));
     // calling the v_measure CUDA implementation
-    computedVMeasure = raft::stats::v_measure(truthClusterArray.data(),
-                                              predClusterArray.data(),
-                                              nElements,
-                                              lowerLabelRange,
-                                              upperLabelRange,
-                                              stream,
-                                              params.beta);
+    computedVMeasure = raft::stats::v_measure(
+      handle,
+      raft::make_device_vector_view<const T>(truthClusterArray.data(), nElements),
+      raft::make_device_vector_view<const T>(predClusterArray.data(), nElements),
+      lowerLabelRange,
+      upperLabelRange,
+      params.beta);
   }
 
-  // the destructor
-  void TearDown() override { RAFT_CUDA_TRY(cudaStreamDestroy(stream)); }
-
   // declaring the data values
+  raft::handle_t handle;
   vMeasureParam params;
   T lowerLabelRange, upperLabelRange;
   int nElements           = 0;
diff --git a/cpp/test/stats/weighted_mean.cu b/cpp/test/stats/weighted_mean.cu
index 5ff8454490..ec99d5a627 100644
--- a/cpp/test/stats/weighted_mean.cu
+++ b/cpp/test/stats/weighted_mean.cu
@@ -294,4 +294,4 @@ TEST_P(WeightedMeanTestD, Result)
 INSTANTIATE_TEST_CASE_P(WeightedMeanTest, WeightedMeanTestD, ::testing::ValuesIn(inputsd));
 
 };  // end namespace stats
-};  // end namespace raft
+};  // end namespace raft
\ No newline at end of file