rapidsai · rapids-bot · Oct 7, 2022 · Jul 25, 2022 · Jul 25, 2022 · Jul 25, 2022
@@ -19,6 +19,7 @@
 
 #pragma once
 
+#include <raft/core/device_mdspan.hpp>
 #include <raft/stats/detail/scores.cuh>
 
 namespace raft {
@@ -39,6 +40,29 @@ float accuracy(const math_t* predictions, const math_t* ref_predictions, int n,
   return detail::accuracy_score(predictions, ref_predictions, n, stream);
 }
 
+/**
+ * @brief Compute the accuracy of predictions against reference labels.
+ *        Useful for classification.
+ *
+ * mdspan-based overload: both views are validated and then their raw device
+ * pointers are forwarded to detail::accuracy_score on the handle's stream.
+ *
+ * @tparam value_t data type of the predictions (e.g., int for classification)
+ * @tparam idx_t index type of the vector extents
+ * @param[in] handle the raft handle (provides the stream used for the call)
+ * @param[in] predictions device vector of predictions
+ * @param[in] ref_predictions device vector of reference (ground-truth) predictions;
+ *            must hold the same number of elements as `predictions`
+ * @return accuracy score in [0, 1]; higher is better
+ */
+template <typename value_t, typename idx_t>
+float accuracy(const raft::handle_t& handle,
+               raft::device_vector_view<const value_t, idx_t> predictions,
+               raft::device_vector_view<const value_t, idx_t> ref_predictions)
+{
+  // Reject mismatched or non-contiguous views before any raw pointer is used.
+  RAFT_EXPECTS(predictions.size() == ref_predictions.size(), "Size mismatch");
+  RAFT_EXPECTS(predictions.is_exhaustive(), "predictions must be contiguous");
+  RAFT_EXPECTS(ref_predictions.is_exhaustive(), "ref_predictions must be contiguous");
+
+  auto predicted_ptr = predictions.data_handle();
+  auto reference_ptr = ref_predictions.data_handle();
+  auto n_elements    = predictions.extent(0);
+
+  return detail::accuracy_score(predicted_ptr, reference_ptr, n_elements, handle.get_stream());
+}
 }  // namespace stats
 }  // namespace raft
 

@@ -24,6 +24,7 @@
 
 #pragma once
 
+#include <raft/core/device_mdspan.hpp>
 #include <raft/stats/detail/adjusted_rand_index.cuh>
 
 namespace raft {
@@ -48,6 +49,32 @@ double adjusted_rand_index(const T* firstClusterArray,
   return detail::compute_adjusted_rand_index(firstClusterArray, secondClusterArray, size, stream);
 }
 
+/**
+ * @brief Calculate the Adjusted Rand Index of two label assignments, as described
+ *        <a href="https://en.wikipedia.org/wiki/Rand_index">here</a>
+ *
+ * mdspan-based overload: both views are validated and then forwarded as raw
+ * pointers to detail::compute_adjusted_rand_index on the handle's stream.
+ *
+ * @tparam value_t data type of the input label arrays
+ * @tparam math_t integral data type used for computing n-choose-r. It appears in
+ *         no function parameter, so callers must name it explicitly.
+ * @tparam idx_t index type of the vector extents
+ * @param[in] handle the raft handle (provides the stream used for the call)
+ * @param[in] first_cluster_array device vector of class labels
+ * @param[in] second_cluster_array device vector of class labels; must hold the
+ *            same number of elements as `first_cluster_array`
+ * @return the Adjusted Rand Index
+ */
+template <typename value_t, typename math_t, typename idx_t>
+double adjusted_rand_index(const raft::handle_t& handle,
+                           raft::device_vector_view<const value_t, idx_t> first_cluster_array,
+                           raft::device_vector_view<const value_t, idx_t> second_cluster_array)
+{
+  // Reject mismatched or non-contiguous views before any raw pointer is used.
+  RAFT_EXPECTS(first_cluster_array.size() == second_cluster_array.size(), "Size mismatch");
+  RAFT_EXPECTS(first_cluster_array.is_exhaustive(), "first_cluster_array must be contiguous");
+  RAFT_EXPECTS(second_cluster_array.is_exhaustive(), "second_cluster_array must be contiguous");
+
+  auto first_ptr  = first_cluster_array.data_handle();
+  auto second_ptr = second_cluster_array.data_handle();
+  auto n_elements = first_cluster_array.extent(0);
+
+  return detail::compute_adjusted_rand_index<value_t, math_t>(
+    first_ptr, second_ptr, n_elements, handle.get_stream());
+}
+
 };  // end namespace stats
 };  // end namespace raft
 

@@ -19,6 +19,7 @@
 
 #pragma once
 
+#include <raft/core/device_mdspan.hpp>
 #include <raft/stats/detail/homogeneity_score.cuh>
 
 namespace raft {
@@ -30,20 +31,50 @@ namespace stats {
  * @param truthClusterArray: the array of truth classes of type T
  * @param predClusterArray: the array of predicted classes of type T
  * @param size: the size of the data points of type int
- * @param lowerLabelRange: the lower bound of the range of labels
- * @param upperLabelRange: the upper bound of the range of labels
+ * @param lower_label_range: the lower bound of the range of labels
+ * @param upper_label_range: the upper bound of the range of labels
  * @param stream: the cudaStream object
  */
 template <typename T>
 double completeness_score(const T* truthClusterArray,
                           const T* predClusterArray,
                           int size,
-                          T lowerLabelRange,
-                          T upperLabelRange,
+                          T lower_label_range,
+                          T upper_label_range,
                           cudaStream_t stream)
 {
   return detail::homogeneity_score(
-    predClusterArray, truthClusterArray, size, lowerLabelRange, upperLabelRange, stream);
+    predClusterArray, truthClusterArray, size, lower_label_range, upper_label_range, stream);
+}
+
+/**
+ * @brief Calculate the completeness score between two clusterings
+ *
+ * mdspan-based overload: both views are validated and then forwarded to
+ * detail::homogeneity_score, with the predicted array passed first and the
+ * truth array second.
+ *
+ * @tparam value_t the data type of the labels
+ * @tparam idx_t index type of the vector extents
+ * @param[in] handle the raft handle (provides the stream used for the call)
+ * @param[in] truth_cluster_array device vector of truth classes of type value_t
+ * @param[in] pred_cluster_array device vector of predicted classes of type value_t;
+ *            must hold the same number of elements as `truth_cluster_array`
+ * @param[in] lower_label_range the lower bound of the range of labels
+ * @param[in] upper_label_range the upper bound of the range of labels
+ * @return the cluster completeness score
+ */
+template <typename value_t, typename idx_t>
+double completeness_score(const raft::handle_t& handle,
+                          raft::device_vector_view<const value_t, idx_t> truth_cluster_array,
+                          raft::device_vector_view<const value_t, idx_t> pred_cluster_array,
+                          value_t lower_label_range,
+                          value_t upper_label_range)
+{
+  // Reject mismatched or non-contiguous views before any raw pointer is used.
+  RAFT_EXPECTS(truth_cluster_array.size() == pred_cluster_array.size(), "Size mismatch");
+  RAFT_EXPECTS(truth_cluster_array.is_exhaustive(), "truth_cluster_array must be contiguous");
+  RAFT_EXPECTS(pred_cluster_array.is_exhaustive(), "pred_cluster_array must be contiguous");
+
+  auto pred_ptr   = pred_cluster_array.data_handle();
+  auto truth_ptr  = truth_cluster_array.data_handle();
+  auto n_elements = truth_cluster_array.extent(0);
+
+  // Argument order is (predicted, truth): the reverse of this function's own parameters.
+  return detail::homogeneity_score(
+    pred_ptr, truth_ptr, n_elements, lower_label_range, upper_label_range, handle.get_stream());
 }
 
 };  // end namespace stats

@@ -19,6 +19,10 @@
 
 #pragma once
 
+#include <raft/core/device_mdarray.hpp>
+#include <raft/core/device_mdspan.hpp>
+#include <raft/core/handle.hpp>
+#include <raft/core/host_mdspan.hpp>
 #include <raft/stats/detail/contingencyMatrix.cuh>
 
 namespace raft {
@@ -40,6 +44,31 @@ void getInputClassCardinality(
   detail::getInputClassCardinality(groundTruth, nSamples, stream, minLabel, maxLabel);
 }
 
+/**
+ * @brief Find the minimum and maximum label of the ground-truth array. Use this to
+ *        size the output matrix:
+ *        size of matrix = (maxLabel - minLabel + 1)^2 * sizeof(int)
+ *
+ * mdspan-based overload: forwards to detail::getInputClassCardinality on the
+ * handle's stream, writing the results through the two host scalar views.
+ *
+ * @tparam value_t label type
+ * @tparam idx_t index type of the vector extent
+ * @param[in]  handle the raft handle (provides the stream used for the call)
+ * @param[in]  groundTruth device 1-d array for ground truth (num of rows)
+ * @param[out] minLabel host scalar receiving the calculated min value of the input array;
+ *             must not wrap a null pointer
+ * @param[out] maxLabel host scalar receiving the calculated max value of the input array;
+ *             must not wrap a null pointer
+ */
+template <typename value_t, typename idx_t>
+void get_input_class_cardinality(const raft::handle_t& handle,
+                                 raft::device_vector_view<const value_t, idx_t> groundTruth,
+                                 raft::host_scalar_view<value_t> minLabel,
+                                 raft::host_scalar_view<value_t> maxLabel)
+{
+  auto min_out = minLabel.data_handle();
+  auto max_out = maxLabel.data_handle();
+
+  // Both outputs are dereferenced below, so neither may be null.
+  RAFT_EXPECTS(min_out != nullptr, "Invalid minLabel pointer");
+  RAFT_EXPECTS(max_out != nullptr, "Invalid maxLabel pointer");
+
+  auto n_samples = groundTruth.extent(0);
+  detail::getInputClassCardinality(
+    groundTruth.data_handle(), n_samples, handle.get_stream(), *min_out, *max_out);
+}
+
 /**
  * @brief Calculate workspace size for running contingency matrix calculations
  * @tparam T label type
@@ -71,7 +100,7 @@ size_t getContingencyMatrixWorkspaceSize(int nSamples,
  * @param groundTruth: device 1-d array for ground truth (num of rows)
  * @param predictedLabel: device 1-d array for prediction (num of columns)
  * @param nSamples: number of elements in input array
- * @param outMat: output buffer for contingecy matrix
+ * @param outMat: output buffer for contingency matrix
  * @param stream: cuda stream for execution
  * @param workspace: Optional, workspace memory allocation
  * @param workspaceSize: Optional, size of workspace memory
@@ -100,6 +129,82 @@ void contingencyMatrix(const T* groundTruth,
                                      maxLabel);
 }
 
+/**
+ * @brief Construct the contingency matrix for the given ground-truth and
+ *        predicted labels.
+ *
+ * The output matrix must be allocated by the caller; get_input_class_cardinality
+ * can be used to find the label range needed to size it. The temporary workspace
+ * is sized and allocated inside this function, so callers do not provide one.
+ *
+ * @tparam value_t label type
+ * @tparam out_t output matrix type
+ * @tparam idx_t index type of the vector / matrix extents
+ * @tparam layout_t layout type of the output matrix
+ * @param[in]  handle the raft handle (provides the stream and the workspace allocation)
+ * @param[in]  ground_truth device 1-d array for ground truth (num of rows)
+ * @param[in]  predicted_label device 1-d array for prediction (num of columns);
+ *             must hold the same number of elements as `ground_truth`
+ * @param[out] out_mat output buffer for the contingency matrix
+ * @param[in]  min_label Optional, min value in the input ground truth array
+ * @param[in]  max_label Optional, max value in the input ground truth array
+ * @note When `min_label` / `max_label` is not provided,
+ *       std::numeric_limits<value_t>::max() is forwarded in its place to the detail
+ *       routines (presumably their "not provided" sentinel; confirm in detail code).
+ */
+template <typename value_t, typename out_t, typename idx_t, typename layout_t>
+void contingency_matrix(const raft::handle_t& handle,
+                        raft::device_vector_view<const value_t, idx_t> ground_truth,
+                        raft::device_vector_view<const value_t, idx_t> predicted_label,
+                        raft::device_matrix_view<out_t, idx_t, layout_t> out_mat,
+                        std::optional<value_t> min_label = std::nullopt,
+                        std::optional<value_t> max_label = std::nullopt)
+{
+  // Reject mismatched or non-contiguous views before any raw pointer is used.
+  RAFT_EXPECTS(ground_truth.size() == predicted_label.size(), "Size mismatch");
+  RAFT_EXPECTS(ground_truth.is_exhaustive(), "ground_truth must be contiguous");
+  RAFT_EXPECTS(predicted_label.is_exhaustive(), "predicted_label must be contiguous");
+  RAFT_EXPECTS(out_mat.is_exhaustive(), "out_mat must be contiguous");
+
+  // A missing bound is represented by the largest value of the label type.
+  value_t min_label_value = min_label.value_or(std::numeric_limits<value_t>::max());
+  value_t max_label_value = max_label.value_or(std::numeric_limits<value_t>::max());
+
+  auto truth_ptr = ground_truth.data_handle();
+  auto n_samples = ground_truth.extent(0);
+
+  // Query the scratch-space requirement, then allocate it as a byte buffer.
+  auto workspace_sz = detail::getContingencyMatrixWorkspaceSize(
+    n_samples, truth_ptr, handle.get_stream(), min_label_value, max_label_value);
+  auto workspace = raft::make_device_vector<char>(handle, workspace_sz);
+
+  detail::contingencyMatrix<value_t, out_t>(truth_ptr,
+                                            predicted_label.data_handle(),
+                                            n_samples,
+                                            out_mat.data_handle(),
+                                            handle.get_stream(),
+                                            workspace.data_handle(),
+                                            workspace_sz,
+                                            min_label_value,
+                                            max_label_value);
+}
+
+/**
+ * @brief Overload of `contingency_matrix` to help the
+ *   compiler find the above overload, in case users pass in
+ *   `std::nullopt` for the optional arguments.
+ *
+ * Please see above for documentation of `contingency_matrix`.
+ *
+ * The two trailing arguments are taken as forwarding references of arbitrary
+ * type, converted to `std::optional<value_t>` via `std::forward`, and then passed
+ * on to `contingency_matrix` as `std::optional<value_t>` objects.
+ *
+ * @tparam opt_min_label_t type of the `min_label` argument; must be convertible
+ *         to `std::optional<value_t>` (e.g. `std::nullopt_t` or `value_t`)
+ * @tparam opt_max_label_t type of the `max_label` argument; must be convertible
+ *         to `std::optional<value_t>` (e.g. `std::nullopt_t` or `value_t`)
+ * @param[in] min_label Optional, min value in input ground truth array
+ * @param[in] max_label Optional, max value in input ground truth array
+ *
+ * @note `opt_min_label_t` / `opt_max_label_t` cannot be deduced from the default
+ *   arguments, so this overload is only viable when both trailing arguments are
+ *   passed explicitly.
+ * @note NOTE(review): the inner call is expected to resolve to the overload that
+ *   takes `std::optional<value_t>` parameters rather than back to this one;
+ *   keep the argument types as `std::optional<value_t>` when editing.
+ */
+template <typename value_t,
+          typename out_t,
+          typename idx_t,
+          typename layout_t,
+          typename opt_min_label_t,
+          typename opt_max_label_t>
+void contingency_matrix(const raft::handle_t& handle,
+                        raft::device_vector_view<const value_t, idx_t> ground_truth,
+                        raft::device_vector_view<const value_t, idx_t> predicted_label,
+                        raft::device_matrix_view<out_t, idx_t, layout_t> out_mat,
+                        opt_min_label_t&& min_label = std::nullopt,
+                        opt_max_label_t&& max_label = std::nullopt)
+{
+  std::optional<value_t> opt_min_label = std::forward<opt_min_label_t>(min_label);
+  std::optional<value_t> opt_max_label = std::forward<opt_max_label_t>(max_label);
+  contingency_matrix(handle, ground_truth, predicted_label, out_mat, opt_min_label, opt_max_label);
+}
 };  // namespace stats
 };  // namespace raft
 

@@ -19,6 +19,7 @@
 
 #pragma once
 
+#include <raft/core/device_mdspan.hpp>
 #include <raft/stats/detail/cov.cuh>
 namespace raft {
 namespace stats {
@@ -57,6 +58,55 @@ void cov(const raft::handle_t& handle,
 {
   detail::cov(handle, covar, data, mu, D, N, sample, rowMajor, stable, stream);
 }
+
+/**
+ * @brief Compute covariance of the input matrix
+ *
+ * Mean operation is assumed to be performed on a given column.
+ *
+ * mdspan-based overload: the views are validated and then forwarded as raw
+ * pointers to detail::cov on the handle's stream. The storage order passed to
+ * detail::cov is derived from `layout_t`.
+ *
+ * @tparam value_t the data type
+ * @tparam idx_t the index type
+ * @tparam layout_t Layout type of the input data (raft::row_major or raft::col_major).
+ * @param[in]     handle the raft handle
+ * @param[in,out] data the input matrix (this will get mean-centered at the end!)
+ * (length = nrows * ncols)
+ * @param[in]     mu mean vector of the input matrix (length = ncols)
+ * @param[out]    covar the output covariance matrix (length = ncols * ncols)
+ * @param[in]     sample whether to evaluate sample covariance or not. In other words,
+ * whether to normalize the output using N-1 or N, for true or false,
+ * respectively
+ * @param[in]     stable whether to run the slower-but-numerically-stable version or not
+ * @note if stable=true, then the input data will be mean centered after this
+ * function returns!
+ */
+template <typename value_t, typename idx_t, typename layout_t>
+void cov(const raft::handle_t& handle,
+         raft::device_matrix_view<value_t, idx_t, layout_t> data,
+         raft::device_vector_view<const value_t, idx_t> mu,
+         raft::device_matrix_view<value_t, idx_t, layout_t> covar,
+         bool sample,
+         bool stable)
+{
+  constexpr bool is_row_major = std::is_same_v<layout_t, raft::row_major>;
+  static_assert(is_row_major || std::is_same_v<layout_t, raft::col_major>,
+                "Data layout not supported");
+
+  RAFT_EXPECTS(data.extent(1) == covar.extent(0) && data.extent(1) == covar.extent(1),
+               "Size mismatch");
+  // mu holds one mean per column of data, so its length must equal ncols.
+  RAFT_EXPECTS(mu.extent(0) == data.extent(1), "Size mismatch between data and mu");
+  RAFT_EXPECTS(data.is_exhaustive(), "data must be contiguous");
+  RAFT_EXPECTS(covar.is_exhaustive(), "covar must be contiguous");
+  RAFT_EXPECTS(mu.is_exhaustive(), "mu must be contiguous");
+
+  // The flags are passed in the order (sample, rowMajor, stable), the same order the
+  // pointer-based overload uses when calling detail::cov. All three are bool, so
+  // passing them in the wrong order would compile without any diagnostic.
+  detail::cov(handle,
+              covar.data_handle(),
+              data.data_handle(),
+              mu.data_handle(),
+              data.extent(1),
+              data.extent(0),
+              sample,
+              is_row_major,
+              stable,
+              handle.get_stream());
+}
 };  // end namespace stats
 };  // end namespace raft
 

@@ -112,7 +112,7 @@ __global__ void compute_chunked_a_b_kernel(value_t* a,
 
 template <typename value_idx, typename label_idx>
 rmm::device_uvector<value_idx> get_cluster_counts(const raft::handle_t& handle,
-                                                  label_idx* y,
+                                                  const label_idx* y,
                                                   value_idx& n_rows,
                                                   label_idx& n_labels)
 {
@@ -129,8 +129,8 @@ rmm::device_uvector<value_idx> get_cluster_counts(const raft::handle_t& handle,
 
 template <typename value_t, typename value_idx>
 rmm::device_uvector<value_t> get_pairwise_distance(const raft::handle_t& handle,
-                                                   value_t* left_begin,
-                                                   value_t* right_begin,
+                                                   const value_t* left_begin,
+                                                   const value_t* right_begin,
                                                    value_idx& n_left_rows,
                                                    value_idx& n_right_rows,
                                                    value_idx& n_cols,
@@ -170,10 +170,10 @@ void compute_chunked_a_b(const raft::handle_t& handle,
 template <typename value_t, typename value_idx, typename label_idx>
 value_t silhouette_score(
   const raft::handle_t& handle,
-  value_t* X,
+  const value_t* X,
   value_idx n_rows,
   value_idx n_cols,
-  label_idx* y,
+  const label_idx* y,
   label_idx n_labels,
   value_t* scores,
   value_idx chunk,
@@ -221,8 +221,8 @@ value_t silhouette_score(
 
       auto chunk_stream = handle.get_next_usable_stream(i + chunk * j);
 
-      auto* left_begin  = X + (i * n_cols);
-      auto* right_begin = X + (j * n_cols);
+      const auto* left_begin  = X + (i * n_cols);
+      const auto* right_begin = X + (j * n_cols);
 
       auto n_left_rows  = (i + chunk) < n_rows ? chunk : (n_rows - i);
       auto n_right_rows = (j + chunk) < n_rows ? chunk : (n_rows - j);

@@ -465,7 +465,7 @@ HistType selectBestHistAlgo(IdxT nbins)
  * @param nbins number of bins
  * @param data input data (length = ncols * nrows)
  * @param nrows data array length in each column (or batch)
- * @param ncols number of columsn (or batch size)
+ * @param ncols number of columns (or batch size)
  * @param stream cuda stream
  * @param binner the operation that computes the bin index of the input data
  *