diff --git a/build.sh b/build.sh index a31d97c22c..d1dd8bdde1 100755 --- a/build.sh +++ b/build.sh @@ -357,7 +357,8 @@ if (( ${NUMARGS} == 0 )) || hasArg pylibraft; then fi if hasArg docs; then - cmake --build ${LIBRAFT_BUILD_DIR} --target docs_raft + set -x + cmake --build ${LIBRAFT_BUILD_DIR} -v --target docs_raft cd ${SPHINX_BUILD_DIR} make html fi diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index ce6eb00bc1..32fe654965 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -569,7 +569,6 @@ endif() ############################################################################## # - doxygen targets ---------------------------------------------------------- - include(cmake/doxygen.cmake) add_doxygen_target(IN_DOXYFILE doxygen/Doxyfile.in OUT_DOXYFILE ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile diff --git a/cpp/cmake/doxygen.cmake b/cpp/cmake/doxygen.cmake index 5b2da57eb5..7d06ec194c 100644 --- a/cpp/cmake/doxygen.cmake +++ b/cpp/cmake/doxygen.cmake @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -22,6 +22,8 @@ function(add_doxygen_target) set(multiValueArgs "") cmake_parse_arguments(dox "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) configure_file(${dox_IN_DOXYFILE} ${dox_OUT_DOXYFILE} @ONLY) + + message("Command: ${DOXYGEN_EXECUTABLE} ${dox_OUT_DOXYFILE}") add_custom_target(docs_raft ${DOXYGEN_EXECUTABLE} ${dox_OUT_DOXYFILE} WORKING_DIRECTORY ${dox_CWD} diff --git a/cpp/doxygen/Doxyfile.in b/cpp/doxygen/Doxyfile.in index 549862600a..5517562a9f 100644 --- a/cpp/doxygen/Doxyfile.in +++ b/cpp/doxygen/Doxyfile.in @@ -459,7 +459,7 @@ LOOKUP_CACHE_SIZE = 0 # DOT_NUM_THREADS setting. # Minimum value: 0, maximum value: 32, default value: 1. 
-NUM_PROC_THREADS = 1 +NUM_PROC_THREADS = 0 #--------------------------------------------------------------------------- # Build related configuration options @@ -2495,7 +2495,7 @@ PLANTUML_INCLUDE_PATH = # Minimum value: 0, maximum value: 10000, default value: 50. # This tag requires that the tag HAVE_DOT is set to YES. -DOT_GRAPH_MAX_NODES = 50 +DOT_GRAPH_MAX_NODES = 100 # The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the graphs # generated by dot. A depth value of 3 means that only nodes reachable from the diff --git a/cpp/include/raft/stats/accuracy.cuh b/cpp/include/raft/stats/accuracy.cuh index 250ce579e5..37cdc280f9 100644 --- a/cpp/include/raft/stats/accuracy.cuh +++ b/cpp/include/raft/stats/accuracy.cuh @@ -19,6 +19,7 @@ #pragma once +#include #include namespace raft { @@ -39,6 +40,29 @@ float accuracy(const math_t* predictions, const math_t* ref_predictions, int n, return detail::accuracy_score(predictions, ref_predictions, n, stream); } +/** + * @brief Compute accuracy of predictions. Useful for classification. + * @tparam value_t: data type for predictions (e.g., int for classification) + * @tparam idx_t Index type of matrix extent. + * @param[in] handle: the raft handle. + * @param[in] predictions: array of predictions (GPU pointer). + * @param[in] ref_predictions: array of reference (ground-truth) predictions (GPU pointer). + * @return: Accuracy score in [0, 1]; higher is better. 
+ */ +template +float accuracy(const raft::handle_t& handle, + raft::device_vector_view predictions, + raft::device_vector_view ref_predictions) +{ + RAFT_EXPECTS(predictions.size() == ref_predictions.size(), "Size mismatch"); + RAFT_EXPECTS(predictions.is_exhaustive(), "predictions must be contiguous"); + RAFT_EXPECTS(ref_predictions.is_exhaustive(), "ref_predictions must be contiguous"); + + return detail::accuracy_score(predictions.data_handle(), + ref_predictions.data_handle(), + predictions.extent(0), + handle.get_stream()); +} } // namespace stats } // namespace raft diff --git a/cpp/include/raft/stats/adjusted_rand_index.cuh b/cpp/include/raft/stats/adjusted_rand_index.cuh index a59d7b4c81..e1b6a241c4 100644 --- a/cpp/include/raft/stats/adjusted_rand_index.cuh +++ b/cpp/include/raft/stats/adjusted_rand_index.cuh @@ -24,6 +24,7 @@ #pragma once +#include #include namespace raft { @@ -48,6 +49,32 @@ double adjusted_rand_index(const T* firstClusterArray, return detail::compute_adjusted_rand_index(firstClusterArray, secondClusterArray, size, stream); } +/** + * @brief Function to calculate Adjusted RandIndex as described + * here + * @tparam value_t data-type for input label arrays + * @tparam math_t integral data-type used for computing n-choose-r + * @tparam idx_t Index type of matrix extent. + * @param[in] handle: the raft handle. 
+ * @param[in] first_cluster_array: the array of classes + * @param[in] second_cluster_array: the array of classes + * @return the Adjusted RandIndex + */ +template +double adjusted_rand_index(const raft::handle_t& handle, + raft::device_vector_view first_cluster_array, + raft::device_vector_view second_cluster_array) +{ + RAFT_EXPECTS(first_cluster_array.size() == second_cluster_array.size(), "Size mismatch"); + RAFT_EXPECTS(first_cluster_array.is_exhaustive(), "first_cluster_array must be contiguous"); + RAFT_EXPECTS(second_cluster_array.is_exhaustive(), "second_cluster_array must be contiguous"); + + return detail::compute_adjusted_rand_index(first_cluster_array.data_handle(), + second_cluster_array.data_handle(), + first_cluster_array.extent(0), + handle.get_stream()); +} + }; // end namespace stats }; // end namespace raft diff --git a/cpp/include/raft/stats/completeness_score.cuh b/cpp/include/raft/stats/completeness_score.cuh index 407986de05..fd535e77d5 100644 --- a/cpp/include/raft/stats/completeness_score.cuh +++ b/cpp/include/raft/stats/completeness_score.cuh @@ -19,6 +19,7 @@ #pragma once +#include #include namespace raft { @@ -30,20 +31,50 @@ namespace stats { * @param truthClusterArray: the array of truth classes of type T * @param predClusterArray: the array of predicted classes of type T * @param size: the size of the data points of type int - * @param lowerLabelRange: the lower bound of the range of labels - * @param upperLabelRange: the upper bound of the range of labels + * @param lower_label_range: the lower bound of the range of labels + * @param upper_label_range: the upper bound of the range of labels * @param stream: the cudaStream object */ template double completeness_score(const T* truthClusterArray, const T* predClusterArray, int size, - T lowerLabelRange, - T upperLabelRange, + T lower_label_range, + T upper_label_range, cudaStream_t stream) { return detail::homogeneity_score( - predClusterArray, truthClusterArray, size, 
lowerLabelRange, upperLabelRange, stream); + predClusterArray, truthClusterArray, size, lower_label_range, upper_label_range, stream); +} + +/** + * @brief Function to calculate the completeness score between two clusters + * + * @tparam value_t the data type + * @tparam idx_t Index type of matrix extent. + * @param[in] handle: the raft handle. + * @param[in] truth_cluster_array: the array of truth classes of type value_t + * @param[in] pred_cluster_array: the array of predicted classes of type value_t + * @param[in] lower_label_range: the lower bound of the range of labels + * @param[in] upper_label_range: the upper bound of the range of labels + * @return the cluster completeness score + */ +template +double completeness_score(const raft::handle_t& handle, + raft::device_vector_view truth_cluster_array, + raft::device_vector_view pred_cluster_array, + value_t lower_label_range, + value_t upper_label_range) +{ + RAFT_EXPECTS(truth_cluster_array.size() == pred_cluster_array.size(), "Size mismatch"); + RAFT_EXPECTS(truth_cluster_array.is_exhaustive(), "truth_cluster_array must be contiguous"); + RAFT_EXPECTS(pred_cluster_array.is_exhaustive(), "pred_cluster_array must be contiguous"); + return detail::homogeneity_score(pred_cluster_array.data_handle(), + truth_cluster_array.data_handle(), + truth_cluster_array.extent(0), + lower_label_range, + upper_label_range, + handle.get_stream()); } }; // end namespace stats diff --git a/cpp/include/raft/stats/contingency_matrix.cuh b/cpp/include/raft/stats/contingency_matrix.cuh index 081782432c..10dedc44eb 100644 --- a/cpp/include/raft/stats/contingency_matrix.cuh +++ b/cpp/include/raft/stats/contingency_matrix.cuh @@ -19,6 +19,10 @@ #pragma once +#include +#include +#include +#include #include namespace raft { @@ -40,6 +44,31 @@ void getInputClassCardinality( detail::getInputClassCardinality(groundTruth, nSamples, stream, minLabel, maxLabel); } +/** + * @brief use this to allocate output matrix size + * size of matrix = 
(maxLabel - minLabel + 1)^2 * sizeof(int) + * @tparam value_t label type + * @tparam idx_t Index type of matrix extent. + * @param[in] handle: the raft handle. + * @param[in] groundTruth: device 1-d array for ground truth (num of rows) + * @param[out] minLabel: calculated min value in input array + * @param[out] maxLabel: calculated max value in input array + */ +template +void get_input_class_cardinality(const raft::handle_t& handle, + raft::device_vector_view groundTruth, + raft::host_scalar_view minLabel, + raft::host_scalar_view maxLabel) +{ + RAFT_EXPECTS(minLabel.data_handle() != nullptr, "Invalid minLabel pointer"); + RAFT_EXPECTS(maxLabel.data_handle() != nullptr, "Invalid maxLabel pointer"); + detail::getInputClassCardinality(groundTruth.data_handle(), + groundTruth.extent(0), + handle.get_stream(), + *minLabel.data_handle(), + *maxLabel.data_handle()); +} + /** * @brief Calculate workspace size for running contingency matrix calculations * @tparam T label type @@ -71,7 +100,7 @@ size_t getContingencyMatrixWorkspaceSize(int nSamples, * @param groundTruth: device 1-d array for ground truth (num of rows) * @param predictedLabel: device 1-d array for prediction (num of columns) * @param nSamples: number of elements in input array - * @param outMat: output buffer for contingecy matrix + * @param outMat: output buffer for contingency matrix * @param stream: cuda stream for execution * @param workspace: Optional, workspace memory allocation * @param workspaceSize: Optional, size of workspace memory @@ -100,6 +129,82 @@ void contingencyMatrix(const T* groundTruth, maxLabel); } +/** + * @brief construct contingency matrix given input ground truth and prediction + * labels. Users should call function getInputClassCardinality to find + * and allocate memory for output. 
The workspace is sized with + * getContingencyMatrixWorkspaceSize and allocated internally by this overload. + * @tparam value_t label type + * @tparam out_t output matrix type + * @tparam idx_t Index type of matrix extent. + * @tparam layout_t Layout type of the input data. + * @param[in] handle: the raft handle. + * @param[in] ground_truth: device 1-d array for ground truth (num of rows) + * @param[in] predicted_label: device 1-d array for prediction (num of columns) + * @param[out] out_mat: output buffer for contingency matrix + * @param[in] min_label: Optional, min value in input ground truth array + * @param[in] max_label: Optional, max value in input ground truth array + */ +template +void contingency_matrix(const raft::handle_t& handle, + raft::device_vector_view ground_truth, + raft::device_vector_view predicted_label, + raft::device_matrix_view out_mat, + std::optional min_label = std::nullopt, + std::optional max_label = std::nullopt) +{ + RAFT_EXPECTS(ground_truth.size() == predicted_label.size(), "Size mismatch"); + RAFT_EXPECTS(ground_truth.is_exhaustive(), "ground_truth must be contiguous"); + RAFT_EXPECTS(predicted_label.is_exhaustive(), "predicted_label must be contiguous"); + RAFT_EXPECTS(out_mat.is_exhaustive(), "out_mat must be contiguous"); + + value_t min_label_value = std::numeric_limits::max(); + value_t max_label_value = std::numeric_limits::max(); + if (min_label.has_value()) { min_label_value = min_label.value(); } + if (max_label.has_value()) { max_label_value = max_label.value(); } + + auto workspace_sz = detail::getContingencyMatrixWorkspaceSize(ground_truth.extent(0), + ground_truth.data_handle(), + handle.get_stream(), + min_label_value, + max_label_value); + auto workspace = raft::make_device_vector(handle, workspace_sz); + + detail::contingencyMatrix(ground_truth.data_handle(), + predicted_label.data_handle(), + ground_truth.extent(0), + out_mat.data_handle(), + handle.get_stream(), + workspace.data_handle(), + workspace_sz, + 
min_label_value, + max_label_value); +} + +/** + * @brief Overload of `contingency_matrix` to help the + * compiler find the above overload, in case users pass in + * `std::nullopt` for the optional arguments. + * + * Please see above for documentation of `contingency_matrix`. + */ +template +void contingency_matrix(const raft::handle_t& handle, + raft::device_vector_view ground_truth, + raft::device_vector_view predicted_label, + raft::device_matrix_view out_mat, + opt_min_label_t&& min_label = std::nullopt, + opt_max_label_t&& max_label = std::nullopt) +{ + std::optional opt_min_label = std::forward(min_label); + std::optional opt_max_label = std::forward(max_label); + contingency_matrix(handle, ground_truth, predicted_label, out_mat, opt_min_label, opt_max_label); +} }; // namespace stats }; // namespace raft diff --git a/cpp/include/raft/stats/cov.cuh b/cpp/include/raft/stats/cov.cuh index 06e8ba0215..a0c2ed2090 100644 --- a/cpp/include/raft/stats/cov.cuh +++ b/cpp/include/raft/stats/cov.cuh @@ -19,6 +19,7 @@ #pragma once +#include #include namespace raft { namespace stats { @@ -57,6 +58,55 @@ void cov(const raft::handle_t& handle, { detail::cov(handle, covar, data, mu, D, N, sample, rowMajor, stable, stream); } + +/** + * @brief Compute covariance of the input matrix + * + * Mean operation is assumed to be performed on a given column. + * + * @tparam value_t the data type + * @tparam idx_t the index type + * @tparam layout_t Layout type of the input data. + * @param[in] handle the raft handle + * @param[in] data the input matrix (this will get mean-centered at the end!) + * (length = nrows * ncols) + * @param[in] mu mean vector of the input matrix (length = ncols) + * @param[out] covar the output covariance matrix (length = ncols * ncols) + * @param[in] sample whether to evaluate sample covariance or not. 
In other words, + * whether to normalize the output using N-1 or N, for true or false, + * respectively + * @param[in] stable whether to run the slower-but-numerically-stable version or not + * @note if stable=true, then the input data will be mean centered after this + * function returns! + */ +template +void cov(const raft::handle_t& handle, + raft::device_matrix_view data, + raft::device_vector_view mu, + raft::device_matrix_view covar, + bool sample, + bool stable) +{ + static_assert( + std::is_same_v || std::is_same_v, + "Data layout not supported"); + RAFT_EXPECTS(data.extent(1) == covar.extent(0) && data.extent(1) == covar.extent(1), + "Size mismatch"); + RAFT_EXPECTS(data.is_exhaustive(), "data must be contiguous"); + RAFT_EXPECTS(covar.is_exhaustive(), "covar must be contiguous"); + RAFT_EXPECTS(mu.is_exhaustive(), "mu must be contiguous"); + + detail::cov(handle, + covar.data_handle(), + data.data_handle(), + mu.data_handle(), + data.extent(1), + data.extent(0), + std::is_same_v, + sample, + stable, + handle.get_stream()); +} }; // end namespace stats }; // end namespace raft diff --git a/cpp/include/raft/stats/detail/batched/silhouette_score.cuh b/cpp/include/raft/stats/detail/batched/silhouette_score.cuh index e3b56d2183..25a3721af1 100644 --- a/cpp/include/raft/stats/detail/batched/silhouette_score.cuh +++ b/cpp/include/raft/stats/detail/batched/silhouette_score.cuh @@ -112,7 +112,7 @@ __global__ void compute_chunked_a_b_kernel(value_t* a, template rmm::device_uvector get_cluster_counts(const raft::handle_t& handle, - label_idx* y, + const label_idx* y, value_idx& n_rows, label_idx& n_labels) { @@ -129,8 +129,8 @@ rmm::device_uvector get_cluster_counts(const raft::handle_t& handle, template rmm::device_uvector get_pairwise_distance(const raft::handle_t& handle, - value_t* left_begin, - value_t* right_begin, + const value_t* left_begin, + const value_t* right_begin, value_idx& n_left_rows, value_idx& n_right_rows, value_idx& n_cols, @@ -170,10 +170,10 
@@ void compute_chunked_a_b(const raft::handle_t& handle, template value_t silhouette_score( const raft::handle_t& handle, - value_t* X, + const value_t* X, value_idx n_rows, value_idx n_cols, - label_idx* y, + const label_idx* y, label_idx n_labels, value_t* scores, value_idx chunk, @@ -221,8 +221,8 @@ value_t silhouette_score( auto chunk_stream = handle.get_next_usable_stream(i + chunk * j); - auto* left_begin = X + (i * n_cols); - auto* right_begin = X + (j * n_cols); + const auto* left_begin = X + (i * n_cols); + const auto* right_begin = X + (j * n_cols); auto n_left_rows = (i + chunk) < n_rows ? chunk : (n_rows - i); auto n_right_rows = (j + chunk) < n_rows ? chunk : (n_rows - j); diff --git a/cpp/include/raft/stats/detail/histogram.cuh b/cpp/include/raft/stats/detail/histogram.cuh index 54fe683b77..777e0b7816 100644 --- a/cpp/include/raft/stats/detail/histogram.cuh +++ b/cpp/include/raft/stats/detail/histogram.cuh @@ -465,7 +465,7 @@ HistType selectBestHistAlgo(IdxT nbins) * @param nbins number of bins * @param data input data (length = ncols * nrows) * @param nrows data array length in each column (or batch) - * @param ncols number of columsn (or batch size) + * @param ncols number of columns (or batch size) * @param stream cuda stream * @param binner the operation that computes the bin index of the input data * diff --git a/cpp/include/raft/stats/detail/rand_index.cuh b/cpp/include/raft/stats/detail/rand_index.cuh index a827427d8f..1e66216929 100644 --- a/cpp/include/raft/stats/detail/rand_index.cuh +++ b/cpp/include/raft/stats/detail/rand_index.cuh @@ -125,8 +125,8 @@ __global__ void computeTheNumerator( * @param stream: the cudaStream object */ template -double compute_rand_index(T* firstClusterArray, - T* secondClusterArray, +double compute_rand_index(const T* firstClusterArray, + const T* secondClusterArray, uint64_t size, cudaStream_t stream) { diff --git a/cpp/include/raft/stats/detail/silhouette_score.cuh 
b/cpp/include/raft/stats/detail/silhouette_score.cuh index f2e138ed6f..cfaff5fcce 100644 --- a/cpp/include/raft/stats/detail/silhouette_score.cuh +++ b/cpp/include/raft/stats/detail/silhouette_score.cuh @@ -56,7 +56,7 @@ template __global__ void populateAKernel(DataT* sampleToClusterSumOfDistances, DataT* binCountArray, DataT* d_aArray, - LabelT* labels, + const LabelT* labels, int nRows, int nLabels, const DataT MAX_VAL) @@ -102,7 +102,7 @@ __global__ void populateAKernel(DataT* sampleToClusterSumOfDistances, * @param stream: the cuda stream where to launch this kernel */ template -void countLabels(LabelT* labels, +void countLabels(const LabelT* labels, DataT* binCountArray, int nRows, int nUniqueLabels, @@ -205,10 +205,10 @@ struct MinOp { template DataT silhouette_score( const raft::handle_t& handle, - DataT* X_in, + const DataT* X_in, int nRows, int nCols, - LabelT* labels, + const LabelT* labels, int nLabels, DataT* silhouette_scorePerSample, cudaStream_t stream, diff --git a/cpp/include/raft/stats/detail/weighted_mean.cuh b/cpp/include/raft/stats/detail/weighted_mean.cuh index e8f85b4af3..43dbe4e7f1 100644 --- a/cpp/include/raft/stats/detail/weighted_mean.cuh +++ b/cpp/include/raft/stats/detail/weighted_mean.cuh @@ -71,4 +71,4 @@ void weightedMean(Type* mu, } }; // end namespace detail }; // end namespace stats -}; // end namespace raft +}; // end namespace raft \ No newline at end of file diff --git a/cpp/include/raft/stats/dispersion.cuh b/cpp/include/raft/stats/dispersion.cuh index c868092517..9f995e4d5a 100644 --- a/cpp/include/raft/stats/dispersion.cuh +++ b/cpp/include/raft/stats/dispersion.cuh @@ -19,6 +19,8 @@ #pragma once +#include +#include #include namespace raft { @@ -51,10 +53,72 @@ DataT dispersion(const DataT* centroids, IdxT dim, cudaStream_t stream) { - return detail::dispersion( + return detail::dispersion( centroids, clusterSizes, globalCentroid, nClusters, nPoints, dim, stream); } +/** + * @brief Compute cluster dispersion metric. 
This is very useful for + * automatically finding the 'k' (in kmeans) that improves this metric. + * The cluster dispersion metric is defined as the square root of the sum of the + * squared distances between the cluster centroids and the global centroid + * @tparam value_t data type + * @tparam idx_t index type + * @param[in] handle the raft handle + * @param[in] centroids the cluster centroids. This is assumed to be row-major + * and of dimension (n_clusters x dim) + * @param[in] cluster_sizes number of points in the dataset which belong to each + * cluster. This is of length n_clusters + * @param[out] global_centroid compute the global weighted centroid of all cluster + * centroids. This is of length dim. Use std::nullopt to not return it. + * @param[in] n_points number of points in the dataset + * @return the cluster dispersion value + */ +template +value_t cluster_dispersion( + const raft::handle_t& handle, + raft::device_matrix_view centroids, + raft::device_vector_view cluster_sizes, + std::optional> global_centroid, + const idx_t n_points) +{ + RAFT_EXPECTS(cluster_sizes.extent(0) == centroids.extent(0), "Size mismatch"); + RAFT_EXPECTS(cluster_sizes.is_exhaustive(), "cluster_sizes must be contiguous"); + + value_t* global_centroid_ptr = nullptr; + if (global_centroid.has_value()) { + RAFT_EXPECTS(global_centroid.value().extent(0) == centroids.extent(1), + "Size mismatch between global_centroid and centroids"); + RAFT_EXPECTS(global_centroid.value().is_exhaustive(), "global_centroid must be contiguous"); + global_centroid_ptr = global_centroid.value().data_handle(); + } + return detail::dispersion(centroids.data_handle(), + cluster_sizes.data_handle(), + global_centroid_ptr, + centroids.extent(0), + n_points, + centroids.extent(1), + handle.get_stream()); +} + +/** + * @brief Overload of `cluster_dispersion` to help the + * compiler find the above overload, in case users pass in + * `std::nullopt` for the optional arguments. 
+ * + * Please see above for documentation of `cluster_dispersion`. + */ +template +value_t cluster_dispersion( + const raft::handle_t& handle, + raft::device_matrix_view centroids, + raft::device_vector_view cluster_sizes, + std::nullopt_t global_centroid, + const idx_t n_points) +{ + std::optional> opt_centroid = global_centroid; + return cluster_dispersion(handle, centroids, cluster_sizes, opt_centroid, n_points); +} } // end namespace stats } // end namespace raft diff --git a/cpp/include/raft/stats/entropy.cuh b/cpp/include/raft/stats/entropy.cuh index 59cbbd368f..8a98a03c6b 100644 --- a/cpp/include/raft/stats/entropy.cuh +++ b/cpp/include/raft/stats/entropy.cuh @@ -18,6 +18,7 @@ #define __ENTROPY_H #pragma once +#include #include namespace raft { @@ -27,6 +28,7 @@ namespace stats { * @brief Function to calculate entropy * more info on entropy * + * @tparam T data type * @param clusterArray: the array of classes of type T * @param size: the size of the data points of type int * @param lowerLabelRange: the lower bound of the range of labels @@ -44,6 +46,31 @@ double entropy(const T* clusterArray, return detail::entropy(clusterArray, size, lowerLabelRange, upperLabelRange, stream); } +/** + * @brief Function to calculate entropy + * more info on entropy + * + * @tparam value_t data type + * @tparam idx_t index type + * @param[in] handle the raft handle + * @param[in] cluster_array: the array of classes of type value_t + * @param[in] lower_label_range: the lower bound of the range of labels + * @param[in] upper_label_range: the upper bound of the range of labels + * @return the entropy score + */ +template +double entropy(const raft::handle_t& handle, + raft::device_vector_view cluster_array, + const value_t lower_label_range, + const value_t upper_label_range) +{ + RAFT_EXPECTS(cluster_array.is_exhaustive(), "cluster_array must be contiguous"); + return detail::entropy(cluster_array.data_handle(), + cluster_array.extent(0), + lower_label_range, + 
upper_label_range, + handle.get_stream()); +} }; // end namespace stats }; // end namespace raft diff --git a/cpp/include/raft/stats/histogram.cuh b/cpp/include/raft/stats/histogram.cuh index e8176ebc92..df1c2772f1 100644 --- a/cpp/include/raft/stats/histogram.cuh +++ b/cpp/include/raft/stats/histogram.cuh @@ -19,6 +19,7 @@ #pragma once +#include #include #include @@ -42,7 +43,7 @@ namespace stats { * @param nbins number of bins * @param data input data (length = ncols * nrows) * @param nrows data array length in each column (or batch) - * @param ncols number of columsn (or batch size) + * @param ncols number of columns (or batch size) * @param stream cuda stream * @param binner the operation that computes the bin index of the input data * @@ -61,6 +62,42 @@ void histogram(HistType type, detail::histogram(type, bins, nbins, data, nrows, ncols, stream, binner); } +/** + * @brief Perform histogram on the input data. It chooses the right load size + * based on the input data vector length. It also supports large-bin cases + * using a specialized smem-based hashing technique. 
+ * @tparam value_t input data type + * @tparam idx_t data type used to compute indices + * @tparam binner_op takes the input data and computes its bin index + * @param[in] handle the raft handle + * @param[in] type histogram implementation type to choose + * @param[in] data input data col-major (length = nrows * ncols) + * @param[out] bins the output bins col-major (length = nbins * ncols) + * @param[in] binner the operation that computes the bin index of the input data + * + * @note signature of binner_op is `int func(value_t, idx_t);` + */ +template > +void histogram(const raft::handle_t& handle, + HistType type, + raft::device_matrix_view data, + raft::device_matrix_view bins, + binner_op binner = IdentityBinner()) +{ + RAFT_EXPECTS(std::is_integral_v && data.extent(0) <= std::numeric_limits::max(), + "Index type not supported"); + RAFT_EXPECTS(bins.extent(1) == data.extent(1), "Size mismatch"); + RAFT_EXPECTS(bins.is_exhaustive(), "bins must be contiguous"); + RAFT_EXPECTS(data.is_exhaustive(), "data must be contiguous"); + detail::histogram(type, + bins.data_handle(), + bins.extent(0), + data.data_handle(), + data.extent(0), + data.extent(1), + handle.get_stream(), + binner); +} }; // end namespace stats }; // end namespace raft diff --git a/cpp/include/raft/stats/homogeneity_score.cuh b/cpp/include/raft/stats/homogeneity_score.cuh index 5fe92db78a..91c479bc99 100644 --- a/cpp/include/raft/stats/homogeneity_score.cuh +++ b/cpp/include/raft/stats/homogeneity_score.cuh @@ -19,6 +19,7 @@ #pragma once +#include #include namespace raft { @@ -47,6 +48,37 @@ double homogeneity_score(const T* truthClusterArray, truthClusterArray, predClusterArray, size, lowerLabelRange, upperLabelRange, stream); } +/** + * @brief Function to calculate the homogeneity score between two clusters + * more info on mutual + * information + * + * @tparam value_t data type + * @tparam idx_t index type + * @param[in] handle the raft handle + * @param[in] truth_cluster_array: the array of 
truth classes of type value_t + * @param[in] pred_cluster_array: the array of predicted classes of type value_t + * @param[in] lower_label_range: the lower bound of the range of labels + * @param[in] upper_label_range: the upper bound of the range of labels + * @return the homogeneity score + */ +template +double homogeneity_score(const raft::handle_t& handle, + raft::device_vector_view truth_cluster_array, + raft::device_vector_view pred_cluster_array, + value_t lower_label_range, + value_t upper_label_range) +{ + RAFT_EXPECTS(truth_cluster_array.size() == pred_cluster_array.size(), "Size mismatch"); + RAFT_EXPECTS(truth_cluster_array.is_exhaustive(), "truth_cluster_array must be contiguous"); + RAFT_EXPECTS(pred_cluster_array.is_exhaustive(), "pred_cluster_array must be contiguous"); + return detail::homogeneity_score(truth_cluster_array.data_handle(), + pred_cluster_array.data_handle(), + truth_cluster_array.extent(0), + lower_label_range, + upper_label_range, + handle.get_stream()); +} }; // end namespace stats }; // end namespace raft diff --git a/cpp/include/raft/stats/information_criterion.cuh b/cpp/include/raft/stats/information_criterion.cuh index 0744dcdffe..8ab4723d01 100644 --- a/cpp/include/raft/stats/information_criterion.cuh +++ b/cpp/include/raft/stats/information_criterion.cuh @@ -29,6 +29,8 @@ #pragma once +#include +#include #include #include @@ -63,6 +65,42 @@ void information_criterion_batched(ScalarT* d_ic, d_ic, d_loglikelihood, ic_type, n_params, batch_size, n_samples, stream); } +/** + * Compute the given type of information criterion + * + * @note: it is safe to do the computation in-place (i.e give same pointer + * as input and output) + * + * @tparam value_t data type + * @tparam idx_t index type + * @param[in] handle the raft handle + * @param[in] d_loglikelihood Log-likelihood for each series (device) length: batch_size + * @param[out] d_ic Information criterion to be returned for each + * series (device) length: batch_size + * 
@param[in] ic_type Type of criterion to compute. See IC_Type + * @param[in] n_params Number of parameters in the model + * @param[in] n_samples Number of samples in each series + */ +template +void information_criterion_batched(const raft::handle_t& handle, + raft::device_vector_view d_loglikelihood, + raft::device_vector_view d_ic, + IC_Type ic_type, + idx_t n_params, + idx_t n_samples) +{ + RAFT_EXPECTS(d_ic.size() == d_loglikelihood.size(), "Size mismatch"); + RAFT_EXPECTS(d_ic.is_exhaustive(), "d_ic must be contiguous"); + RAFT_EXPECTS(d_loglikelihood.is_exhaustive(), "d_loglikelihood must be contiguous"); + batched::detail::information_criterion(d_ic.data_handle(), + d_loglikelihood.data_handle(), + ic_type, + n_params, + d_ic.extent(0), + n_samples, + handle.get_stream()); +} + } // namespace stats } // namespace raft #endif diff --git a/cpp/include/raft/stats/kl_divergence.cuh b/cpp/include/raft/stats/kl_divergence.cuh index b29f277b4a..265e87dc68 100644 --- a/cpp/include/raft/stats/kl_divergence.cuh +++ b/cpp/include/raft/stats/kl_divergence.cuh @@ -19,6 +19,7 @@ #pragma once +#include #include namespace raft { @@ -41,6 +42,30 @@ DataT kl_divergence(const DataT* modelPDF, const DataT* candidatePDF, int size, return detail::kl_divergence(modelPDF, candidatePDF, size, stream); } +/** + * @brief Function to calculate KL Divergence + * more info on KL + * Divergence + * + * @tparam value_t: Data type of the input array + * @tparam idx_t index type + * @param[in] handle the raft handle + * @param[in] modelPDF: the model array of probability density functions of type value_t + * @param[in] candidatePDF: the candidate array of probability density functions of type value_t + * @return the KL Divergence value + */ +template +value_t kl_divergence(const raft::handle_t& handle, + raft::device_vector_view modelPDF, + raft::device_vector_view candidatePDF) +{ + RAFT_EXPECTS(modelPDF.size() == candidatePDF.size(), "Size mismatch"); + 
RAFT_EXPECTS(modelPDF.is_exhaustive(), "modelPDF must be contiguous"); + RAFT_EXPECTS(candidatePDF.is_exhaustive(), "candidatePDF must be contiguous"); + return detail::kl_divergence( + modelPDF.data_handle(), candidatePDF.data_handle(), modelPDF.extent(0), handle.get_stream()); +} + }; // end namespace stats }; // end namespace raft diff --git a/cpp/include/raft/stats/mean.cuh b/cpp/include/raft/stats/mean.cuh index 976b58c048..d5913e6176 100644 --- a/cpp/include/raft/stats/mean.cuh +++ b/cpp/include/raft/stats/mean.cuh @@ -19,9 +19,9 @@ #pragma once -#include "detail/mean.cuh" - +#include #include +#include namespace raft { namespace stats { @@ -50,6 +50,41 @@ void mean( detail::mean(mu, data, D, N, sample, rowMajor, stream); } +/** + * @brief Compute mean of the input matrix + * + * Mean operation is assumed to be performed on a given column. + * + * @tparam value_t the data type + * @tparam idx_t index type + * @tparam layout_t Layout type of the input matrix. + * @param[in] handle the raft handle + * @param[in] data: the input matrix + * @param[out] mu: the output mean vector + * @param[in] sample: whether to evaluate sample mean or not. 
In other words, whether + * to normalize the output using N-1 or N, for true or false, respectively + */ +template +void mean(const raft::handle_t& handle, + raft::device_matrix_view data, + raft::device_vector_view mu, + bool sample) +{ + static_assert( + std::is_same_v || std::is_same_v, + "Data layout not supported"); + RAFT_EXPECTS(data.extent(1) == mu.extent(0), "Size mismatch between data and mu"); + RAFT_EXPECTS(mu.is_exhaustive(), "mu must be contiguous"); + RAFT_EXPECTS(data.is_exhaustive(), "data must be contiguous"); + detail::mean(mu.data_handle(), + data.data_handle(), + data.extent(1), + data.extent(0), + sample, + std::is_same_v, + handle.get_stream()); +} + }; // namespace stats }; // namespace raft diff --git a/cpp/include/raft/stats/mean_center.cuh b/cpp/include/raft/stats/mean_center.cuh index 3b2222ef52..fba2aa5b5a 100644 --- a/cpp/include/raft/stats/mean_center.cuh +++ b/cpp/include/raft/stats/mean_center.cuh @@ -19,7 +19,8 @@ #pragma once -#include "detail/mean_center.cuh" +#include +#include namespace raft { namespace stats { @@ -51,6 +52,42 @@ void meanCenter(Type* out, detail::meanCenter(out, data, mu, D, N, rowMajor, bcastAlongRows, stream); } +/** + * @brief Center the input matrix wrt its mean + * @tparam value_t the data type + * @tparam idx_t index type + * @tparam layout_t Layout type of the input matrix. + * @param[in] handle the raft handle + * @param[in] data input matrix of size nrows * ncols + * @param[in] mu the mean vector of size ncols if bcast_along_rows else nrows + * @param[out] out the output mean-centered matrix + * @param[in] bcast_along_rows whether to broadcast vector along rows or columns + */ +template +void mean_center(const raft::handle_t& handle, + raft::device_matrix_view data, + raft::device_vector_view mu, + raft::device_matrix_view out, + bool bcast_along_rows) +{ + static_assert( + std::is_same_v || std::is_same_v, + "Data layout not supported"); + auto mean_vec_size = bcast_along_rows ?
data.extent(1) : data.extent(0); + RAFT_EXPECTS(out.extents() == data.extents(), "Size mismatch"); + RAFT_EXPECTS(mean_vec_size == mu.extent(0), "Size mismatch between data and mu"); + RAFT_EXPECTS(out.is_exhaustive(), "out must be contiguous"); + RAFT_EXPECTS(data.is_exhaustive(), "data must be contiguous"); + detail::meanCenter(out.data_handle(), + data.data_handle(), + mu.data_handle(), + data.extent(1), + data.extent(0), + std::is_same_v, + bcast_along_rows, + handle.get_stream()); +} + /** * @brief Add the input matrix wrt its mean * @tparam Type the data type @@ -78,6 +115,42 @@ void meanAdd(Type* out, detail::meanAdd(out, data, mu, D, N, rowMajor, bcastAlongRows, stream); } +/** + * @brief Add the input matrix wrt its mean + * @tparam Type the data type + * @tparam idx_t index type + * @tparam layout_t Layout type of the input matrix. + * @tparam TPB threads per block of the cuda kernel launched + * @param[in] handle the raft handle + * @param[in] data input matrix of size nrows * ncols + * @param[in] mu the mean vector of size ncols if bcast_along_rows else nrows + * @param[out] out the output matrix with the mean added + * @param[in] bcast_along_rows whether to broadcast vector along rows or columns + */ +template +void mean_add(const raft::handle_t& handle, + raft::device_matrix_view data, + raft::device_vector_view mu, + raft::device_matrix_view out, + bool bcast_along_rows) +{ + static_assert( + std::is_same_v || std::is_same_v, + "Data layout not supported"); + auto mean_vec_size = bcast_along_rows ?
data.extent(1) : data.extent(0); + RAFT_EXPECTS(out.extents() == data.extents(), "Size mismatch"); + RAFT_EXPECTS(mean_vec_size == mu.extent(0), "Size mismatch between data and mu"); + RAFT_EXPECTS(out.is_exhaustive(), "out must be contiguous"); + RAFT_EXPECTS(data.is_exhaustive(), "data must be contiguous"); + detail::meanAdd(out.data_handle(), + data.data_handle(), + mu.data_handle(), + data.extent(1), + data.extent(0), + std::is_same_v, + bcast_along_rows, + handle.get_stream()); +} }; // end namespace stats }; // end namespace raft diff --git a/cpp/include/raft/stats/meanvar.cuh b/cpp/include/raft/stats/meanvar.cuh index 0c3c423493..544aed092d 100644 --- a/cpp/include/raft/stats/meanvar.cuh +++ b/cpp/include/raft/stats/meanvar.cuh @@ -18,7 +18,8 @@ #pragma once -#include "detail/meanvar.cuh" +#include +#include namespace raft::stats { @@ -55,6 +56,49 @@ void meanvar(Type* mean, detail::meanvar(mean, var, data, D, N, sample, rowMajor, stream); } +/** + * @brief Compute mean and variance for each column of a given matrix. + * + * The operation is performed in a single sweep. Consider using it when you need to compute + * both mean and variance, or when you need to compute variance but don't have the mean. + * It's almost twice faster than running `mean` and `vars` sequentially, because all three + * kernels are memory-bound. + * + * @tparam value_t the data type + * @tparam idx_t Integer type used for addressing + * @tparam layout_t Layout type of the input matrix. + * @param[in] handle the raft handle + * @param[in] data the input matrix of size [N, D] + * @param[out] mean the output mean vector of size D + * @param[out] var the output variance vector of size D + * @param[in] sample whether to evaluate sample variance or not. In other words, whether to + * normalize the variance using N-1 or N, for true or false respectively.
+ */ +template +void meanvar(const raft::handle_t& handle, + raft::device_matrix_view data, + raft::device_vector_view mean, + raft::device_vector_view var, + bool sample) +{ + static_assert( + std::is_same_v || std::is_same_v, + "Data layout not supported"); + RAFT_EXPECTS(data.extent(1) == var.extent(0), "Size mismatch between data and var"); + RAFT_EXPECTS(mean.size() == var.size(), "Size mismatch between mean and var"); + RAFT_EXPECTS(mean.is_exhaustive(), "mean must be contiguous"); + RAFT_EXPECTS(var.is_exhaustive(), "var must be contiguous"); + RAFT_EXPECTS(data.is_exhaustive(), "data must be contiguous"); + detail::meanvar(mean.data_handle(), + var.data_handle(), + data.data_handle(), + data.extent(1), + data.extent(0), + sample, + std::is_same_v, + handle.get_stream()); +} + }; // namespace raft::stats #endif diff --git a/cpp/include/raft/stats/minmax.cuh b/cpp/include/raft/stats/minmax.cuh index 431d06ec6f..305e63cc10 100644 --- a/cpp/include/raft/stats/minmax.cuh +++ b/cpp/include/raft/stats/minmax.cuh @@ -18,6 +18,8 @@ #pragma once +#include +#include #include #include #include @@ -68,6 +70,67 @@ void minmax(const T* data, data, rowids, colids, nrows, ncols, row_stride, globalmin, globalmax, sampledcols, stream); } +/** + * @brief Computes min/max across every column of the input matrix, as well as + * optionally allow to subsample based on the given row/col ID mapping vectors + * + * @tparam value_t Data type of input matrix element. + * @tparam idx_t Index type of matrix extent. + * @param[in] handle the raft handle + * @param[in] data input data col-major of size [nrows, ncols], unless rowids or + * colids length is smaller + * @param[in] rowids optional row ID mappings of length nrows. If you want to + * skip this index lookup entirely, pass std::nullopt + * @param[in] colids optional col ID mappings of length ncols.
If you want to + * skip this index lookup entirely, pass std::nullopt + * @param[out] globalmin final col-wise global minimum (size = ncols) + * @param[out] globalmax final col-wise global maximum (size = ncols) + * @param[out] sampledcols output sampled data. Pass std::nullopt if you don't need this + * @note This method makes the following assumptions: + * 1. input and output matrices are assumed to be col-major + * 2. ncols is small enough to fit the whole of min/max values across all cols + * in shared memory + */ +template +void minmax(const raft::handle_t& handle, + raft::device_matrix_view data, + std::optional> rowids, + std::optional> colids, + raft::device_vector_view globalmin, + raft::device_vector_view globalmax, + std::optional> sampledcols) +{ + const unsigned* rowids_ptr = nullptr; + const unsigned* colids_ptr = nullptr; + value_t* sampledcols_ptr = nullptr; + auto nrows = data.extent(0); + auto ncols = data.extent(1); + auto row_stride = data.stride(1); + if (rowids.has_value()) { + rowids_ptr = rowids.value().data_handle(); + RAFT_EXPECTS(rowids.value().extent(0) <= nrows, "Rowids size is greater than nrows"); + nrows = rowids.value().extent(0); + } + if (colids.has_value()) { + colids_ptr = colids.value().data_handle(); + RAFT_EXPECTS(colids.value().extent(0) <= ncols, "Colids size is greater than ncols"); + ncols = colids.value().extent(0); + } + if (sampledcols.has_value()) { sampledcols_ptr = sampledcols.value().data_handle(); } + RAFT_EXPECTS(globalmin.extent(0) == ncols, "Size mismatch between globalmin and ncols"); + RAFT_EXPECTS(globalmax.extent(0) == ncols, "Size mismatch between globalmax and ncols"); + detail::minmax(data.data_handle(), + rowids_ptr, + colids_ptr, + nrows, + ncols, + row_stride, + globalmin.data_handle(), + globalmax.data_handle(), + sampledcols_ptr, + handle.get_stream()); +} + }; // namespace stats }; // namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/stats/mutual_info_score.cuh
b/cpp/include/raft/stats/mutual_info_score.cuh index 9e48168e74..e953f12461 100644 --- a/cpp/include/raft/stats/mutual_info_score.cuh +++ b/cpp/include/raft/stats/mutual_info_score.cuh @@ -19,6 +19,7 @@ #pragma once +#include #include namespace raft { @@ -46,6 +47,36 @@ double mutual_info_score(const T* firstClusterArray, firstClusterArray, secondClusterArray, size, lowerLabelRange, upperLabelRange, stream); } +/** + * @brief Function to calculate the mutual information between two clusters + * more info on mutual information + * @tparam value_t the data type + * @tparam idx_t index type + * @param[in] handle the raft handle + * @param[in] first_cluster_array: the array of classes of type value_t + * @param[in] second_cluster_array: the array of classes of type value_t + * @param[in] lower_label_range: the lower bound of the range of labels + * @param[in] upper_label_range: the upper bound of the range of labels + * @return the mutual information score + */ +template +double mutual_info_score(const raft::handle_t& handle, + raft::device_vector_view first_cluster_array, + raft::device_vector_view second_cluster_array, + value_t lower_label_range, + value_t upper_label_range) +{ + RAFT_EXPECTS(first_cluster_array.extent(0) == second_cluster_array.extent(0), + "Size mismatch between first_cluster_array and second_cluster_array"); + RAFT_EXPECTS(first_cluster_array.is_exhaustive(), "first_cluster_array must be contiguous"); + RAFT_EXPECTS(second_cluster_array.is_exhaustive(), "second_cluster_array must be contiguous"); + return detail::mutual_info_score(first_cluster_array.data_handle(), + second_cluster_array.data_handle(), + first_cluster_array.extent(0), + lower_label_range, + upper_label_range, + handle.get_stream()); +} }; // end namespace stats }; // end namespace raft diff --git a/cpp/include/raft/stats/r2_score.cuh b/cpp/include/raft/stats/r2_score.cuh index 88fac5aaa6..e7fcdb6a4e 100644 --- a/cpp/include/raft/stats/r2_score.cuh +++
b/cpp/include/raft/stats/r2_score.cuh @@ -19,6 +19,7 @@ #pragma once +#include #include namespace raft { @@ -45,6 +46,39 @@ math_t r2_score(math_t* y, math_t* y_hat, int n, cudaStream_t stream) return detail::r2_score(y, y_hat, n, stream); } +/** + * Calculates the "Coefficient of Determination" (R-Squared) score + * normalizing the sum of squared errors by the total sum of squares. + * + * This score indicates the proportionate amount of variation in an + * expected response variable is explained by the independent variables + * in a linear regression model. The larger the R-squared value, the + * more variability is explained by the linear regression model. + * + * @tparam value_t the data type + * @tparam idx_t index type + * @param[in] handle the raft handle + * @param[in] y: Array of ground-truth response variables + * @param[in] y_hat: Array of predicted response variables + * @return: The R-squared value. + * @note The constness of y and y_hat is currently cast away. + */ +template +value_t r2_score(const raft::handle_t& handle, + raft::device_vector_view y, + raft::device_vector_view y_hat) +{ + RAFT_EXPECTS(y.extent(0) == y_hat.extent(0), "Size mismatch between y and y_hat"); + RAFT_EXPECTS(y.is_exhaustive(), "y must be contiguous"); + RAFT_EXPECTS(y_hat.is_exhaustive(), "y_hat must be contiguous"); + + // TODO: Change the underlying implementation to remove the need to const_cast + return detail::r2_score(const_cast(y.data_handle()), + const_cast(y_hat.data_handle()), + y.extent(0), + handle.get_stream()); +} + } // namespace stats } // namespace raft diff --git a/cpp/include/raft/stats/rand_index.cuh b/cpp/include/raft/stats/rand_index.cuh index 82bf046c4e..72ad53f5d9 100644 --- a/cpp/include/raft/stats/rand_index.cuh +++ b/cpp/include/raft/stats/rand_index.cuh @@ -18,6 +18,8 @@ #pragma once +#include +#include #include namespace raft { @@ -37,6 +39,30 @@ double rand_index(T* firstClusterArray, T* secondClusterArray, uint64_t size, cu return
detail::compute_rand_index(firstClusterArray, secondClusterArray, size, stream); } +/** + * @brief Function to calculate RandIndex + * more info on rand index + * @tparam value_t the data type + * @tparam idx_t index type + * @param[in] handle the raft handle + * @param[in] first_cluster_array: the array of classes of type value_t + * @param[in] second_cluster_array: the array of classes of type value_t + * @return: The RandIndex value. + */ +template +double rand_index(const raft::handle_t& handle, + raft::device_vector_view first_cluster_array, + raft::device_vector_view second_cluster_array) +{ + RAFT_EXPECTS(first_cluster_array.extent(0) == second_cluster_array.extent(0), + "Size mismatch between first_cluster_array and second_cluster_array"); + RAFT_EXPECTS(first_cluster_array.is_exhaustive(), "first_cluster_array must be contiguous"); + RAFT_EXPECTS(second_cluster_array.is_exhaustive(), "second_cluster_array must be contiguous"); + return detail::compute_rand_index(first_cluster_array.data_handle(), + second_cluster_array.data_handle(), + second_cluster_array.extent(0), + handle.get_stream()); +} }; // end namespace stats }; // end namespace raft diff --git a/cpp/include/raft/stats/regression_metrics.cuh b/cpp/include/raft/stats/regression_metrics.cuh index 0fb6d39967..fd33f2af49 100644 --- a/cpp/include/raft/stats/regression_metrics.cuh +++ b/cpp/include/raft/stats/regression_metrics.cuh @@ -18,6 +18,9 @@ #pragma once +#include +#include +#include #include namespace raft { @@ -49,6 +52,46 @@ void regression_metrics(const T* predictions, detail::regression_metrics( predictions, ref_predictions, n, stream, mean_abs_error, mean_squared_error, median_abs_error); } + +/** + * @brief Compute regression metrics mean absolute error, mean squared error, median absolute error + * @tparam value_t the data type for predictions (e.g., float or double for regression).
+ * @tparam idx_t index type + * @param[in] handle the raft handle + * @param[in] predictions: array of predictions. + * @param[in] ref_predictions: array of reference (ground-truth) predictions. + * @param[out] mean_abs_error: Mean Absolute Error. Sum over n of (|predictions[i] - + * ref_predictions[i]|) / n. + * @param[out] mean_squared_error: Mean Squared Error. Sum over n of ((predictions[i] - + * ref_predictions[i])^2) / n. + * @param[out] median_abs_error: Median Absolute Error. Median of |predictions[i] - + * ref_predictions[i]| for i in [0, n). + */ +template +void regression_metrics(const raft::handle_t& handle, + raft::device_vector_view predictions, + raft::device_vector_view ref_predictions, + raft::host_scalar_view mean_abs_error, + raft::host_scalar_view mean_squared_error, + raft::host_scalar_view median_abs_error) +{ + RAFT_EXPECTS(predictions.extent(0) == ref_predictions.extent(0), + "Size mismatch between predictions and ref_predictions"); + RAFT_EXPECTS(predictions.is_exhaustive(), "predictions must be contiguous"); + RAFT_EXPECTS(ref_predictions.is_exhaustive(), "ref_predictions must be contiguous"); + RAFT_EXPECTS(mean_abs_error.data_handle() != nullptr, "mean_abs_error view must not be empty"); + RAFT_EXPECTS(mean_squared_error.data_handle() != nullptr, + "mean_squared_error view must not be empty"); + RAFT_EXPECTS(median_abs_error.data_handle() != nullptr, + "median_abs_error view must not be empty"); + detail::regression_metrics(predictions.data_handle(), + ref_predictions.data_handle(), + predictions.extent(0), + handle.get_stream(), + *mean_abs_error.data_handle(), + *mean_squared_error.data_handle(), + *median_abs_error.data_handle()); +} } // namespace stats } // namespace raft diff --git a/cpp/include/raft/stats/silhouette_score.cuh b/cpp/include/raft/stats/silhouette_score.cuh index 9f02cf6d74..0b7d6436dd 100644 --- a/cpp/include/raft/stats/silhouette_score.cuh +++ b/cpp/include/raft/stats/silhouette_score.cuh @@ -18,6 +18,7 @@ #pragma
once +#include #include #include @@ -73,6 +74,144 @@ value_t silhouette_score_batched( handle, X, n_rows, n_cols, y, n_labels, scores, chunk, metric); } +/** + * @brief main function that returns the average silhouette score for a given set of data and its + * clusterings + * @tparam value_t: type of the data samples + * @tparam label_t: type of the labels + * @tparam idx_t index type + * @param[in] handle: raft handle for managing expensive resources + * @param[in] X_in: input matrix Data in row-major format (nRows x nCols) + * @param[in] labels: the pointer to the array containing labels for every data sample (length: + * nRows) + * @param[out] silhouette_score_per_sample: optional array populated with the silhouette score + * for every sample (length: nRows) + * @param[in] n_unique_labels: number of unique labels in the labels array + * @param[in] metric: the numerical value that maps to the type of distance metric to be used in + * the calculations + * @return: The silhouette score. 
+ */ +template +value_t silhouette_score( + const raft::handle_t& handle, + raft::device_matrix_view X_in, + raft::device_vector_view labels, + std::optional> silhouette_score_per_sample, + idx_t n_unique_labels, + raft::distance::DistanceType metric = raft::distance::DistanceType::L2Unexpanded) +{ + RAFT_EXPECTS(labels.extent(0) == X_in.extent(0), "Size mismatch between labels and data"); + + value_t* silhouette_score_per_sample_ptr = nullptr; + if (silhouette_score_per_sample.has_value()) { + silhouette_score_per_sample_ptr = silhouette_score_per_sample.value().data_handle(); + RAFT_EXPECTS(silhouette_score_per_sample.value().extent(0) == X_in.extent(0), + "Size mismatch between silhouette_score_per_sample and data"); + } + return detail::silhouette_score(handle, + X_in.data_handle(), + X_in.extent(0), + X_in.extent(1), + labels.data_handle(), + n_unique_labels, + silhouette_score_per_sample_ptr, + handle.get_stream(), + metric); +} + +/** + * @brief Overload of `silhouette_score` to help the + * compiler find the above overload, in case users pass in + * `std::nullopt` for the optional arguments. + * + * Please see above for documentation of `silhouette_score`.
+ */ +template +value_t silhouette_score( + const raft::handle_t& handle, + raft::device_matrix_view X_in, + raft::device_vector_view labels, + std::nullopt_t silhouette_score_per_sample, + idx_t n_unique_labels, + raft::distance::DistanceType metric = raft::distance::DistanceType::L2Unexpanded) +{ + std::optional> opt_scores = silhouette_score_per_sample; + return silhouette_score(handle, X_in, labels, opt_scores, n_unique_labels, metric); +} + +/** + * @brief function that returns the average silhouette score for a given set of data and its + * clusterings + * @tparam value_t: type of the data samples + * @tparam label_t: type of the labels + * @tparam idx_t index type + * @param[in] handle: raft handle for managing expensive resources + * @param[in] X: input matrix Data in row-major format (nRows x nCols) + * @param[in] labels: the pointer to the array containing labels for every data sample (length: + * nRows) + * @param[out] silhouette_score_per_sample: optional array populated with the silhouette score + * for every sample (length: nRows) + * @param[in] n_unique_labels: number of unique labels in the labels array + * @param[in] batch_size: number of samples per batch + * @param[in] metric: the numerical value that maps to the type of distance metric to be used in + * the calculations + * @return: The silhouette score. 
+ */ +template +value_t silhouette_score_batched( + const raft::handle_t& handle, + raft::device_matrix_view X, + raft::device_vector_view labels, + std::optional> silhouette_score_per_sample, + idx_t n_unique_labels, + idx_t batch_size, + raft::distance::DistanceType metric = raft::distance::DistanceType::L2Unexpanded) +{ + static_assert(std::is_integral_v, + "silhouette_score_batched: The index type " + "of each mdspan argument must be an integral type."); + static_assert(std::is_integral_v, + "silhouette_score_batched: The label type must be an integral type."); + RAFT_EXPECTS(labels.extent(0) == X.extent(0), "Size mismatch between labels and data"); + + value_t* scores_ptr = nullptr; + if (silhouette_score_per_sample.has_value()) { + scores_ptr = silhouette_score_per_sample.value().data_handle(); + RAFT_EXPECTS(silhouette_score_per_sample.value().extent(0) == X.extent(0), + "Size mismatch between silhouette_score_per_sample and data"); + } + return batched::detail::silhouette_score(handle, + X.data_handle(), + X.extent(0), + X.extent(1), + labels.data_handle(), + n_unique_labels, + scores_ptr, + batch_size, + metric); +} + +/** + * @brief Overload of `silhouette_score_batched` to help the + * compiler find the above overload, in case users pass in + * `std::nullopt` for the optional arguments. + * + * Please see above for documentation of `silhouette_score_batched`.
+ */ +template +value_t silhouette_score_batched( + const raft::handle_t& handle, + raft::device_matrix_view X, + raft::device_vector_view labels, + std::nullopt_t silhouette_score_per_sample, + idx_t n_unique_labels, + idx_t batch_size, + raft::distance::DistanceType metric = raft::distance::DistanceType::L2Unexpanded) +{ + std::optional> opt_scores = silhouette_score_per_sample; + return silhouette_score_batched( + handle, X, labels, opt_scores, n_unique_labels, batch_size, metric); +} }; // namespace stats }; // namespace raft diff --git a/cpp/include/raft/stats/stddev.cuh b/cpp/include/raft/stats/stddev.cuh index 3fc41ebc8c..2747029955 100644 --- a/cpp/include/raft/stats/stddev.cuh +++ b/cpp/include/raft/stats/stddev.cuh @@ -18,9 +18,9 @@ #pragma once -#include "detail/stddev.cuh" - +#include #include +#include namespace raft { namespace stats { @@ -87,6 +87,86 @@ void vars(Type* var, detail::vars(var, data, mu, D, N, sample, rowMajor, stream); } +/** + * @brief Compute stddev of the input matrix + * + * Stddev operation is assumed to be performed on a given column. + * + * @tparam value_t the data type + * @tparam idx_t Integer type used to for addressing + * @tparam layout_t Layout type of the input matrix. + * @param[in] handle the raft handle + * @param[in] data the input matrix + * @param[in] mu the mean vector + * @param[out] std the output stddev vector + * @param[in] sample whether to evaluate sample stddev or not. 
In other words, + * whether + * to normalize the output using N-1 or N, for true or false, respectively + */ +template +void stddev(const raft::handle_t& handle, + raft::device_matrix_view data, + raft::device_vector_view mu, + raft::device_vector_view std, + bool sample) +{ + constexpr bool is_row_major = std::is_same_v; + constexpr bool is_col_major = std::is_same_v; + static_assert(is_row_major || is_col_major, + "stddev: Layout must be either " + "raft::row_major or raft::col_major (or one of their aliases)"); + RAFT_EXPECTS(mu.size() == std.size(), "Size mismatch between mu and std"); + RAFT_EXPECTS(mu.extent(0) == data.extent(1), "Size mismatch between data and mu"); + detail::stddev(std.data_handle(), + data.data_handle(), + mu.data_handle(), + data.extent(1), + data.extent(0), + sample, + is_row_major, + handle.get_stream()); +} + +/** + * @brief Compute variance of the input matrix + * + * Variance operation is assumed to be performed on a given column. + * + * @tparam value_t the data type + * @tparam idx_t Integer type used for addressing + * @tparam layout_t Layout type of the input matrix. + * @param[in] handle the raft handle + * @param[in] data the input matrix + * @param[in] mu the mean vector + * @param[out] var the output variance vector + * @param[in] sample whether to evaluate sample variance or not.
In other words, + * whether + * to normalize the output using N-1 or N, for true or false, respectively + */ +template +void vars(const raft::handle_t& handle, + raft::device_matrix_view data, + raft::device_vector_view mu, + raft::device_vector_view var, + bool sample) +{ + constexpr bool is_row_major = std::is_same_v; + constexpr bool is_col_major = std::is_same_v; + static_assert(is_row_major || is_col_major, + "vars: Layout must be either " + "raft::row_major or raft::col_major (or one of their aliases)"); + RAFT_EXPECTS(mu.size() == var.size(), "Size mismatch between mu and var"); + RAFT_EXPECTS(mu.extent(0) == data.extent(1), "Size mismatch between data and mu"); + detail::vars(var.data_handle(), + data.data_handle(), + mu.data_handle(), + data.extent(1), + data.extent(0), + sample, + is_row_major, + handle.get_stream()); +} + }; // namespace stats }; // namespace raft diff --git a/cpp/include/raft/stats/sum.cuh b/cpp/include/raft/stats/sum.cuh index 89135dd076..18265c5e3a 100644 --- a/cpp/include/raft/stats/sum.cuh +++ b/cpp/include/raft/stats/sum.cuh @@ -19,8 +19,8 @@ #pragma once -#include "detail/sum.cuh" - +#include +#include #include namespace raft { @@ -46,6 +46,37 @@ void sum(Type* output, const Type* input, IdxType D, IdxType N, bool rowMajor, c detail::sum(output, input, D, N, rowMajor, stream); } +/** + * @brief Compute sum of the input matrix + * + * Sum operation is assumed to be performed on a given column. + * + * @tparam value_t the data type + * @tparam idx_t Integer type used for addressing + * @tparam layout_t Layout type of the input matrix.
+ * @param[in] handle the raft handle + * @param[in] input the input matrix + * @param[out] output the output sum vector + */ +template +void sum(const raft::handle_t& handle, + raft::device_matrix_view input, + raft::device_vector_view output) +{ + constexpr bool is_row_major = std::is_same_v; + constexpr bool is_col_major = std::is_same_v; + static_assert(is_row_major || is_col_major, + "sum: Layout must be either " + "raft::row_major or raft::col_major (or one of their aliases)"); + RAFT_EXPECTS(input.extent(1) == output.extent(0), "Size mismatch between input and output"); + detail::sum(output.data_handle(), + input.data_handle(), + input.extent(1), + input.extent(0), + is_row_major, + handle.get_stream()); +} + }; // end namespace stats }; // end namespace raft diff --git a/cpp/include/raft/stats/trustworthiness_score.cuh b/cpp/include/raft/stats/trustworthiness_score.cuh index c89eab8d2b..b7b3999f77 100644 --- a/cpp/include/raft/stats/trustworthiness_score.cuh +++ b/cpp/include/raft/stats/trustworthiness_score.cuh @@ -18,6 +18,8 @@ #define __TRUSTWORTHINESS_SCORE_H #pragma once +#include +#include #include namespace raft { @@ -48,6 +50,43 @@ double trustworthiness_score(const raft::handle_t& h, return detail::trustworthiness_score( h, X, X_embedded, n, m, d, n_neighbors, batchSize); } + +/** + * @brief Compute the trustworthiness score + * @tparam value_t the data type + * @tparam idx_t Integer type used for addressing + * @param[in] handle the raft handle + * @param[in] X: Data in original dimension + * @param[in] X_embedded: Data in target dimension (embedding) + * @param[in] n_neighbors Number of neighbors considered by trustworthiness score + * @param[in] batch_size Batch size + * @return Trustworthiness score + * @note The constness of the data in X_embedded is currently cast away and the data is slightly + * modified.
+ */ +template +double trustworthiness_score( + const raft::handle_t& handle, + raft::device_matrix_view X, + raft::device_matrix_view X_embedded, + int n_neighbors, + int batch_size = 512) +{ + RAFT_EXPECTS(X.extent(0) == X_embedded.extent(0), "Size mismatch between X and X_embedded"); + RAFT_EXPECTS(std::is_integral_v && X.extent(0) <= std::numeric_limits::max(), + "Index type not supported"); + + // TODO: Change the underlying implementation to remove the need to const_cast X_embedded. + return detail::trustworthiness_score( + handle, + X.data_handle(), + const_cast(X_embedded.data_handle()), + X.extent(0), + X.extent(1), + X_embedded.extent(1), + n_neighbors, + batch_size); +} } // namespace stats } // namespace raft diff --git a/cpp/include/raft/stats/v_measure.cuh b/cpp/include/raft/stats/v_measure.cuh index dd6ebd9b15..b8f16695bc 100644 --- a/cpp/include/raft/stats/v_measure.cuh +++ b/cpp/include/raft/stats/v_measure.cuh @@ -18,6 +18,8 @@ #define __V_MEASURE_H #pragma once +#include +#include #include namespace raft { @@ -47,6 +49,41 @@ double v_measure(const T* truthClusterArray, truthClusterArray, predClusterArray, size, lowerLabelRange, upperLabelRange, stream, beta); } +/** + * @brief Function to calculate the v-measure between two clusters + * + * @tparam value_t the data type + * @tparam idx_t Integer type used to for addressing + * @param[in] handle the raft handle + * @param[in] truth_cluster_array: the array of truth classes of type T + * @param[in] pred_cluster_array: the array of predicted classes of type T + * @param[in] lower_label_range: the lower bound of the range of labels + * @param[in] upper_label_range: the upper bound of the range of labels + * @param[in] beta: v_measure parameter + * @return the v-measure between the two clusters + */ +template +double v_measure(const raft::handle_t& handle, + raft::device_vector_view truth_cluster_array, + raft::device_vector_view pred_cluster_array, + value_t lower_label_range, + value_t 
upper_label_range, + double beta = 1.0) +{ + RAFT_EXPECTS(truth_cluster_array.extent(0) == pred_cluster_array.extent(0), + "Size mismatch betwen truth_cluster_array and pred_cluster_array"); + RAFT_EXPECTS(truth_cluster_array.is_exhaustive(), "truth_cluster_array must be contiguous"); + RAFT_EXPECTS(pred_cluster_array.is_exhaustive(), "pred_cluster_array must be contiguous"); + + return detail::v_measure(truth_cluster_array.data_handle(), + pred_cluster_array.data_handle(), + truth_cluster_array.extent(0), + lower_label_range, + upper_label_range, + handle.get_stream(), + beta); +} + }; // end namespace stats }; // end namespace raft diff --git a/cpp/test/stats/adjusted_rand_index.cu b/cpp/test/stats/adjusted_rand_index.cu index 473972ace4..f113af821d 100644 --- a/cpp/test/stats/adjusted_rand_index.cu +++ b/cpp/test/stats/adjusted_rand_index.cu @@ -18,8 +18,8 @@ #include #include #include +#include #include -#include #include #include @@ -40,11 +40,13 @@ struct adjustedRandIndexParam { template class adjustedRandIndexTest : public ::testing::TestWithParam { protected: - adjustedRandIndexTest() : firstClusterArray(0, stream), secondClusterArray(0, stream) {} + adjustedRandIndexTest() + : stream(handle.get_stream()), firstClusterArray(0, stream), secondClusterArray(0, stream) + { + } void SetUp() override { - RAFT_CUDA_TRY(cudaStreamCreate(&stream)); params = ::testing::TestWithParam::GetParam(); nElements = params.nElements; @@ -62,11 +64,11 @@ class adjustedRandIndexTest : public ::testing::TestWithParam( - firstClusterArray.data(), secondClusterArray.data(), nElements, stream); + handle, + raft::make_device_vector_view(firstClusterArray.data(), nElements), + raft::make_device_vector_view(secondClusterArray.data(), nElements)); } - void TearDown() override { RAFT_CUDA_TRY(cudaStreamDestroy(stream)); } - void SetUpDifferentArrays() { lowerLabelRange = params.lowerLabelRange; @@ -135,6 +137,8 @@ class adjustedRandIndexTest : public ::testing::TestWithParam 
firstClusterArray; @@ -142,7 +146,6 @@ class adjustedRandIndexTest : public ::testing::TestWithParam inputs = { diff --git a/cpp/test/stats/completeness_score.cu b/cpp/test/stats/completeness_score.cu index 6f6b5a8afb..2f8a40afdc 100644 --- a/cpp/test/stats/completeness_score.cu +++ b/cpp/test/stats/completeness_score.cu @@ -40,6 +40,8 @@ template class completenessTest : public ::testing::TestWithParam { protected: // the constructor + completenessTest() : stream(handle.get_stream()) {} + void SetUp() override { // getting the parameters @@ -64,9 +66,6 @@ class completenessTest : public ::testing::TestWithParam { } // allocating and initializing memory to the GPU - - RAFT_CUDA_TRY(cudaStreamCreate(&stream)); - rmm::device_uvector truthClusterArray(nElements, stream); rmm::device_uvector predClusterArray(nElements, stream); raft::update_device(truthClusterArray.data(), arr1.data(), (int)nElements, stream); @@ -92,18 +91,16 @@ class completenessTest : public ::testing::TestWithParam { if (nElements == 0) truthCompleteness = 1.0; // calling the completeness CUDA implementation - computedCompleteness = raft::stats::completeness_score(truthClusterArray.data(), - predClusterArray.data(), - nElements, - lowerLabelRange, - upperLabelRange, - stream); + computedCompleteness = raft::stats::completeness_score( + handle, + raft::make_device_vector_view(truthClusterArray.data(), nElements), + raft::make_device_vector_view(predClusterArray.data(), nElements), + lowerLabelRange, + upperLabelRange); } - // the destructor - void TearDown() override { RAFT_CUDA_TRY(cudaStreamDestroy(stream)); } - // declaring the data values + raft::handle_t handle; completenessParam params; T lowerLabelRange, upperLabelRange; int nElements = 0; diff --git a/cpp/test/stats/contingencyMatrix.cu b/cpp/test/stats/contingencyMatrix.cu index 4785c739ed..7943610689 100644 --- a/cpp/test/stats/contingencyMatrix.cu +++ b/cpp/test/stats/contingencyMatrix.cu @@ -40,7 +40,7 @@ template class 
ContingencyMatrixTest : public ::testing::TestWithParam { protected: ContingencyMatrixTest() - : pWorkspace(0, stream), + : stream(handle.get_stream()), dY(0, stream), dYHat(0, stream), dComputedOutput(0, stream), @@ -80,7 +80,6 @@ class ContingencyMatrixTest : public ::testing::TestWithParam(dY.data(), numElements), + raft::make_host_scalar_view(&minLabel), + raft::make_host_scalar_view(&maxLabel)); } else { minLabel = lowerLabelRange; maxLabel = upperLabelRange; @@ -111,27 +114,19 @@ class ContingencyMatrixTest : public ::testing::TestWithParam(dY.data(), numElements), + raft::make_device_vector_view(dYHat.data(), numElements), + raft::make_device_matrix_view(dComputedOutput.data(), numUniqueClasses, numUniqueClasses), + std::make_optional(minLabel), + std::make_optional(maxLabel)); raft::interruptible::synchronize(stream); ASSERT_TRUE(raft::devArrMatch(dComputedOutput.data(), @@ -140,12 +135,11 @@ class ContingencyMatrixTest : public ::testing::TestWithParam())); } + raft::handle_t handle; ContingencyMatrixParam params; int numUniqueClasses = -1; T minLabel, maxLabel; cudaStream_t stream = 0; - size_t workspaceSz; - rmm::device_uvector pWorkspace; rmm::device_uvector dY, dYHat; rmm::device_uvector dComputedOutput, dGoldenOutput; }; diff --git a/cpp/test/stats/cov.cu b/cpp/test/stats/cov.cu index 4ed2215d91..890c5b7826 100644 --- a/cpp/test/stats/cov.cu +++ b/cpp/test/stats/cov.cu @@ -69,16 +69,23 @@ class CovTest : public ::testing::TestWithParam> { normal(handle, r, data.data(), len, params.mean, var); raft::stats::mean( mean_act.data(), data.data(), cols, rows, params.sample, params.rowMajor, stream); - cov(handle, - cov_act.data(), - data.data(), - mean_act.data(), - cols, - rows, - params.sample, - params.rowMajor, - params.stable, - stream); + if (params.rowMajor) { + using layout = raft::row_major; + cov(handle, + raft::make_device_matrix_view(data.data(), rows, cols), + raft::make_device_vector_view(mean_act.data(), cols), + 
raft::make_device_matrix_view(cov_act.data(), cols, cols), + params.sample, + params.stable); + } else { + using layout = raft::col_major; + cov(handle, + raft::make_device_matrix_view(data.data(), rows, cols), + raft::make_device_vector_view(mean_act.data(), cols), + raft::make_device_matrix_view(cov_act.data(), cols, cols), + params.sample, + params.stable); + } T data_h[6] = {1.0, 2.0, 5.0, 4.0, 2.0, 1.0}; T cov_cm_ref_h[4] = {4.3333, -2.8333, -2.8333, 2.333}; diff --git a/cpp/test/stats/dispersion.cu b/cpp/test/stats/dispersion.cu index afad286e98..4f18c9fb54 100644 --- a/cpp/test/stats/dispersion.cu +++ b/cpp/test/stats/dispersion.cu @@ -16,6 +16,7 @@ #include "../test_utils.h" #include +#include #include #include #include @@ -44,14 +45,13 @@ template template class DispersionTest : public ::testing::TestWithParam> { protected: - DispersionTest() : exp_mean(0, stream), act_mean(0, stream) {} + DispersionTest() : stream(handle.get_stream()), exp_mean(0, stream), act_mean(0, stream) {} void SetUp() override { params = ::testing::TestWithParam>::GetParam(); raft::random::RngState r(params.seed); int len = params.clusters * params.dim; - stream = handle.get_stream(); rmm::device_uvector data(len, stream); rmm::device_uvector counts(params.clusters, stream); exp_mean.resize(params.dim, stream); @@ -64,8 +64,12 @@ class DispersionTest : public ::testing::TestWithParam> { for (const auto& val : h_counts) { npoints += val; } - actualVal = dispersion( - data.data(), counts.data(), act_mean.data(), params.clusters, npoints, params.dim, stream); + actualVal = cluster_dispersion( + handle, + raft::make_device_matrix_view(data.data(), params.clusters, params.dim), + raft::make_device_vector_view(counts.data(), params.clusters), + std::make_optional(raft::make_device_vector_view(act_mean.data(), params.dim)), + npoints); expectedVal = T(0); std::vector h_data(len, T(0)); raft::update_host(&(h_data[0]), data.data(), len, stream); diff --git a/cpp/test/stats/entropy.cu 
b/cpp/test/stats/entropy.cu index a3703bdb14..04aa9f7a80 100644 --- a/cpp/test/stats/entropy.cu +++ b/cpp/test/stats/entropy.cu @@ -38,6 +38,8 @@ template class entropyTest : public ::testing::TestWithParam { protected: // the constructor + entropyTest() : stream(handle.get_stream()) {} + void SetUp() override { // getting the parameters @@ -74,17 +76,19 @@ class entropyTest : public ::testing::TestWithParam { } // allocating and initializing memory to the GPU - RAFT_CUDA_TRY(cudaStreamCreate(&stream)); rmm::device_uvector clusterArray(nElements, stream); raft::update_device(clusterArray.data(), &arr1[0], (int)nElements, stream); raft::interruptible::synchronize(stream); // calling the entropy CUDA implementation - computedEntropy = raft::stats::entropy( - clusterArray.data(), nElements, lowerLabelRange, upperLabelRange, stream); - RAFT_CUDA_TRY(cudaStreamDestroy(stream)); + computedEntropy = + raft::stats::entropy(handle, + raft::make_device_vector_view(clusterArray.data(), nElements), + lowerLabelRange, + upperLabelRange); } + raft::handle_t handle; // declaring the data values entropyParam params; T lowerLabelRange, upperLabelRange; diff --git a/cpp/test/stats/histogram.cu b/cpp/test/stats/histogram.cu index 537bde2272..d9793a57df 100644 --- a/cpp/test/stats/histogram.cu +++ b/cpp/test/stats/histogram.cu @@ -84,8 +84,12 @@ class HistTest : public ::testing::TestWithParam { RAFT_CUDA_TRY( cudaMemsetAsync(ref_bins.data(), 0, sizeof(int) * params.nbins * params.ncols, stream)); naiveHist(ref_bins.data(), params.nbins, in.data(), params.nrows, params.ncols, stream); - histogram( - params.type, bins.data(), params.nbins, in.data(), params.nrows, params.ncols, stream); + histogram(handle, + params.type, + raft::make_device_matrix_view( + in.data(), params.nrows, params.ncols), + raft::make_device_matrix_view( + bins.data(), params.nbins, params.ncols)); handle.sync_stream(); } diff --git a/cpp/test/stats/homogeneity_score.cu b/cpp/test/stats/homogeneity_score.cu index 
729863003d..9bd6d9266b 100644 --- a/cpp/test/stats/homogeneity_score.cu +++ b/cpp/test/stats/homogeneity_score.cu @@ -47,6 +47,7 @@ class homogeneityTest : public ::testing::TestWithParam { nElements = params.nElements; lowerLabelRange = params.lowerLabelRange; upperLabelRange = params.upperLabelRange; + stream = handle.get_stream(); // generating random value test input std::vector arr1(nElements, 0); @@ -63,9 +64,6 @@ class homogeneityTest : public ::testing::TestWithParam { } // allocating and initializing memory to the GPU - - RAFT_CUDA_TRY(cudaStreamCreate(&stream)); - rmm::device_uvector truthClusterArray(nElements, stream); rmm::device_uvector predClusterArray(nElements, stream); raft::update_device(truthClusterArray.data(), &arr1[0], (int)nElements, stream); @@ -91,16 +89,16 @@ class homogeneityTest : public ::testing::TestWithParam { if (nElements == 0) truthHomogeneity = 1.0; // calling the homogeneity CUDA implementation - computedHomogeneity = raft::stats::homogeneity_score(truthClusterArray.data(), - predClusterArray.data(), - nElements, - lowerLabelRange, - upperLabelRange, - stream); - RAFT_CUDA_TRY(cudaStreamDestroy(stream)); + computedHomogeneity = raft::stats::homogeneity_score( + handle, + raft::make_device_vector_view(truthClusterArray.data(), nElements), + raft::make_device_vector_view(predClusterArray.data(), nElements), + lowerLabelRange, + upperLabelRange); } // declaring the data values + raft::handle_t handle; homogeneityParam params; T lowerLabelRange, upperLabelRange; int nElements = 0; diff --git a/cpp/test/stats/information_criterion.cu b/cpp/test/stats/information_criterion.cu index c4cf6e950a..4a9a2128c6 100644 --- a/cpp/test/stats/information_criterion.cu +++ b/cpp/test/stats/information_criterion.cu @@ -89,13 +89,13 @@ class BatchedICTest : public ::testing::TestWithParam> { raft::update_device(loglike_d.data(), loglike_h.data(), params.batch_size, stream); // Compute the tested results - information_criterion_batched(res_d.data(), 
- loglike_d.data(), - params.ic_type, - params.n_params, - params.batch_size, - params.n_samples, - stream); + information_criterion_batched( + handle, + raft::make_device_vector_view(loglike_d.data(), params.batch_size), + raft::make_device_vector_view(res_d.data(), params.batch_size), + params.ic_type, + params.n_params, + params.n_samples); // Compute the expected results naive_ic(res_h.data(), diff --git a/cpp/test/stats/kl_divergence.cu b/cpp/test/stats/kl_divergence.cu index e25f1c3bc5..58a64f7199 100644 --- a/cpp/test/stats/kl_divergence.cu +++ b/cpp/test/stats/kl_divergence.cu @@ -39,6 +39,7 @@ class klDivergenceTest : public ::testing::TestWithParam { { // getting the parameters params = ::testing::TestWithParam::GetParam(); + stream = handle.get_stream(); nElements = params.nElements; @@ -54,8 +55,6 @@ class klDivergenceTest : public ::testing::TestWithParam { h_candidatePDF.begin(), h_candidatePDF.end(), [&]() { return realGenerator(dre); }); // allocating and initializing memory to the GPU - RAFT_CUDA_TRY(cudaStreamCreate(&stream)); - rmm::device_uvector d_modelPDF(nElements, stream); rmm::device_uvector d_candidatePDF(nElements, stream); RAFT_CUDA_TRY(cudaMemset(d_modelPDF.data(), 0, d_modelPDF.size() * sizeof(DataT))); @@ -74,12 +73,14 @@ class klDivergenceTest : public ::testing::TestWithParam { } // calling the kl_divergence CUDA implementation - computedklDivergence = - raft::stats::kl_divergence(d_modelPDF.data(), d_candidatePDF.data(), nElements, stream); - RAFT_CUDA_TRY(cudaStreamDestroy(stream)); + computedklDivergence = raft::stats::kl_divergence( + handle, + raft::make_device_vector_view(d_modelPDF.data(), nElements), + raft::make_device_vector_view(d_candidatePDF.data(), nElements)); } // declaring the data values + raft::handle_t handle; klDivergenceParam params; int nElements = 0; DataT truthklDivergence = 0; diff --git a/cpp/test/stats/mean.cu b/cpp/test/stats/mean.cu index bec7a3adce..b299f81f68 100644 --- a/cpp/test/stats/mean.cu +++ 
b/cpp/test/stats/mean.cu @@ -49,7 +49,7 @@ class MeanTest : public ::testing::TestWithParam> { rows(params.rows), cols(params.cols), data(rows * cols, stream), - mean_act(rows * cols, stream) + mean_act(cols, stream) { } @@ -65,7 +65,19 @@ class MeanTest : public ::testing::TestWithParam> { void meanSGtest(T* data, cudaStream_t stream) { int rows = params.rows, cols = params.cols; - mean(mean_act.data(), data, cols, rows, params.sample, params.rowMajor, stream); + if (params.rowMajor) { + using layout = raft::row_major; + mean(handle, + raft::make_device_matrix_view(data, rows, cols), + raft::make_device_vector_view(mean_act.data(), cols), + params.sample); + } else { + using layout = raft::col_major; + mean(handle, + raft::make_device_matrix_view(data, rows, cols), + raft::make_device_vector_view(mean_act.data(), cols), + params.sample); + } } protected: diff --git a/cpp/test/stats/mean_center.cu b/cpp/test/stats/mean_center.cu index c4f979d82e..30dcdd475b 100644 --- a/cpp/test/stats/mean_center.cu +++ b/cpp/test/stats/mean_center.cu @@ -58,18 +58,26 @@ class MeanCenterTest : public ::testing::TestWithParam(data.data(), rows, cols), + raft::make_device_vector_view(meanVec.data(), meanVecSize), + raft::make_device_matrix_view(out.data(), rows, cols), + params.bcastAlongRows); + } else { + using layout = raft::col_major; + mean_center(handle, + raft::make_device_matrix_view(data.data(), rows, cols), + raft::make_device_vector_view(meanVec.data(), meanVecSize), + raft::make_device_matrix_view(out.data(), rows, cols), + params.bcastAlongRows); + } raft::linalg::naiveMatVec(out_ref.data(), data.data(), meanVec.data(), diff --git a/cpp/test/stats/meanvar.cu b/cpp/test/stats/meanvar.cu index 74e52e670d..424395c5e8 100644 --- a/cpp/test/stats/meanvar.cu +++ b/cpp/test/stats/meanvar.cu @@ -67,14 +67,24 @@ class MeanVarTest : public ::testing::TestWithParam> { { random::RngState r(params.seed); normal(handle, r, data.data(), params.cols * params.rows, params.mean, 
params.stddev); - meanvar(mean_act.data(), - vars_act.data(), - data.data(), - params.cols, - params.rows, - params.sample, - params.rowMajor, - stream); + + if (params.rowMajor) { + using layout = raft::row_major; + meanvar( + handle, + raft::make_device_matrix_view(data.data(), params.rows, params.cols), + raft::make_device_vector_view(mean_act.data(), params.cols), + raft::make_device_vector_view(vars_act.data(), params.cols), + params.sample); + } else { + using layout = raft::col_major; + meanvar( + handle, + raft::make_device_matrix_view(data.data(), params.rows, params.cols), + raft::make_device_vector_view(mean_act.data(), params.cols), + raft::make_device_vector_view(vars_act.data(), params.cols), + params.sample); + } RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); } diff --git a/cpp/test/stats/minmax.cu b/cpp/test/stats/minmax.cu index 0468ebb177..a2ba6bfc9e 100644 --- a/cpp/test/stats/minmax.cu +++ b/cpp/test/stats/minmax.cu @@ -17,6 +17,7 @@ #include "../test_utils.h" #include #include +#include #include #include #include @@ -117,16 +118,15 @@ class MinMaxTest : public ::testing::TestWithParam> { minmax_ref.data(), minmax_ref.data() + params.cols, stream); - minmax(data.data(), - nullptr, - nullptr, - params.rows, - params.cols, - params.rows, - minmax_act.data(), - minmax_act.data() + params.cols, - nullptr, - stream); + raft::stats::minmax( + handle, + raft::make_device_matrix_view( + data.data(), params.rows, params.cols), + std::nullopt, + std::nullopt, + raft::make_device_vector_view(minmax_act.data(), params.cols), + raft::make_device_vector_view(minmax_act.data() + params.cols, params.cols), + std::nullopt); } protected: diff --git a/cpp/test/stats/mutual_info_score.cu b/cpp/test/stats/mutual_info_score.cu index 6bf3e6623f..fb9362df52 100644 --- a/cpp/test/stats/mutual_info_score.cu +++ b/cpp/test/stats/mutual_info_score.cu @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include @@ -104,7 +104,7 @@ class 
mutualInfoTest : public ::testing::TestWithParam { truthmutualInfo /= nElements; // allocating and initializing memory to the GPU - RAFT_CUDA_TRY(cudaStreamCreate(&stream)); + stream = handle.get_stream(); rmm::device_uvector firstClusterArray(nElements, stream); rmm::device_uvector secondClusterArray(nElements, stream); @@ -117,18 +117,16 @@ class mutualInfoTest : public ::testing::TestWithParam { raft::update_device(secondClusterArray.data(), &arr2[0], (int)nElements, stream); // calling the mutualInfo CUDA implementation - computedmutualInfo = raft::stats::mutual_info_score(firstClusterArray.data(), - secondClusterArray.data(), - nElements, - lowerLabelRange, - upperLabelRange, - stream); + computedmutualInfo = raft::stats::mutual_info_score( + handle, + raft::make_device_vector_view(firstClusterArray.data(), nElements), + raft::make_device_vector_view(secondClusterArray.data(), nElements), + lowerLabelRange, + upperLabelRange); } - // the destructor - void TearDown() override { RAFT_CUDA_TRY(cudaStreamDestroy(stream)); } - // declaring the data values + raft::handle_t handle; mutualInfoParam params; T lowerLabelRange, upperLabelRange; int nElements = 0; diff --git a/cpp/test/stats/rand_index.cu b/cpp/test/stats/rand_index.cu index ca1c4dd5e8..67e4ab5517 100644 --- a/cpp/test/stats/rand_index.cu +++ b/cpp/test/stats/rand_index.cu @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -77,7 +78,7 @@ class randIndexTest : public ::testing::TestWithParam { truthRandIndex = (double)(((double)(a_truth + b_truth)) / (double)nChooseTwo); // allocating and initializing memory to the GPU - RAFT_CUDA_TRY(cudaStreamCreate(&stream)); + stream = handle.get_stream(); rmm::device_uvector firstClusterArray(size, stream); rmm::device_uvector secondClusterArray(size, stream); @@ -90,14 +91,14 @@ class randIndexTest : public ::testing::TestWithParam { raft::update_device(secondClusterArray.data(), &arr2[0], (int)size, stream); // calling the rand_index CUDA 
implementation - computedRandIndex = - raft::stats::rand_index(firstClusterArray.data(), secondClusterArray.data(), size, stream); + computedRandIndex = raft::stats::rand_index( + handle, + raft::make_device_vector_view(firstClusterArray.data(), size), + raft::make_device_vector_view(secondClusterArray.data(), size)); } - // the destructor - void TearDown() override { RAFT_CUDA_TRY(cudaStreamDestroy(stream)); } - // declaring the data values + raft::handle_t handle; randIndexParam params; int lowerLabelRange = 0, upperLabelRange = 2; uint64_t size = 0; diff --git a/cpp/test/stats/silhouette_score.cu b/cpp/test/stats/silhouette_score.cu index f885c1034f..37a6fff786 100644 --- a/cpp/test/stats/silhouette_score.cu +++ b/cpp/test/stats/silhouette_score.cu @@ -173,25 +173,22 @@ class silhouetteScoreTest : public ::testing::TestWithParam(d_X.data(), nRows, nCols), + raft::make_device_vector_view(d_labels.data(), nRows), + std::make_optional(raft::make_device_vector_view(sampleSilScore.data(), nRows)), + nLabels, + params.metric); + + batchedSilhouetteScore = raft::stats::silhouette_score_batched( + handle, + raft::make_device_matrix_view(d_X.data(), nRows, nCols), + raft::make_device_vector_view(d_labels.data(), nRows), + std::make_optional(raft::make_device_vector_view(sampleSilScore.data(), nRows)), + nLabels, + chunk, + params.metric); } // declaring the data values diff --git a/cpp/test/stats/stddev.cu b/cpp/test/stats/stddev.cu index 70d99c2aeb..73f30f17e9 100644 --- a/cpp/test/stats/stddev.cu +++ b/cpp/test/stats/stddev.cu @@ -73,14 +73,43 @@ class StdDevTest : public ::testing::TestWithParam> { { int rows = params.rows, cols = params.cols; - mean(mean_act.data(), data, cols, rows, params.sample, params.rowMajor, stream); - - stddev( - stddev_act.data(), data, mean_act.data(), cols, rows, params.sample, params.rowMajor, stream); - - vars( - vars_act.data(), data, mean_act.data(), cols, rows, params.sample, params.rowMajor, stream); - + if (params.rowMajor) { + 
using layout_t = raft::row_major; + mean(handle, + raft::make_device_matrix_view(data, rows, cols), + raft::make_device_vector_view(mean_act.data(), cols), + params.sample); + + stddev(handle, + raft::make_device_matrix_view(data, rows, cols), + raft::make_device_vector_view(mean_act.data(), cols), + raft::make_device_vector_view(stddev_act.data(), cols), + params.sample); + + vars(handle, + raft::make_device_matrix_view(data, rows, cols), + raft::make_device_vector_view(mean_act.data(), cols), + raft::make_device_vector_view(vars_act.data(), cols), + params.sample); + } else { + using layout_t = raft::col_major; + mean(handle, + raft::make_device_matrix_view(data, rows, cols), + raft::make_device_vector_view(mean_act.data(), cols), + params.sample); + + stddev(handle, + raft::make_device_matrix_view(data, rows, cols), + raft::make_device_vector_view(mean_act.data(), cols), + raft::make_device_vector_view(stddev_act.data(), cols), + params.sample); + + vars(handle, + raft::make_device_matrix_view(data, rows, cols), + raft::make_device_vector_view(mean_act.data(), cols), + raft::make_device_vector_view(vars_act.data(), cols), + params.sample); + } raft::matrix::seqRoot(vars_act.data(), T(1), cols, stream); } diff --git a/cpp/test/stats/sum.cu b/cpp/test/stats/sum.cu index 7a16dbde4a..e67988abb0 100644 --- a/cpp/test/stats/sum.cu +++ b/cpp/test/stats/sum.cu @@ -65,7 +65,9 @@ class SumTest : public ::testing::TestWithParam> { } raft::update_device(data.data(), data_h, len, stream); - sum(sum_act.data(), data.data(), cols, rows, false, stream); + sum(handle, + raft::make_device_matrix_view(data.data(), rows, cols), + raft::make_device_vector_view(sum_act.data(), cols)); handle.sync_stream(stream); } diff --git a/cpp/test/stats/trustworthiness.cu b/cpp/test/stats/trustworthiness.cu index ae596d0535..cbb8228f8f 100644 --- a/cpp/test/stats/trustworthiness.cu +++ b/cpp/test/stats/trustworthiness.cu @@ -320,10 +320,17 @@ class TrustworthinessScoreTest : public 
::testing::Test { raft::update_device(d_X.data(), X.data(), X.size(), stream); raft::update_device(d_X_embedded.data(), X_embedded.data(), X_embedded.size(), stream); + auto n_sample = 50; + auto n_features_origin = 30; + auto n_features_embedded = 8; // euclidean test - score = trustworthiness_score( - handle, d_X.data(), d_X_embedded.data(), 50, 30, 8, 5); + score = trustworthiness_score( + handle, + raft::make_device_matrix_view(d_X.data(), n_sample, n_features_origin), + raft::make_device_matrix_view( + d_X_embedded.data(), n_sample, n_features_embedded), + 5); } void SetUp() override { basicTest(); } diff --git a/cpp/test/stats/v_measure.cu b/cpp/test/stats/v_measure.cu index 22dcefba0c..0cbc2da7d9 100644 --- a/cpp/test/stats/v_measure.cu +++ b/cpp/test/stats/v_measure.cu @@ -65,7 +65,7 @@ class vMeasureTest : public ::testing::TestWithParam { // allocating and initializing memory to the GPU - RAFT_CUDA_TRY(cudaStreamCreate(&stream)); + stream = handle.get_stream(); rmm::device_uvector truthClusterArray(nElements, stream); rmm::device_uvector predClusterArray(nElements, stream); raft::update_device(truthClusterArray.data(), &arr1[0], (int)nElements, stream); @@ -93,19 +93,17 @@ class vMeasureTest : public ::testing::TestWithParam { truthVMeasure = ((1 + params.beta) * truthHomogeity * truthCompleteness / (params.beta * truthHomogeity + truthCompleteness)); // calling the v_measure CUDA implementation - computedVMeasure = raft::stats::v_measure(truthClusterArray.data(), - predClusterArray.data(), - nElements, - lowerLabelRange, - upperLabelRange, - stream, - params.beta); + computedVMeasure = raft::stats::v_measure( + handle, + raft::make_device_vector_view(truthClusterArray.data(), nElements), + raft::make_device_vector_view(predClusterArray.data(), nElements), + lowerLabelRange, + upperLabelRange, + params.beta); } - // the destructor - void TearDown() override { RAFT_CUDA_TRY(cudaStreamDestroy(stream)); } - // declaring the data values + raft::handle_t 
handle; vMeasureParam params; T lowerLabelRange, upperLabelRange; int nElements = 0; diff --git a/cpp/test/stats/weighted_mean.cu b/cpp/test/stats/weighted_mean.cu index 5ff8454490..ec99d5a627 100644 --- a/cpp/test/stats/weighted_mean.cu +++ b/cpp/test/stats/weighted_mean.cu @@ -294,4 +294,4 @@ TEST_P(WeightedMeanTestD, Result) INSTANTIATE_TEST_CASE_P(WeightedMeanTest, WeightedMeanTestD, ::testing::ValuesIn(inputsd)); }; // end namespace stats -}; // end namespace raft +}; // end namespace raft \ No newline at end of file