From 853d984d831de7cbe196425b7ed2e2a6e8e37583 Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Mon, 25 Jul 2022 09:50:44 +0200 Subject: [PATCH 01/40] Add mdspan for cov and mean_center --- cpp/include/raft/stats/cov.cuh | 30 +++++++++ cpp/include/raft/stats/detail/cov.cuh | 65 ++++++++++++++++++ cpp/include/raft/stats/detail/mean_center.cuh | 66 +++++++++++++++++++ cpp/include/raft/stats/mean_center.cuh | 44 ++++++++++++- 4 files changed, 204 insertions(+), 1 deletion(-) diff --git a/cpp/include/raft/stats/cov.cuh b/cpp/include/raft/stats/cov.cuh index 06e8ba0215..362c2f2bbb 100644 --- a/cpp/include/raft/stats/cov.cuh +++ b/cpp/include/raft/stats/cov.cuh @@ -19,6 +19,7 @@ #pragma once +#include #include namespace raft { namespace stats { @@ -57,6 +58,35 @@ void cov(const raft::handle_t& handle, { detail::cov(handle, covar, data, mu, D, N, sample, rowMajor, stable, stream); } + +/** + * @brief Compute covariance of the input matrix + * + * Mean operation is assumed to be performed on a given column. + * + * @tparam Type the data type + * @tparam LayoutPolicy Layout type of the input matrix. + * @param handle the raft handle + * @param data the input matrix (this will get mean-centered at the end!) + * @param mu mean vector of the input matrix + * @param covar the output covariance matrix + * @param sample whether to evaluate sample covariance or not. In other words, + * whether to normalize the output using N-1 or N, for true or false, + * respectively + * @param stable whether to run the slower-but-numerically-stable version or not + * @note if stable=true, then the input data will be mean centered after this + * function returns! + */ +template +void cov(const raft::handle_t& handle, + const raft::device_matrix_view& data, + const raft::device_vector_view& mu, + const raft::device_matrix_view& covar, + bool sample, + bool stable) +{ + detail::cov(handle, data, mu, covar, sample, stable); +} }; // end namespace stats }; // end namespace raft diff --git a/cpp/include/raft/stats/detail/cov.cuh b/cpp/include/raft/stats/detail/cov.cuh index 24de58dd91..0cda611514 100644 --- a/cpp/include/raft/stats/detail/cov.cuh +++ b/cpp/include/raft/stats/detail/cov.cuh @@ -16,6 +16,7 @@ #pragma once +#include #include #include @@ -90,6 +91,70 @@ void cov(const raft::handle_t& handle, } RAFT_CUDA_TRY(cudaPeekAtLastError()); } + +/** + * @brief Compute covariance of the input matrix + * + * Mean operation is assumed to be performed on a given column. + * + * @tparam Type the data type + * @tparam LayoutPolicy Layout type of the input matrix. + * @param handle the raft handle + * @param data the input matrix (this will get mean-centered at the end!) + * @param mu mean vector of the input matrix + * @param covar the output covariance matrix + * @param sample whether to evaluate sample covariance or not. In other words, + * whether to normalize the output using N-1 or N, for true or false, + * respectively + * @note if stable=true, then the input data will be mean centered after this + * function returns! + */ +template +void cov(const raft::handle_t& handle, + const raft::device_matrix_view& data, + const raft::device_vector_view& mu, + const raft::device_matrix_view& covar, + bool sample, + bool stable) +{ + if (stable) { + cublasHandle_t cublas_h = handle.get_cublas_handle(); + cudaStream_t stream = handle.get_stream(); + + // since mean operation is assumed to be along a given column, broadcast + // must be along rows! + raft::stats::meanCenter(handle, data, mu, data, true); + Type alpha = Type(1) / (sample ? Type(N - 1) : Type(N)); + Type beta = Type(0); + auto N = data.extent(0); + auto D = data.extent(1); + if constexpr (LayoutPolicy == raft::row_major) { + // #TODO: Call from public API when ready + RAFT_CUBLAS_TRY(raft::linalg::detail::cublasgemm(cublas_h, + CUBLAS_OP_N, + CUBLAS_OP_T, + D, + D, + N, + &alpha, + data.data(), + D, + data.data(), + D, + &beta, + covar.data(), + D, + stream)); + } else { + raft::linalg::gemm( + handle, data.data(), N, D, data.data(), covar.data(), D, D, CUBLAS_OP_T, CUBLAS_OP_N, alpha, beta, stream); + } + } else { + ///@todo: implement this using cutlass + customized epilogue! + ASSERT(false, "cov: Implement stable=false case!"); + } + RAFT_CUDA_TRY(cudaPeekAtLastError()); +} }; // end namespace detail }; // end namespace stats }; // end namespace raft diff --git a/cpp/include/raft/stats/detail/mean_center.cuh b/cpp/include/raft/stats/detail/mean_center.cuh index 1a4fc20c51..5ab65eea4d 100644 --- a/cpp/include/raft/stats/detail/mean_center.cuh +++ b/cpp/include/raft/stats/detail/mean_center.cuh @@ -16,6 +16,7 @@ #pragma once +#include #include #include #include @@ -60,6 +61,39 @@ void meanCenter(Type* out, stream); } +/** + * @brief Center the input matrix wrt its mean + * @tparam Type the data type + * @tparam LayoutPolicy Layout type of the input matrix. + * @tparam TPB threads per block of the cuda kernel launched + * @param handle the raft handle + * @param data input matrix + * @param mu the mean vector + * @param out the output mean-centered matrix + * @param bcastAlongRows whether to broadcast vector along rows or columns + */ +template +void meanCenter(const raft::handle_t& handle, + const raft::device_matrix_view& data, + const raft::device_vector_view& mu, + const raft::device_matrix_view& out, + bool bcastAlongRows) +{ + cudaStream_t stream = handle.get_stream(); + auto N = data.extent(0); + auto D = data.extent(1); + raft::linalg::matrixVectorOp( + out.data(), + data.data(), + mu.data(), + D, + N, + std::is_same_v, + bcastAlongRows, + [] __device__(Type a, Type b) { return a - b; }, + stream); +} + /** * @brief Add the input matrix wrt its mean * @tparam Type the data type @@ -96,6 +130,38 @@ void meanAdd(Type* out, stream); } +/** + * @brief Add the input matrix wrt its mean + * @tparam Type the data type + * @tparam LayoutPolicy Layout type of the input matrix. + * @tparam TPB threads per block of the cuda kernel launched + * @param handle the raft handle + * @param data input matrix + * @param mu the mean vector + * @param out the output mean-centered matrix + * @param bcastAlongRows whether to broadcast vector along rows or columns + */ +template +void meanAdd(const raft::handle_t& handle, + const raft::device_matrix_view& data, + const raft::device_vector_view& mu, + const raft::device_matrix_view& out, + bool bcastAlongRows) +{ + cudaStream_t stream = handle.get_stream(); + auto N = data.extent(0); + auto D = data.extent(1); + raft::linalg::matrixVectorOp( + out.data(), + data.data(), + mu.data(), + D, + N, + std::is_same_v, + bcastAlongRows, + [] __device__(Type a, Type b) { return a + b; }, + stream); +} }; // end namespace detail }; // end namespace stats }; // end namespace raft diff --git a/cpp/include/raft/stats/mean_center.cuh b/cpp/include/raft/stats/mean_center.cuh index 3b2222ef52..696ba8d045 100644 --- a/cpp/include/raft/stats/mean_center.cuh +++ b/cpp/include/raft/stats/mean_center.cuh @@ -19,7 +19,8 @@ #pragma once -#include "detail/mean_center.cuh" +#include +#include namespace raft { namespace stats { @@ -51,6 +52,27 @@ void meanCenter(Type* out, detail::meanCenter(out, data, mu, D, N, rowMajor, bcastAlongRows, stream); } +/** + * @brief Center the input matrix wrt its mean + * @tparam Type the data type + * @tparam LayoutPolicy Layout type of the input matrix. + * @tparam TPB threads per block of the cuda kernel launched + * @param handle the raft handle + * @param data input matrix + * @param mu the mean vector + * @param out the output mean-centered matrix + * @param bcastAlongRows whether to broadcast vector along rows or columns + */ +template +void meanCenter(const raft::handle_t& handle, + const raft::device_matrix_view& data, + const raft::device_vector_view& mu, + const raft::device_matrix_view& out, + bool bcastAlongRows) +{ + detail::meanCenter(handle, data, mu, out, bcastAlongRows); +} + /** * @brief Add the input matrix wrt its mean * @tparam Type the data type @@ -78,6 +100,26 @@ void meanAdd(Type* out, detail::meanAdd(out, data, mu, D, N, rowMajor, bcastAlongRows, stream); } +/** + * @brief Add the input matrix wrt its mean + * @tparam Type the data type + * @tparam LayoutPolicy Layout type of the input matrix. + * @tparam TPB threads per block of the cuda kernel launched + * @param handle the raft handle + * @param data input matrix + * @param mu the mean vector + * @param out the output mean-added matrix + * @param bcastAlongRows whether to broadcast vector along rows or columns + */ +template +void meanAdd(const raft::handle_t& handle, + const raft::device_matrix_view& data, + const raft::device_vector_view& mu, + const raft::device_matrix_view& out, + bool bcastAlongRows) +{ + detail::meanAdd(handle, data, mu, out, bcastAlongRows); +} }; // end namespace stats }; // end namespace raft From 2b5218c1afb6c6fc86deffe1872bef670d83a7d1 Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Mon, 25 Jul 2022 19:41:43 +0200 Subject: [PATCH 02/40] Change only public API --- cpp/include/raft/stats/cov.cuh | 3 +- cpp/include/raft/stats/detail/cov.cuh | 65 ------------------ cpp/include/raft/stats/detail/mean_center.cuh | 66 ------------------- cpp/include/raft/stats/dispersion.cuh | 31 +++++++++ cpp/include/raft/stats/entropy.cuh | 19 ++++++ cpp/include/raft/stats/mean_center.cuh | 22 ++++--- 6 files changed, 64 insertions(+), 142 deletions(-) diff --git a/cpp/include/raft/stats/cov.cuh b/cpp/include/raft/stats/cov.cuh index 362c2f2bbb..508be8cc9b 100644 --- a/cpp/include/raft/stats/cov.cuh +++ b/cpp/include/raft/stats/cov.cuh @@ -85,7 +85,8 @@ void cov(const raft::handle_t& handle, bool sample, bool stable) { - detail::cov(handle, data, mu, covar, sample, stable); + detail::cov(handle, covar.data(), data.data(), mu.data(), data.extent(1), data.extent(0), + std::is_same_v, sample, stable, handle.get_stream()); } }; // end namespace stats }; // end namespace raft diff --git a/cpp/include/raft/stats/detail/cov.cuh b/cpp/include/raft/stats/detail/cov.cuh index 0cda611514..24de58dd91 100644 --- a/cpp/include/raft/stats/detail/cov.cuh +++ b/cpp/include/raft/stats/detail/cov.cuh @@ -16,7 +16,6 @@ #pragma once -#include #include #include @@ -91,70 +90,6 @@ void cov(const raft::handle_t& handle, } RAFT_CUDA_TRY(cudaPeekAtLastError()); } - -/** - * @brief Compute covariance of the input matrix - * - * Mean operation is assumed to be performed on a given column. - * - * @tparam Type the data type - * @tparam LayoutPolicy Layout type of the input matrix. - * @param handle the raft handle - * @param data the input matrix (this will get mean-centered at the end!) - * @param mu mean vector of the input matrix - * @param covar the output covariance matrix - * @param sample whether to evaluate sample covariance or not. In other words, - * whether to normalize the output using N-1 or N, for true or false, - * respectively - * @note if stable=true, then the input data will be mean centered after this - * function returns! - */ -template -void cov(const raft::handle_t& handle, - const raft::device_matrix_view& data, - const raft::device_vector_view& mu, - const raft::device_matrix_view& covar, - bool sample, - bool stable) -{ - if (stable) { - cublasHandle_t cublas_h = handle.get_cublas_handle(); - cudaStream_t stream = handle.get_stream(); - - // since mean operation is assumed to be along a given column, broadcast - // must be along rows! - raft::stats::meanCenter(handle, data, mu, data, true); - Type alpha = Type(1) / (sample ? Type(N - 1) : Type(N)); - Type beta = Type(0); - auto N = data.extent(0); - auto D = data.extent(1); - if constexpr (LayoutPolicy == raft::row_major) { - // #TODO: Call from public API when ready - RAFT_CUBLAS_TRY(raft::linalg::detail::cublasgemm(cublas_h, - CUBLAS_OP_N, - CUBLAS_OP_T, - D, - D, - N, - &alpha, - data.data(), - D, - data.data(), - D, - &beta, - covar.data(), - D, - stream)); - } else { - raft::linalg::gemm( - handle, data.data(), N, D, data.data(), covar.data(), D, D, CUBLAS_OP_T, CUBLAS_OP_N, alpha, beta, stream); - } - } else { - ///@todo: implement this using cutlass + customized epilogue! - ASSERT(false, "cov: Implement stable=false case!"); - } - RAFT_CUDA_TRY(cudaPeekAtLastError()); -} }; // end namespace detail }; // end namespace stats }; // end namespace raft diff --git a/cpp/include/raft/stats/detail/mean_center.cuh b/cpp/include/raft/stats/detail/mean_center.cuh index 5ab65eea4d..1a4fc20c51 100644 --- a/cpp/include/raft/stats/detail/mean_center.cuh +++ b/cpp/include/raft/stats/detail/mean_center.cuh @@ -16,7 +16,6 @@ #pragma once -#include #include #include #include @@ -61,39 +60,6 @@ void meanCenter(Type* out, stream); } -/** - * @brief Center the input matrix wrt its mean - * @tparam Type the data type - * @tparam LayoutPolicy Layout type of the input matrix. - * @tparam TPB threads per block of the cuda kernel launched - * @param handle the raft handle - * @param data input matrix - * @param mu the mean vector - * @param out the output mean-centered matrix - * @param bcastAlongRows whether to broadcast vector along rows or columns - */ -template -void meanCenter(const raft::handle_t& handle, - const raft::device_matrix_view& data, - const raft::device_vector_view& mu, - const raft::device_matrix_view& out, - bool bcastAlongRows) -{ - cudaStream_t stream = handle.get_stream(); - auto N = data.extent(0); - auto D = data.extent(1); - raft::linalg::matrixVectorOp( - out.data(), - data.data(), - mu.data(), - D, - N, - std::is_same_v, - bcastAlongRows, - [] __device__(Type a, Type b) { return a - b; }, - stream); -} - /** * @brief Add the input matrix wrt its mean * @tparam Type the data type @@ -130,38 +96,6 @@ void meanAdd(Type* out, stream); } -/** - * @brief Add the input matrix wrt its mean - * @tparam Type the data type - * @tparam LayoutPolicy Layout type of the input matrix. - * @tparam TPB threads per block of the cuda kernel launched - * @param handle the raft handle - * @param data input matrix - * @param mu the mean vector - * @param out the output mean-centered matrix - * @param bcastAlongRows whether to broadcast vector along rows or columns - */ -template -void meanAdd(const raft::handle_t& handle, - const raft::device_matrix_view& data, - const raft::device_vector_view& mu, - const raft::device_matrix_view& out, - bool bcastAlongRows) -{ - cudaStream_t stream = handle.get_stream(); - auto N = data.extent(0); - auto D = data.extent(1); - raft::linalg::matrixVectorOp( - out.data(), - data.data(), - mu.data(), - D, - N, - std::is_same_v, - bcastAlongRows, - [] __device__(Type a, Type b) { return a + b; }, - stream); -} }; // end namespace detail }; // end namespace stats }; // end namespace raft diff --git a/cpp/include/raft/stats/dispersion.cuh b/cpp/include/raft/stats/dispersion.cuh index c868092517..56198128f7 100644 --- a/cpp/include/raft/stats/dispersion.cuh +++ b/cpp/include/raft/stats/dispersion.cuh @@ -19,6 +19,8 @@ #pragma once +#include +#include #include namespace raft { @@ -55,6 +57,35 @@ DataT dispersion(const DataT* centroids, centroids, clusterSizes, globalCentroid, nClusters, nPoints, dim, stream); } +/** + * @brief Compute cluster dispersion metric. This is very useful for + * automatically finding the 'k' (in kmeans) that improves this metric. + * @tparam DataT data type + * @tparam IdxT index type + * @tparam LayoutPolicy Layout type of the input matrix. + * @tparam TPB threads block for kernels launched + * @param handle the raft handle + * @param centroids the cluster centroids. This is assumed to be row-major + * and of dimension (nClusters x dim) + * @param clusterSizes number of points in the dataset which belong to each + * cluster. This is of length nClusters + * @param globalCentroid compute the global weighted centroid of all cluster + * centroids. This is of length dim. Use std::nullopt to not return it. + * @param nPoints number of points in the dataset + * @return the cluster dispersion value + */ +template +DataT dispersion( + const raft::handle_t& handle, + const raft::device_matrix_view& centroids, + const raft::device_vector_view& clusterSizes, + const std::optional>& globalCentroid, + const IdxT nPoints) +{ + return detail::dispersion( + centroids.data(), clusterSizes.data(), globalCentroid.data(), centroids.extent(0), nPoints, centroids.extent(1), handle.get_stream()); +} + } // end namespace stats } // end namespace raft diff --git a/cpp/include/raft/stats/entropy.cuh b/cpp/include/raft/stats/entropy.cuh index 59cbbd368f..58ee48bbc2 100644 --- a/cpp/include/raft/stats/entropy.cuh +++ b/cpp/include/raft/stats/entropy.cuh @@ -18,6 +18,7 @@ #define __ENTROPY_H #pragma once +#include #include namespace raft { @@ -44,6 +45,24 @@ double entropy(const T* clusterArray, return detail::entropy(clusterArray, size, lowerLabelRange, upperLabelRange, stream); } +/** + * @brief Function to calculate entropy + * more info on entropy + * + * @param handle the raft handle + * @param clusterArray: the array of classes of type T + * @param lowerLabelRange: the lower bound of the range of labels + * @param upperLabelRange: the upper bound of the range of labels + * @return the entropy score + */ +template +double entropy(const raft::handle_t& handle, + const raft::device_vector_view& clusterArray, + const T lowerLabelRange, + const T upperLabelRange) +{ + return detail::entropy(clusterArray.data(), clusterArray.extent(0), lowerLabelRange, upperLabelRange, handle.get_stream()); +} }; // end namespace stats }; // end namespace raft diff --git a/cpp/include/raft/stats/mean_center.cuh b/cpp/include/raft/stats/mean_center.cuh index 696ba8d045..10797fd2e1 100644 --- a/cpp/include/raft/stats/mean_center.cuh +++ b/cpp/include/raft/stats/mean_center.cuh @@ -63,14 +63,15 @@ void meanCenter(Type* out, * @param out the output mean-centered matrix * @param bcastAlongRows whether to broadcast vector along rows or columns */ -template +template void meanCenter(const raft::handle_t& handle, - const raft::device_matrix_view& data, - const raft::device_vector_view& mu, - const raft::device_matrix_view& out, + const raft::device_matrix_view& data, + const raft::device_vector_view& mu, + const raft::device_matrix_view& out, bool bcastAlongRows) { - detail::meanCenter(handle, data, mu, out, bcastAlongRows); + detail::meanCenter(out.data(), data.data(), mu.data(), data.extent(1), data.extent(0), + std::is_same_v, bcastAlongRows, handle.get_stream()); } /** @@ -111,14 +112,15 @@ void meanAdd(Type* out, * @param out the output mean-added matrix * @param bcastAlongRows whether to broadcast vector along rows or columns */ -template +template void meanAdd(const raft::handle_t& handle, - const raft::device_matrix_view& data, - const raft::device_vector_view& mu, - const raft::device_matrix_view& out, + const raft::device_matrix_view& data, + const raft::device_vector_view& mu, + const raft::device_matrix_view& out, bool bcastAlongRows) { - detail::meanAdd(handle, data, mu, out, bcastAlongRows); + detail::meanAdd(out.data(), data.data(), mu.data(), data.extent(1), data.extent(0), + std::is_same_v, bcastAlongRows, handle.get_stream()); } }; // end namespace stats }; // end namespace raft From 9c97e018d22456616967fb97109b502c1999caac Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Mon, 25 Jul 2022 22:21:01 +0200 Subject: [PATCH 03/40] Add accuracy, randIndex, completeness and contingency --- cpp/include/raft/stats/accuracy.cuh | 14 ++++ .../raft/stats/adjusted_rand_index.cuh | 19 +++++ cpp/include/raft/stats/completeness_score.cuh | 21 +++++ cpp/include/raft/stats/contingency_matrix.cuh | 76 +++++++++++++++++++ 4 files changed, 130 insertions(+) diff --git a/cpp/include/raft/stats/accuracy.cuh b/cpp/include/raft/stats/accuracy.cuh index 250ce579e5..e04a75c9e9 100644 --- a/cpp/include/raft/stats/accuracy.cuh +++ b/cpp/include/raft/stats/accuracy.cuh @@ -19,6 +19,7 @@ #pragma once +#include #include namespace raft { @@ -39,6 +40,19 @@ float accuracy(const math_t* predictions, const math_t* ref_predictions, int n, return detail::accuracy_score(predictions, ref_predictions, n, stream); } +/** + * @brief Compute accuracy of predictions. Useful for classification. + * @tparam math_t: data type for predictions (e.g., int for classification) + * @param[in] handle: the raft handle. + * @param[in] predictions: array of predictions (GPU pointer). + * @param[in] ref_predictions: array of reference (ground-truth) predictions (GPU pointer). + * @return: Accuracy score in [0, 1]; higher is better. + */ +template +float accuracy(const raft::handle_t& handle, const raft::device_vector_view& predictions, const raft::device_vector_view& ref_predictions) +{ + return detail::accuracy_score(predictions.data(), ref_predictions.data(), predictions.extent(0), handle.get_stream()); +} } // namespace stats } // namespace raft diff --git a/cpp/include/raft/stats/adjusted_rand_index.cuh b/cpp/include/raft/stats/adjusted_rand_index.cuh index a59d7b4c81..d4acb73e6d 100644 --- a/cpp/include/raft/stats/adjusted_rand_index.cuh +++ b/cpp/include/raft/stats/adjusted_rand_index.cuh @@ -24,6 +24,7 @@ #pragma once +#include #include namespace raft { @@ -48,6 +49,24 @@ double adjusted_rand_index(const T* firstClusterArray, return detail::compute_adjusted_rand_index(firstClusterArray, secondClusterArray, size, stream); } +/** + * @brief Function to calculate Adjusted RandIndex as described + * here + * @tparam T data-type for input label arrays + * @tparam MathT integral data-type used for computing n-choose-r + * @param handle: the raft handle. + * @param firstClusterArray: the array of classes + * @param secondClusterArray: the array of classes + */ +template +double adjusted_rand_index(const raft::handle_t& handle, + const raft::device_vector_view& firstClusterArray, + const raft::device_vector_view& secondClusterArray) +{ + return detail::compute_adjusted_rand_index(firstClusterArray.data(), secondClusterArray.data(), + firstClusterArray.extent(0), handle.get_stream()); +} + }; // end namespace stats }; // end namespace raft diff --git a/cpp/include/raft/stats/completeness_score.cuh b/cpp/include/raft/stats/completeness_score.cuh index 407986de05..1686086667 100644 --- a/cpp/include/raft/stats/completeness_score.cuh +++ b/cpp/include/raft/stats/completeness_score.cuh @@ -19,6 +19,7 @@ #pragma once +#include #include namespace raft { @@ -46,6 +47,26 @@ double completeness_score(const T* truthClusterArray, predClusterArray, truthClusterArray, size, lowerLabelRange, upperLabelRange, stream); } +/** + * @brief Function to calculate the completeness score between two clusters + * + * @param handle: the raft handle. + * @param truthClusterArray: the array of truth classes of type T + * @param predClusterArray: the array of predicted classes of type T + * @param lowerLabelRange: the lower bound of the range of labels + * @param upperLabelRange: the upper bound of the range of labels + */ +template +double completeness_score(const raft::handle_t& handle, + const raft::device_vector_view& truthClusterArray, + const raft::device_vector_view& predClusterArray, + T lowerLabelRange, + T upperLabelRange) +{ + return detail::homogeneity_score( + predClusterArray.data(), truthClusterArray.data(), truthClusterArray.extent(0), lowerLabelRange, upperLabelRange, handle.get_stream()); +} + }; // end namespace stats }; // end namespace raft diff --git a/cpp/include/raft/stats/contingency_matrix.cuh b/cpp/include/raft/stats/contingency_matrix.cuh index 081782432c..8f77afb7fe 100644 --- a/cpp/include/raft/stats/contingency_matrix.cuh +++ b/cpp/include/raft/stats/contingency_matrix.cuh @@ -19,6 +19,7 @@ #pragma once +#include #include namespace raft { @@ -40,6 +41,25 @@ void getInputClassCardinality( detail::getInputClassCardinality(groundTruth, nSamples, stream, minLabel, maxLabel); } +/** + * @brief use this to allocate output matrix size + * size of matrix = (maxLabel - minLabel + 1)^2 * sizeof(int) + * @param handle: the raft handle. + * @param groundTruth: device 1-d array for ground truth (num of rows) + * @param nSamples: number of elements in input array + * @param minLabel: [out] calculated min value in input array + * @param maxLabel: [out] calculated max value in input array + */ +template +void getInputClassCardinality( + const raft::handle_t& handle, + const raft::device_vector_view& groundTruth, + const raft::host_scalar_view& minLabel, + const raft::host_scalar_view& maxLabel) +{ + detail::getInputClassCardinality(groundTruth.data(), groundTruth.extent(0), handle.get_stream(), *minLabel.data(), *maxLabel.data()); +} + /** * @brief Calculate workspace size for running contingency matrix calculations * @tparam T label type @@ -61,6 +81,26 @@ size_t getContingencyMatrixWorkspaceSize(int nSamples, nSamples, groundTruth, stream, minLabel, maxLabel); } +/** + * @brief Calculate workspace size for running contingency matrix calculations + * @tparam T label type + * @tparam OutT output matrix type + * @param handle: the raft handle. + * @param groundTruth: device 1-d array for ground truth (num of rows) + * @param minLabel: Optional, min value in input array + * @param maxLabel: Optional, max value in input array + */ +template +size_t getContingencyMatrixWorkspaceSize( + const raft::handle_t& handle, + const raft::device_vector_view& groundTruth, + T minLabel = std::numeric_limits::max(), + T maxLabel = std::numeric_limits::max()) +{ + return detail::getContingencyMatrixWorkspaceSize( + groundTruth.extent(0), groundTruth.data(), handle.get_stream(), minLabel, maxLabel); +} + /** * @brief contruct contingency matrix given input ground truth and prediction * labels. Users should call function getInputClassCardinality to find @@ -100,6 +140,42 @@ void contingencyMatrix(const T* groundTruth, maxLabel); } +/** + * @brief contruct contingency matrix given input ground truth and prediction + * labels. Users should call function getInputClassCardinality to find + * and allocate memory for output. Similarly workspace requirements + * should be checked using function getContingencyMatrixWorkspaceSize + * @tparam T label type + * @tparam OutT output matrix type + * @param handle: the raft handle. + * @param groundTruth: device 1-d array for ground truth (num of rows) + * @param predictedLabel: device 1-d array for prediction (num of columns) + * @param outMat: output buffer for contingecy matrix + * @param workspace: Optional, workspace memory allocation + * @param workspaceSize: Optional, size of workspace memory + * @param minLabel: Optional, min value in input ground truth array + * @param maxLabel: Optional, max value in input ground truth array + */ +template +void contingencyMatrix(const raft::device_vector_view& groundTruth, + const raft::device_vector_view& predictedLabel, + const raft::device_matrix_view& outMat, + void* workspace = nullptr, + size_t workspaceSize = 0, + T minLabel = std::numeric_limits::max(), + T maxLabel = std::numeric_limits::max()) +{ + detail::contingencyMatrix(groundTruth.data(), + predictedLabel.data(), + groundTruth.extent(0), + outMat.data(), + handle.get_stream(), + workspace, + workspaceSize, + minLabel, + maxLabel); +} + }; // namespace stats }; // namespace raft From 15913897646cf21c42c39f0458ace1708d8263a5 Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Thu, 11 Aug 2022 18:54:44 +0200 Subject: [PATCH 04/40] Update meanvar --- cpp/include/raft/stats/contingency_matrix.cuh | 3 +- cpp/include/raft/stats/cov.cuh | 9 +++--- cpp/include/raft/stats/dispersion.cuh | 10 +++--- cpp/include/raft/stats/entropy.cuh | 6 ++-- cpp/include/raft/stats/histogram.cuh | 29 ++++++++++++++++- cpp/include/raft/stats/homogeneity_score.cuh | 21 ++++++++++++ .../raft/stats/information_criterion.cuh | 27 ++++++++++++++++ cpp/include/raft/stats/kl_divergence.cuh | 19 +++++++++++ cpp/include/raft/stats/mean.cuh | 29 +++++++++++++++-- cpp/include/raft/stats/mean_center.cuh | 3 +- cpp/include/raft/stats/meanvar.cuh | 32 ++++++++++++++++++- 11 files changed, 171 insertions(+), 17 deletions(-) diff --git a/cpp/include/raft/stats/contingency_matrix.cuh b/cpp/include/raft/stats/contingency_matrix.cuh index 8f77afb7fe..6d42daaca9 100644 --- a/cpp/include/raft/stats/contingency_matrix.cuh +++ b/cpp/include/raft/stats/contingency_matrix.cuh @@ -157,7 +157,8 @@ void contingencyMatrix(const T* groundTruth, * @param maxLabel: Optional, max value in input ground truth array */ template -void contingencyMatrix(const raft::device_vector_view& groundTruth, +void contingencyMatrix(const raft::handle_t& handle, + const raft::device_vector_view& groundTruth, const raft::device_vector_view& predictedLabel, const raft::device_matrix_view& outMat, void* workspace = nullptr, diff --git a/cpp/include/raft/stats/cov.cuh b/cpp/include/raft/stats/cov.cuh index 508be8cc9b..94fe5d40c2 100644 --- a/cpp/include/raft/stats/cov.cuh +++ b/cpp/include/raft/stats/cov.cuh @@ -65,6 +65,7 @@ void cov(const raft::handle_t& handle, * Mean operation is assumed to be performed on a given column. * * @tparam Type the data type + * @tparam IdxT the index type * @tparam LayoutPolicy Layout type of the input matrix. * @param handle the raft handle * @param data the input matrix (this will get mean-centered at the end!) @@ -77,11 +78,11 @@ void cov(const raft::handle_t& handle, * @note if stable=true, then the input data will be mean centered after this * function returns! */ -template +template void cov(const raft::handle_t& handle, - const raft::device_matrix_view& data, - const raft::device_vector_view& mu, - const raft::device_matrix_view& covar, + const raft::device_matrix_view& data, + const raft::device_vector_view& mu, + const raft::device_matrix_view& covar, bool sample, bool stable) { diff --git a/cpp/include/raft/stats/dispersion.cuh b/cpp/include/raft/stats/dispersion.cuh index 56198128f7..58be4ee1d7 100644 --- a/cpp/include/raft/stats/dispersion.cuh +++ b/cpp/include/raft/stats/dispersion.cuh @@ -53,7 +53,7 @@ DataT dispersion(const DataT* centroids, IdxT dim, cudaStream_t stream) { - return detail::dispersion( + return detail::dispersion( centroids, clusterSizes, globalCentroid, nClusters, nPoints, dim, stream); } @@ -77,12 +77,12 @@ DataT dispersion(const DataT* centroids, template DataT dispersion( const raft::handle_t& handle, - const raft::device_matrix_view& centroids, - const raft::device_vector_view& clusterSizes, - const std::optional>& globalCentroid, + const raft::device_matrix_view& centroids, + const raft::device_vector_view& clusterSizes, + const std::optional>& globalCentroid, const IdxT nPoints) { - return detail::dispersion( + return detail::dispersion( centroids.data(), clusterSizes.data(), globalCentroid.data(), centroids.extent(0), nPoints, centroids.extent(1), handle.get_stream()); } diff --git a/cpp/include/raft/stats/entropy.cuh b/cpp/include/raft/stats/entropy.cuh index 58ee48bbc2..e3d9ef1a6f 100644 --- a/cpp/include/raft/stats/entropy.cuh +++ b/cpp/include/raft/stats/entropy.cuh @@ -28,6 +28,7 @@ namespace stats { * @brief Function to calculate entropy * more info on entropy * + * @tparam T data type * @param clusterArray: the array of classes of type T * @param size: the size of the data points of type int * @param lowerLabelRange: the lower bound of the range of labels @@ -49,15 +50,16 @@ double entropy(const T* clusterArray, * @brief Function to calculate entropy * more info on entropy * + * @tparam T data type * @param handle the raft handle * @param clusterArray: the array of classes of type T * @param lowerLabelRange: the lower bound of the range of labels * @param upperLabelRange: the upper bound of the range of labels * @return the entropy score */ -template +template double entropy(const raft::handle_t& handle, - const raft::device_vector_view& clusterArray, + const raft::device_vector_view& clusterArray, const T lowerLabelRange, const T upperLabelRange) { diff --git a/cpp/include/raft/stats/histogram.cuh b/cpp/include/raft/stats/histogram.cuh index e8176ebc92..8dd69b911b 100644 --- a/cpp/include/raft/stats/histogram.cuh +++ b/cpp/include/raft/stats/histogram.cuh @@ -19,6 +19,7 @@ #pragma once +#include #include #include @@ -42,7 +43,7 @@ namespace stats { * @param nbins number of bins * @param data input data (length = ncols * nrows) * @param nrows data array length in each column (or batch) - * @param ncols number of columsn (or batch size) + * @param ncols number of columns (or batch size) * @param stream cuda stream * @param binner the operation that computes the bin index of the input data * @@ -61,6 +62,32 @@ void histogram(HistType type, detail::histogram(type, bins, nbins, data, nrows, ncols, stream, binner); } + +/** + * @brief Perform histogram on the input data. It chooses the right load size + * based on the input data vector length. It also supports large-bin cases + * using a specialized smem-based hashing technique. + * @tparam DataT input data type + * @tparam IdxT data type used to compute indices + * @tparam BinnerOp takes the input data and computes its bin index + * @param handle the raft handle + * @param type histogram implementation type to choose + * @param bins the output bins (length = ncols * nbins) + * @param data input data (length = ncols * nrows) + * @param binner the operation that computes the bin index of the input data + * + * @note signature of BinnerOp is `int func(DataT, IdxT);` + */ +template > +void histogram(const raft::handle_t& handle, + HistType type, + const raft::device_matrix_view& bins, + const raft::device_matrix_view& data, + BinnerOp binner = IdentityBinner()) +{ + detail::histogram(type, bins.data_handle(), bins.extent(1), data.data_handle(), data.extent(0), + data.extent(1), handle.get_stream(), binner); +} }; // end namespace stats }; // end namespace raft diff --git a/cpp/include/raft/stats/homogeneity_score.cuh b/cpp/include/raft/stats/homogeneity_score.cuh index 5fe92db78a..c6a2f1739b 100644 --- a/cpp/include/raft/stats/homogeneity_score.cuh +++ b/cpp/include/raft/stats/homogeneity_score.cuh @@ -19,6 +19,7 @@ #pragma once +#include #include namespace raft { @@ -47,6 +48,26 @@ double homogeneity_score(const T* truthClusterArray, truthClusterArray, predClusterArray, size, lowerLabelRange, upperLabelRange, stream); } +/** + * @brief Function to calculate the homogeneity score between two clusters + * more info on mutual + * information + * @param handle the raft handle + * @param truthClusterArray: the array of truth classes of type T + * @param predClusterArray: the array of predicted classes of type T + * @param lowerLabelRange: the lower bound of the range of labels + * @param upperLabelRange: the upper bound of the range of labels + */ +template +double homogeneity_score(const raft::handle_t& handle, + const raft::device_vector_view& truthClusterArray, + const raft::device_vector_view& predClusterArray, + T lowerLabelRange, + T upperLabelRange) +{ + return detail::homogeneity_score( + truthClusterArray.data_handle(), predClusterArray.data_handle(), truthClusterArray.extent(0), lowerLabelRange, upperLabelRange, handle.get_stream()); +} }; // end namespace stats }; // end namespace raft diff --git a/cpp/include/raft/stats/information_criterion.cuh b/cpp/include/raft/stats/information_criterion.cuh index 0744dcdffe..9c7cff1e3c 100644 --- a/cpp/include/raft/stats/information_criterion.cuh +++ b/cpp/include/raft/stats/information_criterion.cuh @@ -29,6 +29,7 @@ #pragma once +#include #include #include @@ -63,6 +64,32 @@ void information_criterion_batched(ScalarT* d_ic, d_ic, d_loglikelihood, ic_type, n_params, batch_size, n_samples, stream); } +/** + * Compute the given type of information criterion + * + * @note: it is safe to do the computation in-place (i.e give same pointer + * as input and output) + * + * @param[in] handle the raft handle + * @param[out] d_ic Information criterion to be returned for each + * series (device) length: batch_size + * @param[in] d_loglikelihood Log-likelihood for each series (device) length: batch_size + * @param[in] ic_type Type of criterion to compute. See IC_Type + * @param[in] n_params Number of parameters in the model + * @param[in] n_samples Number of samples in each series + */ +template +void information_criterion_batched(const raft::handle_t& handle, + const raft::device_vector_view& d_ic, + const raft::device_vector_view& d_loglikelihood, + IC_Type ic_type, + IdxT n_params, + IdxT n_samples) +{ + batched::detail::information_criterion( + d_ic.data_handle(), d_loglikelihood.data_handle(), ic_type, n_params, d_ic.extent(0), n_samples, handle.get_stream()); +} + } // namespace stats } // namespace raft #endif diff --git a/cpp/include/raft/stats/kl_divergence.cuh b/cpp/include/raft/stats/kl_divergence.cuh index b29f277b4a..052599267c 100644 --- a/cpp/include/raft/stats/kl_divergence.cuh +++ b/cpp/include/raft/stats/kl_divergence.cuh @@ -19,6 +19,7 @@ #pragma once +#include #include namespace raft { @@ -41,6 +42,24 @@ DataT kl_divergence(const DataT* modelPDF, const DataT* candidatePDF, int size, return detail::kl_divergence(modelPDF, candidatePDF, size, stream); } +/** + * @brief Function to calculate KL Divergence + * more info on KL + * Divergence + * + * @tparam DataT: Data type of the input array + * @param handle the raft handle + * @param modelPDF: the model array of probability density functions of type DataT + * @param candidatePDF: the candidate array of probability density functions of type DataT + */ +template +DataT kl_divergence(const raft::handle_t& handle, + const raft::device_vector_view& modelPDF, + const raft::device_vector_view candidatePDF) +{ + return detail::kl_divergence(modelPDF.data_handle(), candidatePDF.data_handle(), modelPDF.extent(0), handle.get_stream()); +} + }; // end namespace stats }; // end namespace raft diff --git a/cpp/include/raft/stats/mean.cuh b/cpp/include/raft/stats/mean.cuh index eed3159d5d..7d0282d61e 100644 --- a/cpp/include/raft/stats/mean.cuh +++ b/cpp/include/raft/stats/mean.cuh @@ -19,9 +19,9 @@ #pragma once -#include "detail/mean.cuh" - +#include #include +#include namespace raft { namespace stats { @@ -50,6 +50,31 @@ void mean( detail::mean(mu, data, D, N, sample, rowMajor, stream); } +/** + * @brief Compute mean of the input matrix + * + * Mean operation is assumed to be performed on a given column. + * + * @tparam Type: the data type + * @tparam IdxType Integer type used to for addressing + * @tparam LayoutPolicy Layout type of the input matrix. + * @param handle the raft handle + * @param mu: the output mean vector + * @param data: the input matrix + * @param sample: whether to evaluate sample mean or not. In other words, whether + * to normalize the output using N-1 or N, for true or false, respectively + */ +template +void mean(const raft::handle_t& handle, + const raft::device_vector_view& mu, + const raft::device_matrix_view& data, + bool sample) +{ + detail::mean(mu.data_handle(), data.data_handle(), data.extent(1), + data.extent(0), sample, std::is_same_v, + handle.get_stream()); +} + }; // namespace stats }; // namespace raft diff --git a/cpp/include/raft/stats/mean_center.cuh b/cpp/include/raft/stats/mean_center.cuh index 10797fd2e1..0c9abbccfa 100644 --- a/cpp/include/raft/stats/mean_center.cuh +++ b/cpp/include/raft/stats/mean_center.cuh @@ -104,6 +104,7 @@ void meanAdd(Type* out, /** * @brief Add the input matrix wrt its mean * @tparam Type the data type + * @tparam IdxType Integer type used for addressing * @tparam LayoutPolicy Layout type of the input matrix. * @tparam TPB threads per block of the cuda kernel launched * @param handle the raft handle @@ -112,7 +113,7 @@ void meanAdd(Type* out, * @param out the output mean-added matrix * @param bcastAlongRows whether to broadcast vector along rows or columns */ -template +template void meanAdd(const raft::handle_t& handle, const raft::device_matrix_view& data, const raft::device_vector_view& mu, diff --git a/cpp/include/raft/stats/meanvar.cuh b/cpp/include/raft/stats/meanvar.cuh index 0c3c423493..76e2c25017 100644 --- a/cpp/include/raft/stats/meanvar.cuh +++ b/cpp/include/raft/stats/meanvar.cuh @@ -18,7 +18,8 @@ #pragma once -#include "detail/meanvar.cuh" +#include +#include namespace raft::stats { @@ -55,6 +56,35 @@ void meanvar(Type* mean, detail::meanvar(mean, var, data, D, N, sample, rowMajor, stream); } +/** + * @brief Compute mean and variance for each column of a given matrix. + * + * The operation is performed in a single sweep. Consider using it when you need to compute + * both mean and variance, or when you need to compute variance but don't have the mean. + * It's almost twice faster than running `mean` and `vars` sequentially, because all three + * kernels are memory-bound. + * + * @tparam Type the data type + * @tparam IdxType Integer type used for addressing + * @tparam LayoutPolicy Layout type of the input matrix. + * @param handle the raft handle + * @param [out] mean the output mean vector of size D + * @param [out] var the output variance vector of size D + * @param [in] data the input matrix of size [N, D] + * @param [in] sample whether to evaluate sample variance or not. In other words, whether to + * normalize the variance using N-1 or N, for true or false respectively. + */ +template +void meanvar(const raft::handle_t& handle, + const raft::device_vector_view& mean, + const raft::device_vector_view& var, + const raft::device_matrix_view& data, + bool sample) +{ + detail::meanvar(mean.data_handle(), var.data_handle(), data.data_handle(), data.extent(1), data.extent(0), + sample, std::is_same_v, handle.get_stream()); +} + }; // namespace raft::stats #endif From a6cdb62d6e69d3f7c265a924ee548de05d1fdb94 Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Thu, 11 Aug 2022 18:55:49 +0200 Subject: [PATCH 05/40] Start using vanilla mdspan --- cpp/include/raft/stats/minmax.cuh | 59 +++++++++++++++++++++++++++++++ cpp/test/stats/minmax.cu | 25 +++++++------ 2 files changed, 74 insertions(+), 10 deletions(-) diff --git a/cpp/include/raft/stats/minmax.cuh b/cpp/include/raft/stats/minmax.cuh index 62533b1a00..654b0bd464 100644 --- a/cpp/include/raft/stats/minmax.cuh +++ b/cpp/include/raft/stats/minmax.cuh @@ -18,6 +18,8 @@ #pragma once +#include +#include #include #include #include @@ -68,6 +70,63 @@ void minmax(const T* data, data, rowids, colids, nrows, ncols, row_stride, globalmin, globalmax, sampledcols, stream); } + +/** + * @brief Computes min/max across every column of the input matrix, as well as + * optionally allow to subsample based on the given row/col ID mapping vectors + * + * @tparam T the data type + * @tparam TPB number of threads per block + * @param handle the raft handle + * @param data input data col-major of size [nrows, ncols], unless rowids or + * colids length is smaller + * @param rowids actual row ID mappings. It is of length nrows. If you want to + * skip this index lookup entirely, pass nullptr + * @param colids actual col ID mappings. It is of length ncols. If you want to + * skip this index lookup entirely, pass nullptr + * @param row_stride stride (in number of elements) between 2 adjacent columns + * @param globalmin final col-wise global minimum (size = ncols) + * @param globalmax final col-wise global maximum (size = ncols) + * @param sampledcols output sampled data. Pass nullptr if you don't need this + * @note This method makes the following assumptions: + * 1. input and output matrices are assumed to be col-major + * 2. ncols is small enough to fit the whole of min/max values across all cols + * in shared memory + */ +template +void minmax(const raft::handle_t& handle, + raft::mdspan, LayoutPolicy, AccessorPolicy> data, + std::optional, LayoutPolicy>> rowids, + std::optional, LayoutPolicy>> colids, + raft::mdspan, LayoutPolicy> globalmin, + raft::mdspan, LayoutPolicy> globalmax, + std::optional, LayoutPolicy>> sampledcols) +{ + static_assert(std::is_same_v, "Data should be col-major"); + const unsigned* rowids_ptr = nullptr; + const unsigned* colids_ptr = nullptr; + T* sampledcols_ptr = nullptr; + auto nrows = data.extent(0); + auto ncols = data.extent(1); + int row_stride = data.stride(0); + if (rowids.has_value()) + { + rowids_ptr = rowids.value().data_handle(); + nrows = rowids.value().extent(0); + } + if (colids.has_value()) + { + colids_ptr = colids.value().data_handle(); + ncols = colids.value().extent(0); + } + if (sampledcols.has_value()) + { + sampledcols_ptr = sampledcols.value().data_handle(); + } + detail::minmax( + data.data_handle(), rowids_ptr, colids_ptr, nrows, ncols, row_stride, globalmin.data_handle(), globalmax.data_handle(), sampledcols_ptr, handle.get_stream()); +} + }; // namespace stats }; // namespace raft #endif \ No newline at end of file diff --git a/cpp/test/stats/minmax.cu b/cpp/test/stats/minmax.cu index 532932b6ba..9098b00077 100644 --- a/cpp/test/stats/minmax.cu +++ b/cpp/test/stats/minmax.cu @@ -17,6 +17,7 @@ #include "../test_utils.h" #include #include +#include #include #include #include @@ -111,22 +112,26 @@ class MinMaxTest : public ::testing::TestWithParam> { nanKernel<<>>( data.data(), mask.data(), len, std::numeric_limits::quiet_NaN()); RAFT_CUDA_TRY(cudaPeekAtLastError()); + auto data_view = raft::make_device_matrix_view( + data.data(), params.rows, params.cols); + std::optional> rowids = std::nullopt; + std::optional> colids = std::nullopt; + std::optional> sampledcols = std::nullopt; + auto globalmin = raft::make_device_vector_view(minmax_act.data(), params.cols); + auto globalmax = raft::make_device_vector_view(minmax_act.data() + params.cols, params.cols); naiveMinMax(data.data(), params.rows, params.cols, minmax_ref.data(), minmax_ref.data() + params.cols, stream); - minmax(data.data(), - nullptr, - nullptr, - params.rows, - params.cols, - params.rows, - minmax_act.data(), - minmax_act.data() + params.cols, - nullptr, - stream); + raft::stats::minmax(handle, + data_view, + rowids, + colids, + globalmin, + globalmax, + sampledcols); } protected: From 16c1d03db42bed7fd493fbf1220e8e2bc3ee3bae Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Mon, 15 Aug 2022 15:45:34 +0200 Subject: [PATCH 06/40] Add vanilla mdspan for stats public api --- cpp/include/raft/stats/accuracy.cuh | 16 ++++- .../raft/stats/adjusted_rand_index.cuh | 26 ++++++-- cpp/include/raft/stats/completeness_score.cuh | 29 ++++++--- cpp/include/raft/stats/contingency_matrix.cuh | 58 +++++++++++------- cpp/include/raft/stats/cov.cuh | 24 +++++--- cpp/include/raft/stats/minmax.cuh | 60 +++++++++++-------- cpp/test/stats/minmax.cu | 20 +++---- 7 files changed, 151 insertions(+), 82 deletions(-) diff --git a/cpp/include/raft/stats/accuracy.cuh b/cpp/include/raft/stats/accuracy.cuh index e04a75c9e9..f5f10e1ad3 100644 --- a/cpp/include/raft/stats/accuracy.cuh +++ b/cpp/include/raft/stats/accuracy.cuh @@ -43,15 +43,25 @@ float accuracy(const math_t* predictions, const math_t* ref_predictions, int n, /** * @brief Compute accuracy of predictions. Useful for classification. * @tparam math_t: data type for predictions (e.g., int for classification) + * @tparam IdxType Index type of matrix extent. + * @tparam LayoutPolicy Layout type of the input matrix. When layout is strided, it can + * be a submatrix of a larger matrix. Arbitrary stride is not supported. + * @tparam AccessorPolicy Accessor for the input and output, must be valid accessor on + * device. * @param[in] handle: the raft handle. * @param[in] predictions: array of predictions (GPU pointer). * @param[in] ref_predictions: array of reference (ground-truth) predictions (GPU pointer). * @return: Accuracy score in [0, 1]; higher is better. */ -template -float accuracy(const raft::handle_t& handle, const raft::device_vector_view& predictions, const raft::device_vector_view& ref_predictions) +template +float accuracy(const raft::handle_t& handle, + raft::mdspan, LayoutPolicy, AccessorPolicy> predictions, + raft::mdspan, LayoutPolicy, AccessorPolicy> ref_predictions, { - return detail::accuracy_score(predictions.data(), ref_predictions.data(), predictions.extent(0), handle.get_stream()); + return detail::accuracy_score(predictions.data_handle(), + ref_predictions.data_handle(), + predictions.extent(0), + handle.get_stream()); } } // namespace stats } // namespace raft diff --git a/cpp/include/raft/stats/adjusted_rand_index.cuh b/cpp/include/raft/stats/adjusted_rand_index.cuh index d4acb73e6d..9c395d1db7 100644 --- a/cpp/include/raft/stats/adjusted_rand_index.cuh +++ b/cpp/include/raft/stats/adjusted_rand_index.cuh @@ -54,17 +54,31 @@ double adjusted_rand_index(const T* firstClusterArray, * here * @tparam T data-type for input label arrays * @tparam MathT integral data-type used for computing n-choose-r + * @tparam IdxType Index type of matrix extent. + * @tparam LayoutPolicy Layout type of the input matrix. When layout is strided, it can + * be a submatrix of a larger matrix. Arbitrary stride is not supported. + * @tparam AccessorPolicy Accessor for the input and output, must be valid accessor on + * device. * @param handle: the raft handle. * @param firstClusterArray: the array of classes * @param secondClusterArray: the array of classes */ -template -double adjusted_rand_index(const raft::handle_t& handle, - const raft::device_vector_view& firstClusterArray, - const raft::device_vector_view& secondClusterArray) +template +double adjusted_rand_index( + const raft::handle_t& handle, + raft::mdspan, LayoutPolicy, AccessorPolicy> + firstClusterArray, + raft::mdspan, LayoutPolicy, AccessorPolicy> + secondClusterArray) { - return detail::compute_adjusted_rand_index(firstClusterArray.data(), secondClusterArray.data(), - firstClusterArray.extent(0), handle.get_stream()); + return detail::compute_adjusted_rand_index(firstClusterArray.data_handle(), + secondClusterArray.data_handle(), + firstClusterArray.extent(0), + handle.get_stream()); } }; // end namespace stats diff --git a/cpp/include/raft/stats/completeness_score.cuh b/cpp/include/raft/stats/completeness_score.cuh index 1686086667..586322771d 100644 --- a/cpp/include/raft/stats/completeness_score.cuh +++ b/cpp/include/raft/stats/completeness_score.cuh @@ -50,21 +50,34 @@ double completeness_score(const T* truthClusterArray, /** * @brief Function to calculate the completeness score between two clusters * + * @tparam T the data type + * @tparam IdxType Index type of matrix extent. + * @tparam LayoutPolicy Layout type of the input matrix. When layout is strided, it can + * be a submatrix of a larger matrix. Arbitrary stride is not supported. + * @tparam AccessorPolicy Accessor for the input and output, must be valid accessor on + * device. * @param handle: the raft handle. * @param truthClusterArray: the array of truth classes of type T * @param predClusterArray: the array of predicted classes of type T * @param lowerLabelRange: the lower bound of the range of labels * @param upperLabelRange: the upper bound of the range of labels */ -template -double completeness_score(const raft::handle_t& handle, - const raft::device_vector_view& truthClusterArray, - const raft::device_vector_view& predClusterArray, - T lowerLabelRange, - T upperLabelRange) +template +double completeness_score( + const raft::handle_t& handle, + raft::mdspan, LayoutPolicy, AccessorPolicy> + truthClusterArray, + raft::mdspan, LayoutPolicy, AccessorPolicy> + predClusterArray, + T lowerLabelRange, + T upperLabelRange) { - return detail::homogeneity_score( - predClusterArray.data(), truthClusterArray.data(), truthClusterArray.extent(0), lowerLabelRange, upperLabelRange, handle.get_stream()); + return detail::homogeneity_score(predClusterArray.data_handle(), + truthClusterArray.data_handle(), + truthClusterArray.extent(0), + lowerLabelRange, + upperLabelRange, + handle.get_stream()); } }; // end namespace stats diff --git a/cpp/include/raft/stats/contingency_matrix.cuh b/cpp/include/raft/stats/contingency_matrix.cuh index 6d42daaca9..d040dbdd34 100644 --- a/cpp/include/raft/stats/contingency_matrix.cuh +++ b/cpp/include/raft/stats/contingency_matrix.cuh @@ -50,16 +50,20 @@ void getInputClassCardinality( * @param minLabel: [out] calculated min value in input array * @param maxLabel: [out] calculated max value in input array */ -template +template void getInputClassCardinality( const raft::handle_t& handle, - const raft::device_vector_view& groundTruth, - const raft::host_scalar_view& minLabel, + raft::mdspan, LayoutPolicy, AccessorPolicy> groundTruth, + const raft::host_scalar_view& minLabel, const raft::host_scalar_view& maxLabel) { - detail::getInputClassCardinality(groundTruth.data(), groundTruth.extent(0), handle.get_stream(), *minLabel.data(), *maxLabel.data()); + detail::getInputClassCardinality(groundTruth.data_handle(), + groundTruth.extent(0), + handle.get_stream(), + *minLabel.data_handle(), + *maxLabel.data_handle()); } - + /** * @brief Calculate workspace size for running contingency matrix calculations * @tparam T label type @@ -90,15 +94,19 @@ size_t getContingencyMatrixWorkspaceSize(int nSamples, * @param minLabel: Optional, min value in input array * @param maxLabel: Optional, max value in input array */ -template +template size_t getContingencyMatrixWorkspaceSize( const raft::handle_t& handle, - const raft::device_vector_view& groundTruth, + raft::mdspan, LayoutPolicy, AccessorPolicy> groundTruth, T minLabel = std::numeric_limits::max(), T maxLabel = std::numeric_limits::max()) { return detail::getContingencyMatrixWorkspaceSize( - groundTruth.extent(0), groundTruth.data(), handle.get_stream(), minLabel, maxLabel); + groundTruth.extent(0), groundTruth.data_handle(), handle.get_stream(), minLabel, maxLabel); } /** @@ -147,6 +155,11 @@ void contingencyMatrix(const T* groundTruth, * should be checked using function getContingencyMatrixWorkspaceSize * @tparam T label type * @tparam OutT output matrix type + * @tparam IdxType Index type of matrix extent. + * @tparam LayoutPolicy Layout type of the input matrix. When layout is strided, it can + * be a submatrix of a larger matrix. Arbitrary stride is not supported. + * @tparam AccessorPolicy Accessor for the input and output, must be valid accessor on + * device. * @param handle: the raft handle. * @param groundTruth: device 1-d array for ground truth (num of rows) * @param predictedLabel: device 1-d array for prediction (num of columns) @@ -156,20 +169,25 @@ void contingencyMatrix(const T* groundTruth, * @param minLabel: Optional, min value in input ground truth array * @param maxLabel: Optional, max value in input ground truth array */ -template -void contingencyMatrix(const raft::handle_t& handle, - const raft::device_vector_view& groundTruth, - const raft::device_vector_view& predictedLabel, - const raft::device_matrix_view& outMat, - void* workspace = nullptr, - size_t workspaceSize = 0, - T minLabel = std::numeric_limits::max(), - T maxLabel = std::numeric_limits::max()) +template +void contingencyMatrix( + const raft::handle_t& handle, + raft::mdspan, LayoutPolicy, AccessorPolicy> groundTruth, + raft::mdspan, LayoutPolicy, AccessorPolicy> predictedLabel, + raft::mdspan, LayoutPolicy, AccessorPolicy> outMat, + void* workspace = nullptr, + size_t workspaceSize = 0, + T minLabel = std::numeric_limits::max(), + T maxLabel = std::numeric_limits::max()) { - detail::contingencyMatrix(groundTruth.data(), - predictedLabel.data(), + detail::contingencyMatrix(groundTruth.data_handle(), + predictedLabel.data_handle(), groundTruth.extent(0), - outMat.data(), + outMat.data_handle(), handle.get_stream(), workspace, workspaceSize, diff --git a/cpp/include/raft/stats/cov.cuh b/cpp/include/raft/stats/cov.cuh index 94fe5d40c2..cab1a5c790 100644 --- a/cpp/include/raft/stats/cov.cuh +++ b/cpp/include/raft/stats/cov.cuh @@ -78,16 +78,24 @@ void cov(const raft::handle_t& handle, * @note if stable=true, then the input data will be mean centered after this * function returns! */ -template +template void cov(const raft::handle_t& handle, - const raft::device_matrix_view& data, - const raft::device_vector_view& mu, - const raft::device_matrix_view& covar, - bool sample, - bool stable) + raft::mdspan, LayoutPolicy, AccessorPolicy> data, + raft::mdspan, LayoutPolicy, AccessorPolicy> mu, + raft::mdspan, LayoutPolicy, AccessorPolicy> covar, + bool sample, + bool stable) { - detail::cov(handle, covar.data(), data.data(), mu.data(), data.extent(1), data.extent(0), - std::is_same_v, sample, stable, handle.get_stream()); + detail::cov(handle, + covar.data_handle(), + data.data_handle(), + mu.data_handle(), + data.extent(1), + data.extent(0), + std::is_same_v, + sample, + stable, + handle.get_stream()); } }; // end namespace stats }; // end namespace raft diff --git a/cpp/include/raft/stats/minmax.cuh b/cpp/include/raft/stats/minmax.cuh index 654b0bd464..985defce26 100644 --- a/cpp/include/raft/stats/minmax.cuh +++ b/cpp/include/raft/stats/minmax.cuh @@ -70,12 +70,16 @@ void minmax(const T* data, data, rowids, colids, nrows, ncols, row_stride, globalmin, globalmax, sampledcols, stream); } - /** * @brief Computes min/max across every column of the input matrix, as well as * optionally allow to subsample based on the given row/col ID mapping vectors * - * @tparam T the data type + * @tparam T Data type of input matrix element. + * @tparam IndexType Index type of matrix extent. + * @tparam LayoutPolicy Layout type of the input matrix. When layout is strided, it can + * be a submatrix of a larger matrix. Arbitrary stride is not supported. + * @tparam AccessorPolicy Accessor for the input and output, must be valid accessor on + * device. * @tparam TPB number of threads per block * @param handle the raft handle * @param data input data col-major of size [nrows, ncols], unless rowids or @@ -84,7 +88,6 @@ void minmax(const T* data, * skip this index lookup entirely, pass nullptr * @param colids actual col ID mappings. It is of length ncols. If you want to * skip this index lookup entirely, pass nullptr - * @param row_stride stride (in number of elements) between 2 adjacent columns * @param globalmin final col-wise global minimum (size = ncols) * @param globalmax final col-wise global maximum (size = ncols) * @param sampledcols output sampled data. Pass nullptr if you don't need this @@ -93,38 +96,45 @@ void minmax(const T* data, * 2. ncols is small enough to fit the whole of min/max values across all cols * in shared memory */ -template +template void minmax(const raft::handle_t& handle, raft::mdspan, LayoutPolicy, AccessorPolicy> data, - std::optional, LayoutPolicy>> rowids, - std::optional, LayoutPolicy>> colids, - raft::mdspan, LayoutPolicy> globalmin, - raft::mdspan, LayoutPolicy> globalmax, - std::optional, LayoutPolicy>> sampledcols) + std::optional>> rowids, + std::optional>> colids, + raft::mdspan> globalmin, + raft::mdspan> globalmax, + std::optional>> sampledcols) { static_assert(std::is_same_v, "Data should be col-major"); const unsigned* rowids_ptr = nullptr; const unsigned* colids_ptr = nullptr; - T* sampledcols_ptr = nullptr; - auto nrows = data.extent(0); - auto ncols = data.extent(1); - int row_stride = data.stride(0); - if (rowids.has_value()) - { + T* sampledcols_ptr = nullptr; + auto nrows = data.extent(0); + auto ncols = data.extent(1); + auto row_stride = data.stride(1); + if (rowids.has_value()) { rowids_ptr = rowids.value().data_handle(); - nrows = rowids.value().extent(0); + nrows = rowids.value().extent(0); } - if (colids.has_value()) - { + if (colids.has_value()) { colids_ptr = colids.value().data_handle(); - ncols = colids.value().extent(0); + ncols = colids.value().extent(0); } - if (sampledcols.has_value()) - { - sampledcols_ptr = sampledcols.value().data_handle(); - } - detail::minmax( - data.data_handle(), rowids_ptr, colids_ptr, nrows, ncols, row_stride, globalmin.data_handle(), globalmax.data_handle(), sampledcols_ptr, handle.get_stream()); + if (sampledcols.has_value()) { sampledcols_ptr = sampledcols.value().data_handle(); } + detail::minmax(data.data_handle(), + rowids_ptr, + colids_ptr, + nrows, + ncols, + row_stride, + globalmin.data_handle(), + globalmax.data_handle(), + sampledcols_ptr, + handle.get_stream()); } }; // namespace stats diff --git a/cpp/test/stats/minmax.cu b/cpp/test/stats/minmax.cu index 9098b00077..958ff75856 100644 --- a/cpp/test/stats/minmax.cu +++ b/cpp/test/stats/minmax.cu @@ -114,24 +114,20 @@ class MinMaxTest : public ::testing::TestWithParam> { RAFT_CUDA_TRY(cudaPeekAtLastError()); auto data_view = raft::make_device_matrix_view( data.data(), params.rows, params.cols); - std::optional> rowids = std::nullopt; - std::optional> colids = std::nullopt; - std::optional> sampledcols = std::nullopt; - auto globalmin = raft::make_device_vector_view(minmax_act.data(), params.cols); - auto globalmax = raft::make_device_vector_view(minmax_act.data() + params.cols, params.cols); + std::optional> rowids = std::nullopt; + std::optional> colids = std::nullopt; + std::optional> sampledcols = std::nullopt; + auto globalmin = raft::make_device_vector_view(minmax_act.data(), params.cols); + auto globalmax = + raft::make_device_vector_view(minmax_act.data() + params.cols, params.cols); naiveMinMax(data.data(), params.rows, params.cols, minmax_ref.data(), minmax_ref.data() + params.cols, stream); - raft::stats::minmax(handle, - data_view, - rowids, - colids, - globalmin, - globalmax, - sampledcols); + raft::stats::minmax( + handle, data_view, rowids, colids, globalmin, globalmax, sampledcols); } protected: From 3b262cfb82b0914059bce830770f8eaeb4708488 Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Fri, 19 Aug 2022 01:38:19 +0200 Subject: [PATCH 07/40] Fix comments, start adding tests --- cpp/include/raft/stats/accuracy.cuh | 5 +- .../raft/stats/adjusted_rand_index.cuh | 3 +- cpp/include/raft/stats/completeness_score.cuh | 3 +- cpp/include/raft/stats/contingency_matrix.cuh | 5 +- cpp/include/raft/stats/cov.cuh | 10 ++-- cpp/include/raft/stats/dispersion.cuh | 39 ++++++++++----- cpp/include/raft/stats/entropy.cuh | 14 ++++-- cpp/include/raft/stats/histogram.cuh | 28 ++++++++--- cpp/include/raft/stats/homogeneity_score.cuh | 24 ++++++--- .../raft/stats/information_criterion.cuh | 31 ++++++++---- cpp/include/raft/stats/kl_divergence.cuh | 15 ++++-- cpp/include/raft/stats/mean.cuh | 24 +++++---- cpp/include/raft/stats/mean_center.cuh | 49 +++++++++++++------ cpp/include/raft/stats/meanvar.cuh | 22 ++++++--- cpp/test/stats/adjusted_rand_index.cu | 16 +++--- cpp/test/stats/completeness_score.cu | 23 +++++---- cpp/test/stats/cov.cu | 27 ++++++---- 17 files changed, 218 insertions(+), 120 deletions(-) diff --git a/cpp/include/raft/stats/accuracy.cuh b/cpp/include/raft/stats/accuracy.cuh index f5f10e1ad3..37165c59e9 100644 --- a/cpp/include/raft/stats/accuracy.cuh +++ b/cpp/include/raft/stats/accuracy.cuh @@ -44,8 +44,7 @@ float accuracy(const math_t* predictions, const math_t* ref_predictions, int n, * @brief Compute accuracy of predictions. Useful for classification. * @tparam math_t: data type for predictions (e.g., int for classification) * @tparam IdxType Index type of matrix extent. - * @tparam LayoutPolicy Layout type of the input matrix. When layout is strided, it can - * be a submatrix of a larger matrix. Arbitrary stride is not supported. + * @tparam LayoutPolicy Layout type of the input data. * @tparam AccessorPolicy Accessor for the input and output, must be valid accessor on * device. * @param[in] handle: the raft handle. @@ -56,7 +55,7 @@ float accuracy(const math_t* predictions, const math_t* ref_predictions, int n, template float accuracy(const raft::handle_t& handle, raft::mdspan, LayoutPolicy, AccessorPolicy> predictions, - raft::mdspan, LayoutPolicy, AccessorPolicy> ref_predictions, + raft::mdspan, LayoutPolicy, AccessorPolicy> ref_predictions) { return detail::accuracy_score(predictions.data_handle(), ref_predictions.data_handle(), diff --git a/cpp/include/raft/stats/adjusted_rand_index.cuh b/cpp/include/raft/stats/adjusted_rand_index.cuh index 9c395d1db7..c7e9810cb7 100644 --- a/cpp/include/raft/stats/adjusted_rand_index.cuh +++ b/cpp/include/raft/stats/adjusted_rand_index.cuh @@ -55,8 +55,7 @@ double adjusted_rand_index(const T* firstClusterArray, * @tparam T data-type for input label arrays * @tparam MathT integral data-type used for computing n-choose-r * @tparam IdxType Index type of matrix extent. - * @tparam LayoutPolicy Layout type of the input matrix. When layout is strided, it can - * be a submatrix of a larger matrix. Arbitrary stride is not supported. + * @tparam LayoutPolicy Layout type of the input data. * @tparam AccessorPolicy Accessor for the input and output, must be valid accessor on * device. * @param handle: the raft handle. diff --git a/cpp/include/raft/stats/completeness_score.cuh b/cpp/include/raft/stats/completeness_score.cuh index 586322771d..a2d333af36 100644 --- a/cpp/include/raft/stats/completeness_score.cuh +++ b/cpp/include/raft/stats/completeness_score.cuh @@ -52,8 +52,7 @@ double completeness_score(const T* truthClusterArray, * * @tparam T the data type * @tparam IdxType Index type of matrix extent. - * @tparam LayoutPolicy Layout type of the input matrix. When layout is strided, it can - * be a submatrix of a larger matrix. Arbitrary stride is not supported. + * @tparam LayoutPolicy Layout type of the input data. * @tparam AccessorPolicy Accessor for the input and output, must be valid accessor on * device. * @param handle: the raft handle. diff --git a/cpp/include/raft/stats/contingency_matrix.cuh b/cpp/include/raft/stats/contingency_matrix.cuh index d040dbdd34..7152a82d48 100644 --- a/cpp/include/raft/stats/contingency_matrix.cuh +++ b/cpp/include/raft/stats/contingency_matrix.cuh @@ -156,8 +156,7 @@ void contingencyMatrix(const T* groundTruth, * @tparam T label type * @tparam OutT output matrix type * @tparam IdxType Index type of matrix extent. - * @tparam LayoutPolicy Layout type of the input matrix. When layout is strided, it can - * be a submatrix of a larger matrix. Arbitrary stride is not supported. + * @tparam LayoutPolicy Layout type of the input data. * @tparam AccessorPolicy Accessor for the input and output, must be valid accessor on * device. * @param handle: the raft handle. @@ -178,7 +177,7 @@ void contingencyMatrix( const raft::handle_t& handle, raft::mdspan, LayoutPolicy, AccessorPolicy> groundTruth, raft::mdspan, LayoutPolicy, AccessorPolicy> predictedLabel, - raft::mdspan, LayoutPolicy, AccessorPolicy> outMat, + raft::mdspan, LayoutPolicy> outMat, void* workspace = nullptr, size_t workspaceSize = 0, T minLabel = std::numeric_limits::max(), diff --git a/cpp/include/raft/stats/cov.cuh b/cpp/include/raft/stats/cov.cuh index cab1a5c790..0eb9f11867 100644 --- a/cpp/include/raft/stats/cov.cuh +++ b/cpp/include/raft/stats/cov.cuh @@ -66,11 +66,13 @@ void cov(const raft::handle_t& handle, * * @tparam Type the data type * @tparam IdxT the index type - * @tparam LayoutPolicy Layout type of the input matrix. + * @tparam LayoutPolicy Layout type of the input data. + * @tparam AccessorPolicy Accessor for the input and output, must be valid accessor on + * device. * @param handle the raft handle + * @param covar the output covariance matrix * @param data the input matrix (this will get mean-centered at the end!) * @param mu mean vector of the input matrix - * @param covar the output covariance matrix * @param sample whether to evaluate sample covariance or not. In other words, * whether to normalize the output using N-1 or N, for true or false, * respectively @@ -80,9 +82,9 @@ void cov(const raft::handle_t& handle, */ template void cov(const raft::handle_t& handle, - raft::mdspan, LayoutPolicy, AccessorPolicy> data, - raft::mdspan, LayoutPolicy, AccessorPolicy> mu, raft::mdspan, LayoutPolicy, AccessorPolicy> covar, + raft::mdspan, LayoutPolicy, AccessorPolicy> data, + raft::device_mdspan, LayoutPolicy> mu, bool sample, bool stable) { diff --git a/cpp/include/raft/stats/dispersion.cuh b/cpp/include/raft/stats/dispersion.cuh index 58be4ee1d7..4b9d6ea129 100644 --- a/cpp/include/raft/stats/dispersion.cuh +++ b/cpp/include/raft/stats/dispersion.cuh @@ -61,8 +61,10 @@ DataT dispersion(const DataT* centroids, * @brief Compute cluster dispersion metric. This is very useful for * automatically finding the 'k' (in kmeans) that improves this metric. * @tparam DataT data type - * @tparam IdxT index type - * @tparam LayoutPolicy Layout type of the input matrix. + * @tparam IdxType index type + * @tparam LayoutPolicy Layout type of the input data. + * @tparam AccessorPolicy Accessor for the input and output, must be valid accessor on + * device. * @tparam TPB threads block for kernels launched * @param handle the raft handle * @param centroids the cluster centroids. This is assumed to be row-major @@ -74,18 +76,31 @@ DataT dispersion(const DataT* centroids, * @param nPoints number of points in the dataset * @return the cluster dispersion value */ -template -DataT dispersion( - const raft::handle_t& handle, - const raft::device_matrix_view& centroids, - const raft::device_vector_view& clusterSizes, - const std::optional>& globalCentroid, - const IdxT nPoints) +template +DataT dispersion(const raft::handle_t& handle, + raft::mdspan, LayoutPolicy, AccessorPolicy> centroids, + raft::mdspan> clusterSizes, + std::optional>> globalCentroid, + const IdxType nPoints) { - return detail::dispersion( - centroids.data(), clusterSizes.data(), globalCentroid.data(), centroids.extent(0), nPoints, centroids.extent(1), handle.get_stream()); + DataT* globalCentroid_ptr = nullptr; + if (globalCentroid.has_value()) + { + globalCentroid_ptr = globalCentroid.value().data_handle(); + } + return detail::dispersion(centroids.data_handle(), + clusterSizes.data_handle(), + globalCentroid_ptr, + centroids.extent(0), + nPoints, + centroids.extent(1), + handle.get_stream()); } - + } // end namespace stats } // end namespace raft diff --git a/cpp/include/raft/stats/entropy.cuh b/cpp/include/raft/stats/entropy.cuh index e3d9ef1a6f..bfdc768a8d 100644 --- a/cpp/include/raft/stats/entropy.cuh +++ b/cpp/include/raft/stats/entropy.cuh @@ -51,19 +51,27 @@ double entropy(const T* clusterArray, * more info on entropy * * @tparam T data type + * @tparam IdxT index type + * @tparam LayoutPolicy Layout type of the input data. + * @tparam AccessorPolicy Accessor for the input and output, must be valid accessor on + * device. * @param handle the raft handle * @param clusterArray: the array of classes of type T * @param lowerLabelRange: the lower bound of the range of labels * @param upperLabelRange: the upper bound of the range of labels * @return the entropy score */ -template +template double entropy(const raft::handle_t& handle, - const raft::device_vector_view& clusterArray, + raft::mdspan, LayoutPolicy, AccessorPolicy> clusterArray, const T lowerLabelRange, const T upperLabelRange) { - return detail::entropy(clusterArray.data(), clusterArray.extent(0), lowerLabelRange, upperLabelRange, handle.get_stream()); + return detail::entropy(clusterArray.data_handle(), + clusterArray.extent(0), + lowerLabelRange, + upperLabelRange, + handle.get_stream()); } }; // end namespace stats }; // end namespace raft diff --git a/cpp/include/raft/stats/histogram.cuh b/cpp/include/raft/stats/histogram.cuh index 8dd69b911b..5dbc70c8f4 100644 --- a/cpp/include/raft/stats/histogram.cuh +++ b/cpp/include/raft/stats/histogram.cuh @@ -62,14 +62,16 @@ void histogram(HistType type, detail::histogram(type, bins, nbins, data, nrows, ncols, stream, binner); } - /** * @brief Perform histogram on the input data. It chooses the right load size * based on the input data vector length. It also supports large-bin cases * using a specialized smem-based hashing technique. * @tparam DataT input data type - * @tparam IdxT data type used to compute indices + * @tparam IdxType data type used to compute indices * @tparam BinnerOp takes the input data and computes its bin index + * @tparam LayoutPolicy Layout type of the input data. + * @tparam AccessorPolicy Accessor for the input and output, must be valid accessor on + * device. * @param handle the raft handle * @param type histogram implementation type to choose * @param bins the output bins (length = ncols * nbins) @@ -78,15 +80,25 @@ void histogram(HistType type, * * @note signature of BinnerOp is `int func(DataT, IdxT);` */ -template > +template , + typename LayoutPolicy, + typename AccessorPolicy> void histogram(const raft::handle_t& handle, HistType type, - const raft::device_matrix_view& bins, - const raft::device_matrix_view& data, - BinnerOp binner = IdentityBinner()) + raft::mdspan, LayoutPolicy> bins, + raft::mdspan, LayoutPolicy, AccessorPolicy> data, + BinnerOp binner = IdentityBinner()) { - detail::histogram(type, bins.data_handle(), bins.extent(1), data.data_handle(), data.extent(0), - data.extent(1), handle.get_stream(), binner); + detail::histogram(type, + bins.data_handle(), + bins.extent(1), + data.data_handle(), + data.extent(0), + data.extent(1), + handle.get_stream(), + binner); } }; // end namespace stats }; // end namespace raft diff --git a/cpp/include/raft/stats/homogeneity_score.cuh b/cpp/include/raft/stats/homogeneity_score.cuh index c6a2f1739b..cc9c024081 100644 --- a/cpp/include/raft/stats/homogeneity_score.cuh +++ b/cpp/include/raft/stats/homogeneity_score.cuh @@ -52,21 +52,31 @@ double homogeneity_score(const T* truthClusterArray, * @brief Function to calculate the homogeneity score between two clusters * more info on mutual * information + * + * @tparam T data type + * @tparam IdxType index type + * @tparam LayoutPolicy Layout type of the input data. + * @tparam AccessorPolicy Accessor for the input and output, must be valid accessor on + * device. * @param handle the raft handle * @param truthClusterArray: the array of truth classes of type T * @param predClusterArray: the array of predicted classes of type T * @param lowerLabelRange: the lower bound of the range of labels * @param upperLabelRange: the upper bound of the range of labels */ -template +template double homogeneity_score(const raft::handle_t& handle, - const raft::device_vector_view& truthClusterArray, - const raft::device_vector_view& predClusterArray, - T lowerLabelRange, - T upperLabelRange) + raft::mdspan, LayoutPolicy, AccessorPolicy> truthClusterArray, + raft::mdspan, LayoutPolicy, AccessorPolicy> predClusterArray, + T lowerLabelRange, + T upperLabelRange) { - return detail::homogeneity_score( - truthClusterArray.data_handle(), predClusterArray.data_handle(), truthClusterArray.extent(0), lowerLabelRange, upperLabelRange, handle.get_stream()); + return detail::homogeneity_score(truthClusterArray.data_handle(), + predClusterArray.data_handle(), + truthClusterArray.extent(0), + lowerLabelRange, + upperLabelRange, + handle.get_stream()); } }; // end namespace stats }; // end namespace raft diff --git a/cpp/include/raft/stats/information_criterion.cuh b/cpp/include/raft/stats/information_criterion.cuh index 9c7cff1e3c..7a96dc4277 100644 --- a/cpp/include/raft/stats/information_criterion.cuh +++ b/cpp/include/raft/stats/information_criterion.cuh @@ -70,6 +70,11 @@ void information_criterion_batched(ScalarT* d_ic, * @note: it is safe to do the computation in-place (i.e give same pointer * as input and output) * + * @tparam ScalarT data type + * @tparam IdxType index type + * @tparam LayoutPolicy Layout type of the input data. + * @tparam AccessorPolicy Accessor for the input and output, must be valid accessor on + * device. * @param[in] handle the raft handle * @param[out] d_ic Information criterion to be returned for each * series (device) length: batch_size @@ -78,18 +83,24 @@ void information_criterion_batched(ScalarT* d_ic, * @param[in] n_params Number of parameters in the model * @param[in] n_samples Number of samples in each series */ -template -void information_criterion_batched(const raft::handle_t& handle, - const raft::device_vector_view& d_ic, - const raft::device_vector_view& d_loglikelihood, - IC_Type ic_type, - IdxT n_params, - IdxT n_samples) +template +void information_criterion_batched( + const raft::handle_t& handle, + raft::mdspan> d_ic, + raft::mdspan, LayoutPolicy, AccessorPolicy> d_loglikelihood, + IC_Type ic_type, + IdxType n_params, + IdxType n_samples) { - batched::detail::information_criterion( - d_ic.data_handle(), d_loglikelihood.data_handle(), ic_type, n_params, d_ic.extent(0), n_samples, handle.get_stream()); + batched::detail::information_criterion(d_ic.data_handle(), + d_loglikelihood.data_handle(), + ic_type, + n_params, + d_ic.extent(0), + n_samples, + handle.get_stream()); } - + } // namespace stats } // namespace raft #endif diff --git a/cpp/include/raft/stats/kl_divergence.cuh b/cpp/include/raft/stats/kl_divergence.cuh index 052599267c..df5d0965c0 100644 --- a/cpp/include/raft/stats/kl_divergence.cuh +++ b/cpp/include/raft/stats/kl_divergence.cuh @@ -48,16 +48,21 @@ DataT kl_divergence(const DataT* modelPDF, const DataT* candidatePDF, int size, * Divergence * * @tparam DataT: Data type of the input array + * @tparam IdxType index type + * @tparam LayoutPolicy Layout type of the input data. + * @tparam AccessorPolicy Accessor for the input and output, must be valid accessor on + * device. * @param handle the raft handle * @param modelPDF: the model array of probability density functions of type DataT * @param candidatePDF: the candidate array of probability density functions of type DataT */ -template -DataT kl_divergence(const raft::handle_t& handle, - const raft::device_vector_view& modelPDF, - const raft::device_vector_view candidatePDF) +template +DataT kl_divergence(const raft::handle_t& handle, + raft::mdspan, LayoutPolicy, AccessorPolicy> modelPDF, + raft::mdspan, LayoutPolicy, AccessorPolicy> candidatePDF) { - return detail::kl_divergence(modelPDF.data_handle(), candidatePDF.data_handle(), modelPDF.extent(0), handle.get_stream()); + return detail::kl_divergence( + modelPDF.data_handle(), candidatePDF.data_handle(), modelPDF.extent(0), handle.get_stream()); } }; // end namespace stats diff --git a/cpp/include/raft/stats/mean.cuh b/cpp/include/raft/stats/mean.cuh index 7d0282d61e..e9daecaa6f 100644 --- a/cpp/include/raft/stats/mean.cuh +++ b/cpp/include/raft/stats/mean.cuh @@ -55,26 +55,32 @@ void mean( * * Mean operation is assumed to be performed on a given column. * - * @tparam Type: the data type - * @tparam IdxType Integer type used to for addressing + * @tparam Type the data type + * @tparam IdxType index type * @tparam LayoutPolicy Layout type of the input matrix. + * @tparam AccessorPolicy Accessor for the input and output, must be valid accessor on + * device. * @param handle the raft handle * @param mu: the output mean vector * @param data: the input matrix * @param sample: whether to evaluate sample mean or not. In other words, whether * to normalize the output using N-1 or N, for true or false, respectively */ -template +template void mean(const raft::handle_t& handle, - const raft::device_vector_view& mu, - const raft::device_matrix_view& data, + raft::mdspan> mu, + raft::mdspan, LayoutPolicy, AccessorPolicy> data, bool sample) { - detail::mean(mu.data_handle(), data.data_handle(), data.extent(1), - data.extent(0), sample, std::is_same_v, - handle.get_stream()); + detail::mean(mu.data_handle(), + data.data_handle(), + data.extent(1), + data.extent(0), + sample, + std::is_same_v, + handle.get_stream()); } - + }; // namespace stats }; // namespace raft diff --git a/cpp/include/raft/stats/mean_center.cuh b/cpp/include/raft/stats/mean_center.cuh index 0c9abbccfa..9267a5524d 100644 --- a/cpp/include/raft/stats/mean_center.cuh +++ b/cpp/include/raft/stats/mean_center.cuh @@ -55,23 +55,32 @@ void meanCenter(Type* out, /** * @brief Center the input matrix wrt its mean * @tparam Type the data type + * @tparam IdxType index type * @tparam LayoutPolicy Layout type of the input matrix. + * @tparam AccessorPolicy Accessor for the input and output, must be valid accessor on + * device. * @tparam TPB threads per block of the cuda kernel launched * @param handle the raft handle + * @param out the output mean-centered matrix * @param data input matrix * @param mu the mean vector - * @param out the output mean-centered matrix * @param bcastAlongRows whether to broadcast vector along rows or columns */ -template +template void meanCenter(const raft::handle_t& handle, - const raft::device_matrix_view& data, - const raft::device_vector_view& mu, - const raft::device_matrix_view& out, + raft::mdspan, LayoutPolicy> out, + raft::mdspan, LayoutPolicy, AccessorPolicy> data, + raft::mdspan> mu, bool bcastAlongRows) { - detail::meanCenter(out.data(), data.data(), mu.data(), data.extent(1), data.extent(0), - std::is_same_v, bcastAlongRows, handle.get_stream()); + detail::meanCenter(out.data_handle(), + data.data_handle(), + mu.data_handle(), + data.extent(1), + data.extent(0), + std::is_same_v, + bcastAlongRows, + handle.get_stream()); } /** @@ -104,24 +113,32 @@ void meanAdd(Type* out, /** * @brief Add the input matrix wrt its mean * @tparam Type the data type - * @tparam IdxType Integer type used for addressing + * @tparam IdxType index type * @tparam LayoutPolicy Layout type of the input matrix. + * @tparam AccessorPolicy Accessor for the input and output, must be valid accessor on + * device. * @tparam TPB threads per block of the cuda kernel launched * @param handle the raft handle + * @param out the output mean-centered matrix * @param data input matrix * @param mu the mean vector - * @param out the output mean-added matrix * @param bcastAlongRows whether to broadcast vector along rows or columns */ -template +template void meanAdd(const raft::handle_t& handle, - const raft::device_matrix_view& data, - const raft::device_vector_view& mu, - const raft::device_matrix_view& out, - bool bcastAlongRows) + raft::mdspan, LayoutPolicy> out, + raft::mdspan, LayoutPolicy, AccessorPolicy> data, + raft::mdspan> mu, + bool bcastAlongRows) { - detail::meanAdd(out.data(), data.data(), mu.data(), data.extent(1), data.extent(0), - std::is_same_v, bcastAlongRows, handle.get_stream()); + detail::meanAdd(out.data_handle(), + data.data_handle(), + mu.data_handle(), + data.extent(1), + data.extent(0), + std::is_same_v, + bcastAlongRows, + handle.get_stream()); } }; // end namespace stats }; // end namespace raft diff --git a/cpp/include/raft/stats/meanvar.cuh b/cpp/include/raft/stats/meanvar.cuh index 76e2c25017..d071a8962d 100644 --- a/cpp/include/raft/stats/meanvar.cuh +++ b/cpp/include/raft/stats/meanvar.cuh @@ -67,6 +67,8 @@ void meanvar(Type* mean, * @tparam Type the data type * @tparam IdxType Integer type used for addressing * @tparam LayoutPolicy Layout type of the input matrix. + * @tparam AccessorPolicy Accessor for the input and output, must be valid accessor on + * device. * @param handle the raft handle * @param [out] mean the output mean vector of size D * @param [out] var the output variance vector of size D @@ -74,17 +76,23 @@ void meanvar(Type* mean, * @param [in] sample whether to evaluate sample variance or not. In other words, whether to * normalize the variance using N-1 or N, for true or false respectively. */ -template +template void meanvar(const raft::handle_t& handle, - const raft::device_vector_view& mean, - const raft::device_vector_view& var, - const raft::device_matrix_view& data, + raft::mdspan> mean, + raft::mdspan> var, + raft::mdspan, LayoutPolicy, AccessorPolicy> data, bool sample) { - detail::meanvar(mean.data_handle(), var.data_handle(), data.data_handle(), data.extent(1), data.extent(0), - sample, std::is_same_v, handle.get_stream()); + detail::meanvar(mean.data_handle(), + var.data_handle(), + data.data_handle(), + data.extent(1), + data.extent(0), + sample, + std::is_same_v, + handle.get_stream()); } - + }; // namespace raft::stats #endif diff --git a/cpp/test/stats/adjusted_rand_index.cu b/cpp/test/stats/adjusted_rand_index.cu index 4bacbadbf7..6f9cd34f61 100644 --- a/cpp/test/stats/adjusted_rand_index.cu +++ b/cpp/test/stats/adjusted_rand_index.cu @@ -18,7 +18,8 @@ #include #include #include -#include +#include +#include #include #include #include @@ -40,11 +41,10 @@ struct adjustedRandIndexParam { template class adjustedRandIndexTest : public ::testing::TestWithParam { protected: - adjustedRandIndexTest() : firstClusterArray(0, stream), secondClusterArray(0, stream) {} + adjustedRandIndexTest() : stream(handle.get_stream()), firstClusterArray(0, stream), secondClusterArray(0, stream) {} void SetUp() override { - RAFT_CUDA_TRY(cudaStreamCreate(&stream)); params = ::testing::TestWithParam::GetParam(); nElements = params.nElements; @@ -61,12 +61,11 @@ class adjustedRandIndexTest : public ::testing::TestWithParam( - firstClusterArray.data(), secondClusterArray.data(), nElements, stream); + computed_adjusted_rand_index = adjusted_rand_index, false>>(handle, + raft::make_device_vector_view(firstClusterArray.data(), nElements), + raft::make_device_vector_view(secondClusterArray.data(), nElements)); } - void TearDown() override { RAFT_CUDA_TRY(cudaStreamDestroy(stream)); } - void SetUpDifferentArrays() { lowerLabelRange = params.lowerLabelRange; @@ -135,6 +134,8 @@ class adjustedRandIndexTest : public ::testing::TestWithParam firstClusterArray; @@ -142,7 +143,6 @@ class adjustedRandIndexTest : public ::testing::TestWithParam inputs = { diff --git a/cpp/test/stats/completeness_score.cu b/cpp/test/stats/completeness_score.cu index f0f06614e3..b4bdf71321 100644 --- a/cpp/test/stats/completeness_score.cu +++ b/cpp/test/stats/completeness_score.cu @@ -40,6 +40,10 @@ template class completenessTest : public ::testing::TestWithParam { protected: // the constructor + completenessTest() : stream(handle.get_stream()) + { + } + void SetUp() override { // getting the parameters @@ -64,9 +68,6 @@ class completenessTest : public ::testing::TestWithParam { } // allocating and initializing memory to the GPU - - RAFT_CUDA_TRY(cudaStreamCreate(&stream)); - rmm::device_uvector truthClusterArray(nElements, stream); rmm::device_uvector predClusterArray(nElements, stream); raft::update_device(truthClusterArray.data(), arr1.data(), (int)nElements, stream); @@ -92,18 +93,16 @@ class completenessTest : public ::testing::TestWithParam { if (nElements == 0) truthCompleteness = 1.0; // calling the completeness CUDA implementation - computedCompleteness = raft::stats::completeness_score(truthClusterArray.data(), - predClusterArray.data(), - nElements, - lowerLabelRange, - upperLabelRange, - stream); + computedCompleteness = raft::stats::completeness_score( + handle, + raft::make_device_vector_view(truthClusterArray.data(), nElements), + raft::make_device_vector_view(predClusterArray.data(), nElements), + lowerLabelRange, + upperLabelRange); } - // the destructor - void TearDown() override { RAFT_CUDA_TRY(cudaStreamDestroy(stream)); } - // declaring the data values + raft::handle_t handle; completenessParam params; T lowerLabelRange, upperLabelRange; int nElements = 0; diff --git a/cpp/test/stats/cov.cu b/cpp/test/stats/cov.cu index d9cc3ec8be..2151d77bed 100644 --- a/cpp/test/stats/cov.cu +++ b/cpp/test/stats/cov.cu @@ -69,16 +69,25 @@ class CovTest : public ::testing::TestWithParam> { normal(handle, r, data.data(), len, params.mean, var); raft::stats::mean( mean_act.data(), data.data(), cols, rows, params.sample, params.rowMajor, stream); - cov(handle, - cov_act.data(), - data.data(), - mean_act.data(), - cols, - rows, + if (params.rowMajor) + { + using layout = raft::row_major; + cov(handle, + raft::make_device_matrix_view(cov_act.data(), cols, cols), + raft::make_device_matrix_view(data.data(), rows, cols), + raft::make_device_vector_view(mean_act.data(), cols), params.sample, - params.rowMajor, - params.stable, - stream); + params.stable); + } else { + using layout = raft::col_major; + cov(handle, + raft::make_device_matrix_view(cov_act.data(), cols, cols), + raft::make_device_matrix_view(data.data(), rows, cols), + raft::make_device_vector_view(mean_act.data(), cols), + params.sample, + params.stable); + } + T data_h[6] = {1.0, 2.0, 5.0, 4.0, 2.0, 1.0}; T cov_cm_ref_h[4] = {4.3333, -2.8333, -2.8333, 2.333}; From 21f374ab9483a5dead00408a0c9b9f9cc530d06f Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Sun, 4 Sep 2022 01:37:42 +0200 Subject: [PATCH 08/40] Remove constness, add tests --- cpp/include/raft/stats/accuracy.cuh | 4 +-- .../raft/stats/adjusted_rand_index.cuh | 4 +-- cpp/include/raft/stats/completeness_score.cuh | 4 +-- cpp/include/raft/stats/contingency_matrix.cuh | 10 ++++---- cpp/include/raft/stats/cov.cuh | 2 +- cpp/include/raft/stats/dispersion.cuh | 8 +++--- cpp/include/raft/stats/entropy.cuh | 2 +- cpp/include/raft/stats/histogram.cuh | 4 +-- cpp/include/raft/stats/homogeneity_score.cuh | 4 +-- .../raft/stats/information_criterion.cuh | 4 +-- cpp/include/raft/stats/kl_divergence.cuh | 4 +-- cpp/include/raft/stats/mean.cuh | 4 +-- cpp/include/raft/stats/mean_center.cuh | 12 ++++----- cpp/include/raft/stats/meanvar.cuh | 6 ++--- cpp/include/raft/stats/minmax.cuh | 6 ++--- cpp/test/stats/completeness_score.cu | 4 +-- cpp/test/stats/contingencyMatrix.cu | 25 +++++++++---------- cpp/test/stats/cov.cu | 6 ++--- cpp/test/stats/dispersion.cu | 12 ++++++--- cpp/test/stats/entropy.cu | 10 +++++--- cpp/test/stats/histogram.cu | 5 +++- cpp/test/stats/homogeneity_score.cu | 6 ++--- cpp/test/stats/minmax.cu | 16 ++++++------ 23 files changed, 84 insertions(+), 78 deletions(-) diff --git a/cpp/include/raft/stats/accuracy.cuh b/cpp/include/raft/stats/accuracy.cuh index 37165c59e9..c60e86fe35 100644 --- a/cpp/include/raft/stats/accuracy.cuh +++ b/cpp/include/raft/stats/accuracy.cuh @@ -54,8 +54,8 @@ float accuracy(const math_t* predictions, const math_t* ref_predictions, int n, */ template float accuracy(const raft::handle_t& handle, - raft::mdspan, LayoutPolicy, AccessorPolicy> predictions, - raft::mdspan, LayoutPolicy, AccessorPolicy> ref_predictions) + raft::mdspan, LayoutPolicy, AccessorPolicy> predictions, + raft::mdspan, LayoutPolicy, AccessorPolicy> ref_predictions) { return detail::accuracy_score(predictions.data_handle(), ref_predictions.data_handle(), diff --git a/cpp/include/raft/stats/adjusted_rand_index.cuh b/cpp/include/raft/stats/adjusted_rand_index.cuh index c7e9810cb7..5a46636fdd 100644 --- a/cpp/include/raft/stats/adjusted_rand_index.cuh +++ b/cpp/include/raft/stats/adjusted_rand_index.cuh @@ -69,9 +69,9 @@ template double adjusted_rand_index( const raft::handle_t& handle, - raft::mdspan, LayoutPolicy, AccessorPolicy> + raft::mdspan, LayoutPolicy, AccessorPolicy> firstClusterArray, - raft::mdspan, LayoutPolicy, AccessorPolicy> + raft::mdspan, LayoutPolicy, AccessorPolicy> secondClusterArray) { return detail::compute_adjusted_rand_index(firstClusterArray.data_handle(), diff --git a/cpp/include/raft/stats/completeness_score.cuh b/cpp/include/raft/stats/completeness_score.cuh index a2d333af36..252d9a08ce 100644 --- a/cpp/include/raft/stats/completeness_score.cuh +++ b/cpp/include/raft/stats/completeness_score.cuh @@ -64,9 +64,9 @@ double completeness_score(const T* truthClusterArray, template double completeness_score( const raft::handle_t& handle, - raft::mdspan, LayoutPolicy, AccessorPolicy> + raft::mdspan, LayoutPolicy, AccessorPolicy> truthClusterArray, - raft::mdspan, LayoutPolicy, AccessorPolicy> + raft::mdspan, LayoutPolicy, AccessorPolicy> predClusterArray, T lowerLabelRange, T upperLabelRange) diff --git a/cpp/include/raft/stats/contingency_matrix.cuh b/cpp/include/raft/stats/contingency_matrix.cuh index 7152a82d48..dcd5ebcead 100644 --- a/cpp/include/raft/stats/contingency_matrix.cuh +++ b/cpp/include/raft/stats/contingency_matrix.cuh @@ -53,7 +53,7 @@ void getInputClassCardinality( template void getInputClassCardinality( const raft::handle_t& handle, - raft::mdspan, LayoutPolicy, AccessorPolicy> groundTruth, + raft::mdspan, LayoutPolicy, AccessorPolicy> groundTruth, const raft::host_scalar_view& minLabel, const raft::host_scalar_view& maxLabel) { @@ -101,7 +101,7 @@ template size_t getContingencyMatrixWorkspaceSize( const raft::handle_t& handle, - raft::mdspan, LayoutPolicy, AccessorPolicy> groundTruth, + raft::mdspan, LayoutPolicy, AccessorPolicy> groundTruth, T minLabel = std::numeric_limits::max(), T maxLabel = std::numeric_limits::max()) { @@ -175,9 +175,9 @@ template void contingencyMatrix( const raft::handle_t& handle, - raft::mdspan, LayoutPolicy, AccessorPolicy> groundTruth, - raft::mdspan, LayoutPolicy, AccessorPolicy> predictedLabel, - raft::mdspan, LayoutPolicy> outMat, + raft::mdspan, LayoutPolicy, AccessorPolicy> groundTruth, + raft::mdspan, LayoutPolicy, AccessorPolicy> predictedLabel, + raft::mdspan, LayoutPolicy, AccessorPolicy> outMat, void* workspace = nullptr, size_t workspaceSize = 0, T minLabel = std::numeric_limits::max(), diff --git a/cpp/include/raft/stats/cov.cuh b/cpp/include/raft/stats/cov.cuh index 0eb9f11867..fc0dc4043e 100644 --- a/cpp/include/raft/stats/cov.cuh +++ b/cpp/include/raft/stats/cov.cuh @@ -84,7 +84,7 @@ template , LayoutPolicy, AccessorPolicy> covar, raft::mdspan, LayoutPolicy, AccessorPolicy> data, - raft::device_mdspan, LayoutPolicy> mu, + raft::mdspan, LayoutPolicy, AccessorPolicy> mu, bool sample, bool stable) { diff --git a/cpp/include/raft/stats/dispersion.cuh b/cpp/include/raft/stats/dispersion.cuh index 4b9d6ea129..c4d311be0f 100644 --- a/cpp/include/raft/stats/dispersion.cuh +++ b/cpp/include/raft/stats/dispersion.cuh @@ -78,13 +78,13 @@ DataT dispersion(const DataT* centroids, */ template DataT dispersion(const raft::handle_t& handle, - raft::mdspan, LayoutPolicy, AccessorPolicy> centroids, - raft::mdspan> clusterSizes, - std::optional>> globalCentroid, + raft::mdspan, LayoutPolicy, AccessorPolicy> centroids, + raft::mdspan, LayoutPolicy, AccessorPolicy> clusterSizes, + std::optional, LayoutPolicy, AccessorPolicy>> globalCentroid, const IdxType nPoints) { DataT* globalCentroid_ptr = nullptr; diff --git a/cpp/include/raft/stats/entropy.cuh b/cpp/include/raft/stats/entropy.cuh index bfdc768a8d..4fa6400a9d 100644 --- a/cpp/include/raft/stats/entropy.cuh +++ b/cpp/include/raft/stats/entropy.cuh @@ -63,7 +63,7 @@ double entropy(const T* clusterArray, */ template double entropy(const raft::handle_t& handle, - raft::mdspan, LayoutPolicy, AccessorPolicy> clusterArray, + raft::mdspan, LayoutPolicy, AccessorPolicy> clusterArray, const T lowerLabelRange, const T upperLabelRange) { diff --git a/cpp/include/raft/stats/histogram.cuh b/cpp/include/raft/stats/histogram.cuh index 5dbc70c8f4..c27acff42f 100644 --- a/cpp/include/raft/stats/histogram.cuh +++ b/cpp/include/raft/stats/histogram.cuh @@ -87,8 +87,8 @@ template void histogram(const raft::handle_t& handle, HistType type, - raft::mdspan, LayoutPolicy> bins, - raft::mdspan, LayoutPolicy, AccessorPolicy> data, + raft::mdspan, LayoutPolicy, AccessorPolicy> bins, + raft::mdspan, LayoutPolicy, AccessorPolicy> data, BinnerOp binner = IdentityBinner()) { detail::histogram(type, diff --git a/cpp/include/raft/stats/homogeneity_score.cuh b/cpp/include/raft/stats/homogeneity_score.cuh index cc9c024081..ee32a2b5a2 100644 --- a/cpp/include/raft/stats/homogeneity_score.cuh +++ b/cpp/include/raft/stats/homogeneity_score.cuh @@ -66,8 +66,8 @@ double homogeneity_score(const T* truthClusterArray, */ template double homogeneity_score(const raft::handle_t& handle, - raft::mdspan, LayoutPolicy, AccessorPolicy> truthClusterArray, - raft::mdspan, LayoutPolicy, AccessorPolicy> predClusterArray, + raft::mdspan, LayoutPolicy, AccessorPolicy> truthClusterArray, + raft::mdspan, LayoutPolicy, AccessorPolicy> predClusterArray, T lowerLabelRange, T upperLabelRange) { diff --git a/cpp/include/raft/stats/information_criterion.cuh b/cpp/include/raft/stats/information_criterion.cuh index 7a96dc4277..0e8c3a0cf4 100644 --- a/cpp/include/raft/stats/information_criterion.cuh +++ b/cpp/include/raft/stats/information_criterion.cuh @@ -86,8 +86,8 @@ void information_criterion_batched(ScalarT* d_ic, template void information_criterion_batched( const raft::handle_t& handle, - raft::mdspan> d_ic, - raft::mdspan, LayoutPolicy, AccessorPolicy> d_loglikelihood, + raft::mdspan, LayoutPolicy, AccessorPolicy> d_ic, + raft::mdspan, LayoutPolicy, AccessorPolicy> d_loglikelihood, IC_Type ic_type, IdxType n_params, IdxType n_samples) diff --git a/cpp/include/raft/stats/kl_divergence.cuh b/cpp/include/raft/stats/kl_divergence.cuh index df5d0965c0..3af463c80c 100644 --- a/cpp/include/raft/stats/kl_divergence.cuh +++ b/cpp/include/raft/stats/kl_divergence.cuh @@ -58,8 +58,8 @@ DataT kl_divergence(const DataT* modelPDF, const DataT* candidatePDF, int size, */ template DataT kl_divergence(const raft::handle_t& handle, - raft::mdspan, LayoutPolicy, AccessorPolicy> modelPDF, - raft::mdspan, LayoutPolicy, AccessorPolicy> candidatePDF) + raft::mdspan, LayoutPolicy, AccessorPolicy> modelPDF, + raft::mdspan, LayoutPolicy, AccessorPolicy> candidatePDF) { return detail::kl_divergence( modelPDF.data_handle(), candidatePDF.data_handle(), modelPDF.extent(0), handle.get_stream()); diff --git a/cpp/include/raft/stats/mean.cuh b/cpp/include/raft/stats/mean.cuh index e9daecaa6f..7a19300daa 100644 --- a/cpp/include/raft/stats/mean.cuh +++ b/cpp/include/raft/stats/mean.cuh @@ -68,8 +68,8 @@ void mean( */ template void mean(const raft::handle_t& handle, - raft::mdspan> mu, - raft::mdspan, LayoutPolicy, AccessorPolicy> data, + raft::mdspan, LayoutPolicy, AccessorPolicy> mu, + raft::mdspan, LayoutPolicy, AccessorPolicy> data, bool sample) { detail::mean(mu.data_handle(), diff --git a/cpp/include/raft/stats/mean_center.cuh b/cpp/include/raft/stats/mean_center.cuh index 9267a5524d..58561223f8 100644 --- a/cpp/include/raft/stats/mean_center.cuh +++ b/cpp/include/raft/stats/mean_center.cuh @@ -68,9 +68,9 @@ void meanCenter(Type* out, */ template void meanCenter(const raft::handle_t& handle, - raft::mdspan, LayoutPolicy> out, - raft::mdspan, LayoutPolicy, AccessorPolicy> data, - raft::mdspan> mu, + raft::mdspan, LayoutPolicy, AccessorPolicy> out, + raft::mdspan, LayoutPolicy, AccessorPolicy> data, + raft::mdspan, LayoutPolicy, AccessorPolicy> mu, bool bcastAlongRows) { detail::meanCenter(out.data_handle(), @@ -126,9 +126,9 @@ void meanAdd(Type* out, */ template void meanAdd(const raft::handle_t& handle, - raft::mdspan, LayoutPolicy> out, - raft::mdspan, LayoutPolicy, AccessorPolicy> data, - raft::mdspan> mu, + raft::mdspan, LayoutPolicy, AccessorPolicy> out, + raft::mdspan, LayoutPolicy, AccessorPolicy> data, + raft::mdspan, LayoutPolicy, AccessorPolicy> mu, bool bcastAlongRows) { detail::meanAdd(out.data_handle(), diff --git a/cpp/include/raft/stats/meanvar.cuh b/cpp/include/raft/stats/meanvar.cuh index d071a8962d..7bf22d1195 100644 --- a/cpp/include/raft/stats/meanvar.cuh +++ b/cpp/include/raft/stats/meanvar.cuh @@ -78,9 +78,9 @@ void meanvar(Type* mean, */ template void meanvar(const raft::handle_t& handle, - raft::mdspan> mean, - raft::mdspan> var, - raft::mdspan, LayoutPolicy, AccessorPolicy> data, + raft::mdspan, LayoutPolicy, AccessorPolicy> mean, + raft::mdspan, LayoutPolicy, AccessorPolicy> var, + raft::mdspan, LayoutPolicy, AccessorPolicy> data, bool sample) { detail::meanvar(mean.data_handle(), diff --git a/cpp/include/raft/stats/minmax.cuh b/cpp/include/raft/stats/minmax.cuh index 985defce26..11f4eb848e 100644 --- a/cpp/include/raft/stats/minmax.cuh +++ b/cpp/include/raft/stats/minmax.cuh @@ -102,9 +102,9 @@ template void minmax(const raft::handle_t& handle, - raft::mdspan, LayoutPolicy, AccessorPolicy> data, - std::optional>> rowids, - std::optional>> colids, + raft::mdspan, LayoutPolicy, AccessorPolicy> data, + std::optional>> rowids, + std::optional>> colids, raft::mdspan> globalmin, raft::mdspan> globalmax, std::optional>> sampledcols) diff --git a/cpp/test/stats/completeness_score.cu b/cpp/test/stats/completeness_score.cu index b4bdf71321..91c70120ea 100644 --- a/cpp/test/stats/completeness_score.cu +++ b/cpp/test/stats/completeness_score.cu @@ -95,8 +95,8 @@ class completenessTest : public ::testing::TestWithParam { // calling the completeness CUDA implementation computedCompleteness = raft::stats::completeness_score( handle, - raft::make_device_vector_view(truthClusterArray.data(), nElements), - raft::make_device_vector_view(predClusterArray.data(), nElements), + raft::make_device_vector_view(truthClusterArray.data(), nElements), + raft::make_device_vector_view(predClusterArray.data(), nElements), lowerLabelRange, upperLabelRange); } diff --git a/cpp/test/stats/contingencyMatrix.cu b/cpp/test/stats/contingencyMatrix.cu index 5c8d6da566..df09ad2523 100644 --- a/cpp/test/stats/contingencyMatrix.cu +++ b/cpp/test/stats/contingencyMatrix.cu @@ -40,7 +40,8 @@ template class ContingencyMatrixTest : public ::testing::TestWithParam { protected: ContingencyMatrixTest() - : pWorkspace(0, stream), + : stream(handle.get_stream()), + pWorkspace(0, stream), dY(0, stream), dYHat(0, stream), dComputedOutput(0, stream), @@ -80,7 +81,6 @@ class ContingencyMatrixTest : public ::testing::TestWithParam())); } + raft::handle_t handle; ContingencyMatrixParam params; int numUniqueClasses = -1; T minLabel, maxLabel; diff --git a/cpp/test/stats/cov.cu b/cpp/test/stats/cov.cu index 2151d77bed..edd2108618 100644 --- a/cpp/test/stats/cov.cu +++ b/cpp/test/stats/cov.cu @@ -75,15 +75,15 @@ class CovTest : public ::testing::TestWithParam> { cov(handle, raft::make_device_matrix_view(cov_act.data(), cols, cols), raft::make_device_matrix_view(data.data(), rows, cols), - raft::make_device_vector_view(mean_act.data(), cols), + raft::make_device_vector_view(mean_act.data(), cols), params.sample, params.stable); } else { using layout = raft::col_major; - cov(handle, + cov(handle, raft::make_device_matrix_view(cov_act.data(), cols, cols), raft::make_device_matrix_view(data.data(), rows, cols), - raft::make_device_vector_view(mean_act.data(), cols), + raft::make_device_vector_view(mean_act.data(), cols), params.sample, params.stable); } diff --git a/cpp/test/stats/dispersion.cu b/cpp/test/stats/dispersion.cu index b8fd9dfe80..6580286c5b 100644 --- a/cpp/test/stats/dispersion.cu +++ b/cpp/test/stats/dispersion.cu @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -44,14 +45,13 @@ template template class DispersionTest : public ::testing::TestWithParam> { protected: - DispersionTest() : exp_mean(0, stream), act_mean(0, stream) {} + DispersionTest() : stream(handle.get_stream()), exp_mean(0, stream), act_mean(0, stream) {} void SetUp() override { params = ::testing::TestWithParam>::GetParam(); raft::random::RngState r(params.seed); int len = params.clusters * params.dim; - stream = handle.get_stream(); rmm::device_uvector data(len, stream); rmm::device_uvector counts(params.clusters, stream); exp_mean.resize(params.dim, stream); @@ -64,8 +64,12 @@ class DispersionTest : public ::testing::TestWithParam> { for (const auto& val : h_counts) { npoints += val; } - actualVal = dispersion( - data.data(), counts.data(), act_mean.data(), params.clusters, npoints, params.dim, stream); + actualVal = dispersion( + handle, + raft::make_device_matrix_view(data.data(), params.clusters, params.dim), + raft::make_device_vector_view(counts.data(), params.clusters), + std::make_optional(raft::make_device_vector_view(act_mean.data(), params.dim)), + npoints); expectedVal = T(0); std::vector h_data(len, T(0)); raft::update_host(&(h_data[0]), data.data(), len, stream); diff --git a/cpp/test/stats/entropy.cu b/cpp/test/stats/entropy.cu index fb9e82058e..d82d0eb57b 100644 --- a/cpp/test/stats/entropy.cu +++ b/cpp/test/stats/entropy.cu @@ -38,6 +38,9 @@ template class entropyTest : public ::testing::TestWithParam { protected: // the constructor + entropyTest() : stream(handle.get_stream()) + {} + void SetUp() override { // getting the parameters @@ -74,17 +77,18 @@ class entropyTest : public ::testing::TestWithParam { } // allocating and initializing memory to the GPU - RAFT_CUDA_TRY(cudaStreamCreate(&stream)); rmm::device_uvector clusterArray(nElements, stream); raft::update_device(clusterArray.data(), &arr1[0], (int)nElements, stream); raft::interruptible::synchronize(stream); // calling the entropy CUDA implementation computedEntropy = raft::stats::entropy( - clusterArray.data(), nElements, lowerLabelRange, upperLabelRange, stream); - RAFT_CUDA_TRY(cudaStreamDestroy(stream)); + handle, + raft::make_device_vector_view(clusterArray.data(), nElements), + lowerLabelRange, upperLabelRange); } + raft::handle_t handle; // declaring the data values entropyParam params; T lowerLabelRange, upperLabelRange; diff --git a/cpp/test/stats/histogram.cu b/cpp/test/stats/histogram.cu index f09c01c84a..9b5099f9b6 100644 --- a/cpp/test/stats/histogram.cu +++ b/cpp/test/stats/histogram.cu @@ -85,7 +85,10 @@ class HistTest : public ::testing::TestWithParam { cudaMemsetAsync(ref_bins.data(), 0, sizeof(int) * params.nbins * params.ncols, stream)); naiveHist(ref_bins.data(), params.nbins, in.data(), params.nrows, params.ncols, stream); histogram( - params.type, bins.data(), params.nbins, in.data(), params.nrows, params.ncols, stream); + handle, + params.type, + raft::make_device_matrix_view(bins.data(), params.nbins, params.ncols), + raft::make_device_matrix_view(in.data(), params.nrows, params.ncols)); handle.sync_stream(); } diff --git a/cpp/test/stats/homogeneity_score.cu b/cpp/test/stats/homogeneity_score.cu index 697cea55ad..b39407e0e9 100644 --- a/cpp/test/stats/homogeneity_score.cu +++ b/cpp/test/stats/homogeneity_score.cu @@ -47,6 +47,7 @@ class homogeneityTest : public ::testing::TestWithParam { nElements = params.nElements; lowerLabelRange = params.lowerLabelRange; upperLabelRange = params.upperLabelRange; + stream = handle.get_stream(); // generating random value test input std::vector arr1(nElements, 0); @@ -63,9 +64,6 @@ class homogeneityTest : public ::testing::TestWithParam { } // allocating and initializing memory to the GPU - - RAFT_CUDA_TRY(cudaStreamCreate(&stream)); - rmm::device_uvector truthClusterArray(nElements, stream); rmm::device_uvector predClusterArray(nElements, stream); raft::update_device(truthClusterArray.data(), &arr1[0], (int)nElements, stream); @@ -97,10 +95,10 @@ class homogeneityTest : public ::testing::TestWithParam { lowerLabelRange, upperLabelRange, stream); - RAFT_CUDA_TRY(cudaStreamDestroy(stream)); } // declaring the data values + raft::handle_t handle; homogeneityParam params; T lowerLabelRange, upperLabelRange; int nElements = 0; diff --git a/cpp/test/stats/minmax.cu b/cpp/test/stats/minmax.cu index 958ff75856..ffc989ceab 100644 --- a/cpp/test/stats/minmax.cu +++ b/cpp/test/stats/minmax.cu @@ -112,14 +112,6 @@ class MinMaxTest : public ::testing::TestWithParam> { nanKernel<<>>( data.data(), mask.data(), len, std::numeric_limits::quiet_NaN()); RAFT_CUDA_TRY(cudaPeekAtLastError()); - auto data_view = raft::make_device_matrix_view( - data.data(), params.rows, params.cols); - std::optional> rowids = std::nullopt; - std::optional> colids = std::nullopt; - std::optional> sampledcols = std::nullopt; - auto globalmin = raft::make_device_vector_view(minmax_act.data(), params.cols); - auto globalmax = - raft::make_device_vector_view(minmax_act.data() + params.cols, params.cols); naiveMinMax(data.data(), params.rows, params.cols, @@ -127,7 +119,13 @@ class MinMaxTest : public ::testing::TestWithParam> { minmax_ref.data() + params.cols, stream); raft::stats::minmax( - handle, data_view, rowids, colids, globalmin, globalmax, sampledcols); + handle, + raft::make_device_matrix_view(data.data(), params.rows, params.cols), + std::nullopt, + std::nullopt, + raft::make_device_vector_view(minmax_act.data(), params.cols), + raft::make_device_vector_view(minmax_act.data() + params.cols, params.cols), + std::nullopt); } protected: From 82a9bd7b2ba056d7161ac278024ca9eeca162a94 Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Mon, 5 Sep 2022 16:44:11 +0200 Subject: [PATCH 09/40] Add remaining tests, fix style --- cpp/include/raft/stats/accuracy.cuh | 7 +++-- .../raft/stats/adjusted_rand_index.cuh | 6 ++-- cpp/include/raft/stats/completeness_score.cuh | 6 ++-- cpp/include/raft/stats/dispersion.cuh | 21 +++++++------- cpp/include/raft/stats/entropy.cuh | 9 +++--- cpp/include/raft/stats/histogram.cuh | 16 +++++------ cpp/include/raft/stats/homogeneity_score.cuh | 11 ++++---- cpp/include/raft/stats/kl_divergence.cuh | 7 +++-- cpp/include/raft/stats/mean.cuh | 5 +++- cpp/include/raft/stats/mean_center.cuh | 12 ++++++-- cpp/include/raft/stats/meanvar.cuh | 5 +++- cpp/test/stats/adjusted_rand_index.cu | 10 +++++-- cpp/test/stats/completeness_score.cu | 4 +-- cpp/test/stats/cov.cu | 28 +++++++++---------- cpp/test/stats/dispersion.cu | 2 +- cpp/test/stats/entropy.cu | 12 ++++---- cpp/test/stats/histogram.cu | 9 +++--- cpp/test/stats/homogeneity_score.cu | 14 +++++----- cpp/test/stats/information_criterion.cu | 14 +++++----- cpp/test/stats/kl_divergence.cu | 9 +++--- cpp/test/stats/mean.cu | 14 +++++++++- cpp/test/stats/mean_center.cu | 26 +++++++++++------ cpp/test/stats/meanvar.cu | 24 ++++++++++------ cpp/test/stats/minmax.cu | 18 ++++++------ 24 files changed, 166 insertions(+), 123 deletions(-) diff --git a/cpp/include/raft/stats/accuracy.cuh b/cpp/include/raft/stats/accuracy.cuh index c60e86fe35..f746127395 100644 --- a/cpp/include/raft/stats/accuracy.cuh +++ b/cpp/include/raft/stats/accuracy.cuh @@ -53,9 +53,10 @@ float accuracy(const math_t* predictions, const math_t* ref_predictions, int n, * @return: Accuracy score in [0, 1]; higher is better. */ template -float accuracy(const raft::handle_t& handle, - raft::mdspan, LayoutPolicy, AccessorPolicy> predictions, - raft::mdspan, LayoutPolicy, AccessorPolicy> ref_predictions) +float accuracy( + const raft::handle_t& handle, + raft::mdspan, LayoutPolicy, AccessorPolicy> predictions, + raft::mdspan, LayoutPolicy, AccessorPolicy> ref_predictions) { return detail::accuracy_score(predictions.data_handle(), ref_predictions.data_handle(), diff --git a/cpp/include/raft/stats/adjusted_rand_index.cuh b/cpp/include/raft/stats/adjusted_rand_index.cuh index 5a46636fdd..40d75dba23 100644 --- a/cpp/include/raft/stats/adjusted_rand_index.cuh +++ b/cpp/include/raft/stats/adjusted_rand_index.cuh @@ -69,10 +69,8 @@ template double adjusted_rand_index( const raft::handle_t& handle, - raft::mdspan, LayoutPolicy, AccessorPolicy> - firstClusterArray, - raft::mdspan, LayoutPolicy, AccessorPolicy> - secondClusterArray) + raft::mdspan, LayoutPolicy, AccessorPolicy> firstClusterArray, + raft::mdspan, LayoutPolicy, AccessorPolicy> secondClusterArray) { return detail::compute_adjusted_rand_index(firstClusterArray.data_handle(), secondClusterArray.data_handle(), diff --git a/cpp/include/raft/stats/completeness_score.cuh b/cpp/include/raft/stats/completeness_score.cuh index 252d9a08ce..1eeba22c67 100644 --- a/cpp/include/raft/stats/completeness_score.cuh +++ b/cpp/include/raft/stats/completeness_score.cuh @@ -64,10 +64,8 @@ double completeness_score(const T* truthClusterArray, template double completeness_score( const raft::handle_t& handle, - raft::mdspan, LayoutPolicy, AccessorPolicy> - truthClusterArray, - raft::mdspan, LayoutPolicy, AccessorPolicy> - predClusterArray, + raft::mdspan, LayoutPolicy, AccessorPolicy> truthClusterArray, + raft::mdspan, LayoutPolicy, AccessorPolicy> predClusterArray, T lowerLabelRange, T upperLabelRange) { diff --git a/cpp/include/raft/stats/dispersion.cuh b/cpp/include/raft/stats/dispersion.cuh index c4d311be0f..beaf5beee5 100644 --- a/cpp/include/raft/stats/dispersion.cuh +++ b/cpp/include/raft/stats/dispersion.cuh @@ -77,21 +77,20 @@ DataT dispersion(const DataT* centroids, * @return the cluster dispersion value */ template -DataT dispersion(const raft::handle_t& handle, - raft::mdspan, LayoutPolicy, AccessorPolicy> centroids, - raft::mdspan, LayoutPolicy, AccessorPolicy> clusterSizes, - std::optional, LayoutPolicy, AccessorPolicy>> globalCentroid, - const IdxType nPoints) + int TPB = 256> +DataT dispersion( + const raft::handle_t& handle, + raft::mdspan, LayoutPolicy, AccessorPolicy> centroids, + raft::mdspan> clusterSizes, + std::optional, LayoutPolicy, AccessorPolicy>> + globalCentroid, + const IdxType nPoints) { DataT* globalCentroid_ptr = nullptr; - if (globalCentroid.has_value()) - { - globalCentroid_ptr = globalCentroid.value().data_handle(); - } + if (globalCentroid.has_value()) { globalCentroid_ptr = globalCentroid.value().data_handle(); } return detail::dispersion(centroids.data_handle(), clusterSizes.data_handle(), globalCentroid_ptr, diff --git a/cpp/include/raft/stats/entropy.cuh b/cpp/include/raft/stats/entropy.cuh index 4fa6400a9d..1f5818b98d 100644 --- a/cpp/include/raft/stats/entropy.cuh +++ b/cpp/include/raft/stats/entropy.cuh @@ -62,10 +62,11 @@ double entropy(const T* clusterArray, * @return the entropy score */ template -double entropy(const raft::handle_t& handle, - raft::mdspan, LayoutPolicy, AccessorPolicy> clusterArray, - const T lowerLabelRange, - const T upperLabelRange) +double entropy( + const raft::handle_t& handle, + raft::mdspan, LayoutPolicy, AccessorPolicy> clusterArray, + const T lowerLabelRange, + const T upperLabelRange) { return detail::entropy(clusterArray.data_handle(), clusterArray.extent(0), diff --git a/cpp/include/raft/stats/histogram.cuh b/cpp/include/raft/stats/histogram.cuh index c27acff42f..4c4287e52c 100644 --- a/cpp/include/raft/stats/histogram.cuh +++ b/cpp/include/raft/stats/histogram.cuh @@ -81,7 +81,7 @@ void histogram(HistType type, * @note signature of BinnerOp is `int func(DataT, IdxT);` */ template , typename LayoutPolicy, typename AccessorPolicy> @@ -92,13 +92,13 @@ void histogram(const raft::handle_t& handle, BinnerOp binner = IdentityBinner()) { detail::histogram(type, - bins.data_handle(), - bins.extent(1), - data.data_handle(), - data.extent(0), - data.extent(1), - handle.get_stream(), - binner); + bins.data_handle(), + bins.extent(1), + data.data_handle(), + data.extent(0), + data.extent(1), + handle.get_stream(), + binner); } }; // end namespace stats }; // end namespace raft diff --git a/cpp/include/raft/stats/homogeneity_score.cuh b/cpp/include/raft/stats/homogeneity_score.cuh index ee32a2b5a2..8c946b0b53 100644 --- a/cpp/include/raft/stats/homogeneity_score.cuh +++ b/cpp/include/raft/stats/homogeneity_score.cuh @@ -65,11 +65,12 @@ double homogeneity_score(const T* truthClusterArray, * @param upperLabelRange: the upper bound of the range of labels */ template -double homogeneity_score(const raft::handle_t& handle, - raft::mdspan, LayoutPolicy, AccessorPolicy> truthClusterArray, - raft::mdspan, LayoutPolicy, AccessorPolicy> predClusterArray, - T lowerLabelRange, - T upperLabelRange) +double homogeneity_score( + const raft::handle_t& handle, + raft::mdspan, LayoutPolicy, AccessorPolicy> truthClusterArray, + raft::mdspan, LayoutPolicy, AccessorPolicy> predClusterArray, + T lowerLabelRange, + T upperLabelRange) { return detail::homogeneity_score(truthClusterArray.data_handle(), predClusterArray.data_handle(), diff --git a/cpp/include/raft/stats/kl_divergence.cuh b/cpp/include/raft/stats/kl_divergence.cuh index 3af463c80c..fbce44c6d9 100644 --- a/cpp/include/raft/stats/kl_divergence.cuh +++ b/cpp/include/raft/stats/kl_divergence.cuh @@ -57,9 +57,10 @@ DataT kl_divergence(const DataT* modelPDF, const DataT* candidatePDF, int size, * @param candidatePDF: the candidate array of probability density functions of type DataT */ template -DataT kl_divergence(const raft::handle_t& handle, - raft::mdspan, LayoutPolicy, AccessorPolicy> modelPDF, - raft::mdspan, LayoutPolicy, AccessorPolicy> candidatePDF) +DataT kl_divergence( + const raft::handle_t& handle, + raft::mdspan, LayoutPolicy, AccessorPolicy> modelPDF, + raft::mdspan, LayoutPolicy, AccessorPolicy> candidatePDF) { return detail::kl_divergence( modelPDF.data_handle(), candidatePDF.data_handle(), modelPDF.extent(0), handle.get_stream()); diff --git a/cpp/include/raft/stats/mean.cuh b/cpp/include/raft/stats/mean.cuh index 7a19300daa..8ebf801ebe 100644 --- a/cpp/include/raft/stats/mean.cuh +++ b/cpp/include/raft/stats/mean.cuh @@ -66,7 +66,10 @@ void mean( * @param sample: whether to evaluate sample mean or not. In other words, whether * to normalize the output using N-1 or N, for true or false, respectively */ -template +template void mean(const raft::handle_t& handle, raft::mdspan, LayoutPolicy, AccessorPolicy> mu, raft::mdspan, LayoutPolicy, AccessorPolicy> data, diff --git a/cpp/include/raft/stats/mean_center.cuh b/cpp/include/raft/stats/mean_center.cuh index 58561223f8..47142d967e 100644 --- a/cpp/include/raft/stats/mean_center.cuh +++ b/cpp/include/raft/stats/mean_center.cuh @@ -66,7 +66,11 @@ void meanCenter(Type* out, * @param mu the mean vector * @param bcastAlongRows whether to broadcast vector along rows or columns */ -template +template void meanCenter(const raft::handle_t& handle, raft::mdspan, LayoutPolicy, AccessorPolicy> out, raft::mdspan, LayoutPolicy, AccessorPolicy> data, @@ -124,7 +128,11 @@ void meanAdd(Type* out, * @param mu the mean vector * @param bcastAlongRows whether to broadcast vector along rows or columns */ -template +template void meanAdd(const raft::handle_t& handle, raft::mdspan, LayoutPolicy, AccessorPolicy> out, raft::mdspan, LayoutPolicy, AccessorPolicy> data, diff --git a/cpp/include/raft/stats/meanvar.cuh b/cpp/include/raft/stats/meanvar.cuh index 7bf22d1195..2c1a04c69e 100644 --- a/cpp/include/raft/stats/meanvar.cuh +++ b/cpp/include/raft/stats/meanvar.cuh @@ -76,7 +76,10 @@ void meanvar(Type* mean, * @param [in] sample whether to evaluate sample variance or not. In other words, whether to * normalize the variance using N-1 or N, for true or false respectively. */ -template +template void meanvar(const raft::handle_t& handle, raft::mdspan, LayoutPolicy, AccessorPolicy> mean, raft::mdspan, LayoutPolicy, AccessorPolicy> var, diff --git a/cpp/test/stats/adjusted_rand_index.cu b/cpp/test/stats/adjusted_rand_index.cu index 6f9cd34f61..a39500c4e0 100644 --- a/cpp/test/stats/adjusted_rand_index.cu +++ b/cpp/test/stats/adjusted_rand_index.cu @@ -41,7 +41,10 @@ struct adjustedRandIndexParam { template class adjustedRandIndexTest : public ::testing::TestWithParam { protected: - adjustedRandIndexTest() : stream(handle.get_stream()), firstClusterArray(0, stream), secondClusterArray(0, stream) {} + adjustedRandIndexTest() + : stream(handle.get_stream()), firstClusterArray(0, stream), secondClusterArray(0, stream) + { + } void SetUp() override { @@ -61,7 +64,8 @@ class adjustedRandIndexTest : public ::testing::TestWithParam, false>>(handle, + computed_adjusted_rand_index = adjusted_rand_index( + handle, raft::make_device_vector_view(firstClusterArray.data(), nElements), raft::make_device_vector_view(secondClusterArray.data(), nElements)); } @@ -135,7 +139,7 @@ class adjustedRandIndexTest : public ::testing::TestWithParam firstClusterArray; diff --git a/cpp/test/stats/completeness_score.cu b/cpp/test/stats/completeness_score.cu index 91c70120ea..5bdffbed07 100644 --- a/cpp/test/stats/completeness_score.cu +++ b/cpp/test/stats/completeness_score.cu @@ -40,9 +40,7 @@ template class completenessTest : public ::testing::TestWithParam { protected: // the constructor - completenessTest() : stream(handle.get_stream()) - { - } + completenessTest() : stream(handle.get_stream()) {} void SetUp() override { diff --git a/cpp/test/stats/cov.cu b/cpp/test/stats/cov.cu index edd2108618..dd782f0600 100644 --- a/cpp/test/stats/cov.cu +++ b/cpp/test/stats/cov.cu @@ -69,25 +69,23 @@ class CovTest : public ::testing::TestWithParam> { normal(handle, r, data.data(), len, params.mean, var); raft::stats::mean( mean_act.data(), data.data(), cols, rows, params.sample, params.rowMajor, stream); - if (params.rowMajor) - { + if (params.rowMajor) { using layout = raft::row_major; cov(handle, - raft::make_device_matrix_view(cov_act.data(), cols, cols), - raft::make_device_matrix_view(data.data(), rows, cols), - raft::make_device_vector_view(mean_act.data(), cols), - params.sample, - params.stable); + raft::make_device_matrix_view(cov_act.data(), cols, cols), + raft::make_device_matrix_view(data.data(), rows, cols), + raft::make_device_vector_view(mean_act.data(), cols), + params.sample, + params.stable); } else { - using layout = raft::col_major; - cov(handle, - raft::make_device_matrix_view(cov_act.data(), cols, cols), - raft::make_device_matrix_view(data.data(), rows, cols), - raft::make_device_vector_view(mean_act.data(), cols), - params.sample, - params.stable); + using layout = raft::col_major; + cov(handle, + raft::make_device_matrix_view(cov_act.data(), cols, cols), + raft::make_device_matrix_view(data.data(), rows, cols), + raft::make_device_vector_view(mean_act.data(), cols), + params.sample, + params.stable); } - T data_h[6] = {1.0, 2.0, 5.0, 4.0, 2.0, 1.0}; T cov_cm_ref_h[4] = {4.3333, -2.8333, -2.8333, 2.333}; diff --git a/cpp/test/stats/dispersion.cu b/cpp/test/stats/dispersion.cu index 6580286c5b..75369f794c 100644 --- a/cpp/test/stats/dispersion.cu +++ b/cpp/test/stats/dispersion.cu @@ -16,12 +16,12 @@ #include "../test_utils.h" #include +#include #include #include #include #include #include -#include #include #include #include diff --git a/cpp/test/stats/entropy.cu b/cpp/test/stats/entropy.cu index d82d0eb57b..36bab5c58c 100644 --- a/cpp/test/stats/entropy.cu +++ b/cpp/test/stats/entropy.cu @@ -38,8 +38,7 @@ template class entropyTest : public ::testing::TestWithParam { protected: // the constructor - entropyTest() : stream(handle.get_stream()) - {} + entropyTest() : stream(handle.get_stream()) {} void SetUp() override { @@ -82,10 +81,11 @@ class entropyTest : public ::testing::TestWithParam { raft::interruptible::synchronize(stream); // calling the entropy CUDA implementation - computedEntropy = raft::stats::entropy( - handle, - raft::make_device_vector_view(clusterArray.data(), nElements), - lowerLabelRange, upperLabelRange); + computedEntropy = + raft::stats::entropy(handle, + raft::make_device_vector_view(clusterArray.data(), nElements), + lowerLabelRange, + upperLabelRange); } raft::handle_t handle; diff --git a/cpp/test/stats/histogram.cu b/cpp/test/stats/histogram.cu index 9b5099f9b6..910bd22904 100644 --- a/cpp/test/stats/histogram.cu +++ b/cpp/test/stats/histogram.cu @@ -84,11 +84,10 @@ class HistTest : public ::testing::TestWithParam { RAFT_CUDA_TRY( cudaMemsetAsync(ref_bins.data(), 0, sizeof(int) * params.nbins * params.ncols, stream)); naiveHist(ref_bins.data(), params.nbins, in.data(), params.nrows, params.ncols, stream); - histogram( - handle, - params.type, - raft::make_device_matrix_view(bins.data(), params.nbins, params.ncols), - raft::make_device_matrix_view(in.data(), params.nrows, params.ncols)); + histogram(handle, + params.type, + raft::make_device_matrix_view(bins.data(), params.nbins, params.ncols), + raft::make_device_matrix_view(in.data(), params.nrows, params.ncols)); handle.sync_stream(); } diff --git a/cpp/test/stats/homogeneity_score.cu b/cpp/test/stats/homogeneity_score.cu index b39407e0e9..1c1a9526a0 100644 --- a/cpp/test/stats/homogeneity_score.cu +++ b/cpp/test/stats/homogeneity_score.cu @@ -47,7 +47,7 @@ class homogeneityTest : public ::testing::TestWithParam { nElements = params.nElements; lowerLabelRange = params.lowerLabelRange; upperLabelRange = params.upperLabelRange; - stream = handle.get_stream(); + stream = handle.get_stream(); // generating random value test input std::vector arr1(nElements, 0); @@ -89,12 +89,12 @@ class homogeneityTest : public ::testing::TestWithParam { if (nElements == 0) truthHomogeneity = 1.0; // calling the homogeneity CUDA implementation - computedHomogeneity = raft::stats::homogeneity_score(truthClusterArray.data(), - predClusterArray.data(), - nElements, - lowerLabelRange, - upperLabelRange, - stream); + computedHomogeneity = raft::stats::homogeneity_score( + handle, + raft::make_device_vector_view(truthClusterArray.data(), nElements), + raft::make_device_vector_view(predClusterArray.data(), nElements), + lowerLabelRange, + upperLabelRange); } // declaring the data values diff --git a/cpp/test/stats/information_criterion.cu b/cpp/test/stats/information_criterion.cu index d61f8591a5..29039ee664 100644 --- a/cpp/test/stats/information_criterion.cu +++ b/cpp/test/stats/information_criterion.cu @@ -89,13 +89,13 @@ class BatchedICTest : public ::testing::TestWithParam> { raft::update_device(loglike_d.data(), loglike_h.data(), params.batch_size, stream); // Compute the tested results - information_criterion_batched(res_d.data(), - loglike_d.data(), - params.ic_type, - params.n_params, - params.batch_size, - params.n_samples, - stream); + information_criterion_batched( + handle, + raft::make_device_vector_view(res_d.data(), params.batch_size), + raft::make_device_vector_view(loglike_d.data(), params.batch_size), + params.ic_type, + params.n_params, + params.n_samples); // Compute the expected results naive_ic(res_h.data(), diff --git a/cpp/test/stats/kl_divergence.cu b/cpp/test/stats/kl_divergence.cu index d66a832e30..ac8734f0df 100644 --- a/cpp/test/stats/kl_divergence.cu +++ b/cpp/test/stats/kl_divergence.cu @@ -39,6 +39,7 @@ class klDivergenceTest : public ::testing::TestWithParam { { // getting the parameters params = ::testing::TestWithParam::GetParam(); + stream = handle.get_stream(); nElements = params.nElements; @@ -54,8 +55,6 @@ class klDivergenceTest : public ::testing::TestWithParam { h_candidatePDF.begin(), h_candidatePDF.end(), [&]() { return realGenerator(dre); }); // allocating and initializing memory to the GPU - RAFT_CUDA_TRY(cudaStreamCreate(&stream)); - rmm::device_uvector d_modelPDF(nElements, stream); rmm::device_uvector d_candidatePDF(nElements, stream); RAFT_CUDA_TRY(cudaMemset(d_modelPDF.data(), 0, d_modelPDF.size() * sizeof(DataT))); @@ -75,11 +74,13 @@ class klDivergenceTest : public ::testing::TestWithParam { // calling the kl_divergence CUDA implementation computedklDivergence = - raft::stats::kl_divergence(d_modelPDF.data(), d_candidatePDF.data(), nElements, stream); - RAFT_CUDA_TRY(cudaStreamDestroy(stream)); + raft::stats::kl_divergence(handle, + raft::make_device_vector_view(d_modelPDF.data(), nElements), + raft::make_device_vector_view(d_candidatePDF.data(), nElements)); } // declaring the data values + raft::handle_t handle; klDivergenceParam params; int nElements = 0; DataT truthklDivergence = 0; diff --git a/cpp/test/stats/mean.cu b/cpp/test/stats/mean.cu index b7f24d5642..a485ae18c8 100644 --- a/cpp/test/stats/mean.cu +++ b/cpp/test/stats/mean.cu @@ -65,7 +65,19 @@ class MeanTest : public ::testing::TestWithParam> { void meanSGtest(T* data, cudaStream_t stream) { int rows = params.rows, cols = params.cols; - mean(mean_act.data(), data, cols, rows, params.sample, params.rowMajor, stream); + if (params.rowMajor) { + using layout = raft::row_major; + mean(handle, + raft::make_device_vector_view(mean_act.data(), rows * cols), + raft::make_device_matrix_view(data, rows, cols), + params.sample); + } else { + using layout = raft::col_major; + mean(handle, + raft::make_device_vector_view(mean_act.data(), rows * cols), + raft::make_device_matrix_view(data, rows, cols), + params.sample); + } } protected: diff --git a/cpp/test/stats/mean_center.cu b/cpp/test/stats/mean_center.cu index 3d92a52fb4..8c76e64fa1 100644 --- a/cpp/test/stats/mean_center.cu +++ b/cpp/test/stats/mean_center.cu @@ -58,18 +58,26 @@ class MeanCenterTest : public ::testing::TestWithParam(out.data(), rows, cols), + raft::make_device_matrix_view(data.data(), rows, cols), + raft::make_device_vector_view(meanVec.data(), meanVecSize), + params.bcastAlongRows); + } else { + using layout = raft::col_major; + meanCenter(handle, + raft::make_device_matrix_view(out.data(), rows, cols), + raft::make_device_matrix_view(data.data(), rows, cols), + raft::make_device_vector_view(meanVec.data(), meanVecSize), + params.bcastAlongRows); + } raft::linalg::naiveMatVec(out_ref.data(), data.data(), meanVec.data(), diff --git a/cpp/test/stats/meanvar.cu b/cpp/test/stats/meanvar.cu index 65d33e331c..9b408bf1f5 100644 --- a/cpp/test/stats/meanvar.cu +++ b/cpp/test/stats/meanvar.cu @@ -67,14 +67,22 @@ class MeanVarTest : public ::testing::TestWithParam> { { random::RngState r(params.seed); normal(handle, r, data.data(), params.cols * params.rows, params.mean, params.stddev); - meanvar(mean_act.data(), - vars_act.data(), - data.data(), - params.cols, - params.rows, - params.sample, - params.rowMajor, - stream); + + if (params.rowMajor) { + using layout = raft::row_major; + meanvar(handle, + raft::make_device_vector_view(mean_act.data(), params.cols), + raft::make_device_vector_view(vars_act.data(), params.cols), + raft::make_device_matrix_view(data.data(), params.rows, params.cols), + params.sample); + } else { + using layout = raft::col_major; + meanvar(handle, + raft::make_device_vector_view(mean_act.data(), params.cols), + raft::make_device_vector_view(vars_act.data(), params.cols), + raft::make_device_matrix_view(data.data(), params.rows, params.cols), + params.sample); + } RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); } diff --git a/cpp/test/stats/minmax.cu b/cpp/test/stats/minmax.cu index ffc989ceab..eb3e11c908 100644 --- a/cpp/test/stats/minmax.cu +++ b/cpp/test/stats/minmax.cu @@ -118,14 +118,16 @@ class MinMaxTest : public ::testing::TestWithParam> { minmax_ref.data(), minmax_ref.data() + params.cols, stream); - raft::stats::minmax( - handle, - raft::make_device_matrix_view(data.data(), params.rows, params.cols), - std::nullopt, - std::nullopt, - raft::make_device_vector_view(minmax_act.data(), params.cols), - raft::make_device_vector_view(minmax_act.data() + params.cols, params.cols), - std::nullopt); + raft::stats::minmax(handle, + raft::make_device_matrix_view( + data.data(), params.rows, params.cols), + std::nullopt, + std::nullopt, + raft::make_device_vector_view( + minmax_act.data(), params.cols), + raft::make_device_vector_view( + minmax_act.data() + params.cols, params.cols), + std::nullopt); } protected: From 7ff8a2fb758468841830687703d6604fd2e1f9e3 Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Wed, 21 Sep 2022 00:13:30 +0200 Subject: [PATCH 10/40] Using device_*_view instead of vanilla mdspan --- cpp/include/raft/stats/accuracy.cuh | 17 ++-- .../raft/stats/adjusted_rand_index.cuh | 23 +++-- cpp/include/raft/stats/completeness_score.cuh | 24 +++--- cpp/include/raft/stats/contingency_matrix.cuh | 83 ++++++++++--------- cpp/include/raft/stats/cov.cuh | 15 ++-- cpp/include/raft/stats/dispersion.cuh | 19 +++-- cpp/include/raft/stats/entropy.cuh | 18 ++-- cpp/include/raft/stats/histogram.cuh | 12 +-- cpp/include/raft/stats/homogeneity_score.cuh | 24 +++--- .../raft/stats/information_criterion.cuh | 14 ++-- cpp/include/raft/stats/kl_divergence.cuh | 14 ++-- cpp/include/raft/stats/mean.cuh | 18 ++-- cpp/include/raft/stats/mean_center.cuh | 38 +++++---- cpp/include/raft/stats/meanvar.cuh | 22 ++--- cpp/include/raft/stats/minmax.cuh | 24 ++---- cpp/test/stats/adjusted_rand_index.cu | 4 +- cpp/test/stats/completeness_score.cu | 4 +- cpp/test/stats/entropy.cu | 2 +- cpp/test/stats/histogram.cu | 2 +- cpp/test/stats/homogeneity_score.cu | 4 +- cpp/test/stats/information_criterion.cu | 2 +- cpp/test/stats/kl_divergence.cu | 4 +- cpp/test/stats/mean_center.cu | 8 +- cpp/test/stats/meanvar.cu | 4 +- cpp/test/stats/minmax.cu | 2 +- 25 files changed, 203 insertions(+), 198 deletions(-) diff --git a/cpp/include/raft/stats/accuracy.cuh b/cpp/include/raft/stats/accuracy.cuh index f746127395..60e20f28f1 100644 --- a/cpp/include/raft/stats/accuracy.cuh +++ b/cpp/include/raft/stats/accuracy.cuh @@ -42,25 +42,26 @@ float accuracy(const math_t* predictions, const math_t* ref_predictions, int n, /** * @brief Compute accuracy of predictions. Useful for classification. - * @tparam math_t: data type for predictions (e.g., int for classification) + * @tparam DataT: data type for predictions (e.g., int for classification) * @tparam IdxType Index type of matrix extent. - * @tparam LayoutPolicy Layout type of the input data. - * @tparam AccessorPolicy Accessor for the input and output, must be valid accessor on - * device. * @param[in] handle: the raft handle. * @param[in] predictions: array of predictions (GPU pointer). * @param[in] ref_predictions: array of reference (ground-truth) predictions (GPU pointer). * @return: Accuracy score in [0, 1]; higher is better. */ -template +template float accuracy( const raft::handle_t& handle, - raft::mdspan, LayoutPolicy, AccessorPolicy> predictions, - raft::mdspan, LayoutPolicy, AccessorPolicy> ref_predictions) + raft::device_vector_view predictions, + raft::device_vector_view ref_predictions) { + RAFT_EXPECTS(predictions.size() == ref_predictions.size(), "Size mismatch"); + RAFT_EXPECTS(predictions.is_exhaustive(), "predictions must be contiguous"); + RAFT_EXPECTS(ref_predictions.is_exhaustive(), "ref_predictions must be contiguous"); + return detail::accuracy_score(predictions.data_handle(), ref_predictions.data_handle(), - predictions.extent(0), + predictions.size(), handle.get_stream()); } } // namespace stats diff --git a/cpp/include/raft/stats/adjusted_rand_index.cuh b/cpp/include/raft/stats/adjusted_rand_index.cuh index 40d75dba23..6e0094447d 100644 --- a/cpp/include/raft/stats/adjusted_rand_index.cuh +++ b/cpp/include/raft/stats/adjusted_rand_index.cuh @@ -52,29 +52,28 @@ double adjusted_rand_index(const T* firstClusterArray, /** * @brief Function to calculate Adjusted RandIndex as described * here - * @tparam T data-type for input label arrays + * @tparam DataT data-type for input label arrays * @tparam MathT integral data-type used for computing n-choose-r * @tparam IdxType Index type of matrix extent. - * @tparam LayoutPolicy Layout type of the input data. - * @tparam AccessorPolicy Accessor for the input and output, must be valid accessor on - * device. * @param handle: the raft handle. * @param firstClusterArray: the array of classes * @param secondClusterArray: the array of classes */ -template + typename IdxType> double adjusted_rand_index( const raft::handle_t& handle, - raft::mdspan, LayoutPolicy, AccessorPolicy> firstClusterArray, - raft::mdspan, LayoutPolicy, AccessorPolicy> secondClusterArray) + raft::device_vector_view firstClusterArray, + raft::device_vector_view secondClusterArray) { - return detail::compute_adjusted_rand_index(firstClusterArray.data_handle(), + RAFT_EXPECTS(firstClusterArray.size() == secondClusterArray.size(), "Size mismatch"); + RAFT_EXPECTS(firstClusterArray.is_exhaustive(), "firstClusterArray must be contiguous"); + RAFT_EXPECTS(secondClusterArray.is_exhaustive(), "secondClusterArray must be contiguous"); + + return detail::compute_adjusted_rand_index(firstClusterArray.data_handle(), secondClusterArray.data_handle(), - firstClusterArray.extent(0), + firstClusterArray.size(), handle.get_stream()); } diff --git a/cpp/include/raft/stats/completeness_score.cuh b/cpp/include/raft/stats/completeness_score.cuh index 1eeba22c67..e5251d39fc 100644 --- a/cpp/include/raft/stats/completeness_score.cuh +++ b/cpp/include/raft/stats/completeness_score.cuh @@ -50,28 +50,28 @@ double completeness_score(const T* truthClusterArray, /** * @brief Function to calculate the completeness score between two clusters * - * @tparam T the data type + * @tparam DataT the data type * @tparam IdxType Index type of matrix extent. - * @tparam LayoutPolicy Layout type of the input data. - * @tparam AccessorPolicy Accessor for the input and output, must be valid accessor on - * device. * @param handle: the raft handle. - * @param truthClusterArray: the array of truth classes of type T - * @param predClusterArray: the array of predicted classes of type T + * @param truthClusterArray: the array of truth classes of type DataT + * @param predClusterArray: the array of predicted classes of type DataT * @param lowerLabelRange: the lower bound of the range of labels * @param upperLabelRange: the upper bound of the range of labels */ -template +template double completeness_score( const raft::handle_t& handle, - raft::mdspan, LayoutPolicy, AccessorPolicy> truthClusterArray, - raft::mdspan, LayoutPolicy, AccessorPolicy> predClusterArray, - T lowerLabelRange, - T upperLabelRange) + raft::device_vector_view truthClusterArray, + raft::device_vector_view predClusterArray, + DataT lowerLabelRange, + DataT upperLabelRange) { + RAFT_EXPECTS(truthClusterArray.size() == predClusterArray.size(), "Size mismatch"); + RAFT_EXPECTS(truthClusterArray.is_exhaustive(), "truthClusterArray must be contiguous"); + RAFT_EXPECTS(predClusterArray.is_exhaustive(), "predClusterArray must be contiguous"); return detail::homogeneity_score(predClusterArray.data_handle(), truthClusterArray.data_handle(), - truthClusterArray.extent(0), + truthClusterArray.size(), lowerLabelRange, upperLabelRange, handle.get_stream()); diff --git a/cpp/include/raft/stats/contingency_matrix.cuh b/cpp/include/raft/stats/contingency_matrix.cuh index dcd5ebcead..6288fc36e9 100644 --- a/cpp/include/raft/stats/contingency_matrix.cuh +++ b/cpp/include/raft/stats/contingency_matrix.cuh @@ -50,12 +50,12 @@ void getInputClassCardinality( * @param minLabel: [out] calculated min value in input array * @param maxLabel: [out] calculated max value in input array */ -template +template void getInputClassCardinality( const raft::handle_t& handle, - raft::mdspan, LayoutPolicy, AccessorPolicy> groundTruth, - const raft::host_scalar_view& minLabel, - const raft::host_scalar_view& maxLabel) + raft::device_vector_view groundTruth, + raft::host_scalar_view minLabel, + raft::host_scalar_view maxLabel) { detail::getInputClassCardinality(groundTruth.data_handle(), groundTruth.extent(0), @@ -88,22 +88,18 @@ size_t getContingencyMatrixWorkspaceSize(int nSamples, /** * @brief Calculate workspace size for running contingency matrix calculations * @tparam T label type - * @tparam OutT output matrix type * @param handle: the raft handle. * @param groundTruth: device 1-d array for ground truth (num of rows) * @param minLabel: Optional, min value in input array * @param maxLabel: Optional, max value in input array */ -template +template size_t getContingencyMatrixWorkspaceSize( const raft::handle_t& handle, - raft::mdspan, LayoutPolicy, AccessorPolicy> groundTruth, - T minLabel = std::numeric_limits::max(), - T maxLabel = std::numeric_limits::max()) + raft::device_vector_view groundTruth, + DataT minLabel = std::numeric_limits::max(), + DataT maxLabel = std::numeric_limits::max()) { return detail::getContingencyMatrixWorkspaceSize( groundTruth.extent(0), groundTruth.data_handle(), handle.get_stream(), minLabel, maxLabel); @@ -119,7 +115,7 @@ size_t getContingencyMatrixWorkspaceSize( * @param groundTruth: device 1-d array for ground truth (num of rows) * @param predictedLabel: device 1-d array for prediction (num of columns) * @param nSamples: number of elements in input array - * @param outMat: output buffer for contingecy matrix + * @param outMat: output buffer for contingency matrix * @param stream: cuda stream for execution * @param workspace: Optional, workspace memory allocation * @param workspaceSize: Optional, size of workspace memory @@ -153,45 +149,52 @@ void contingencyMatrix(const T* groundTruth, * labels. Users should call function getInputClassCardinality to find * and allocate memory for output. Similarly workspace requirements * should be checked using function getContingencyMatrixWorkspaceSize - * @tparam T label type - * @tparam OutT output matrix type + * @tparam DataT label type + * @tparam OutType output matrix type * @tparam IdxType Index type of matrix extent. * @tparam LayoutPolicy Layout type of the input data. - * @tparam AccessorPolicy Accessor for the input and output, must be valid accessor on - * device. * @param handle: the raft handle. * @param groundTruth: device 1-d array for ground truth (num of rows) * @param predictedLabel: device 1-d array for prediction (num of columns) - * @param outMat: output buffer for contingecy matrix + * @param outMat: output buffer for contingency matrix * @param workspace: Optional, workspace memory allocation - * @param workspaceSize: Optional, size of workspace memory * @param minLabel: Optional, min value in input ground truth array * @param maxLabel: Optional, max value in input ground truth array */ -template + typename LayoutPolicy> void contingencyMatrix( const raft::handle_t& handle, - raft::mdspan, LayoutPolicy, AccessorPolicy> groundTruth, - raft::mdspan, LayoutPolicy, AccessorPolicy> predictedLabel, - raft::mdspan, LayoutPolicy, AccessorPolicy> outMat, - void* workspace = nullptr, - size_t workspaceSize = 0, - T minLabel = std::numeric_limits::max(), - T maxLabel = std::numeric_limits::max()) + raft::device_vector_view groundTruth, + raft::device_vector_view predictedLabel, + raft::device_matrix_view outMat, + std::optional> workspace, + DataT minLabel = std::numeric_limits::max(), + DataT maxLabel = std::numeric_limits::max()) { - detail::contingencyMatrix(groundTruth.data_handle(), - predictedLabel.data_handle(), - groundTruth.extent(0), - outMat.data_handle(), - handle.get_stream(), - workspace, - workspaceSize, - minLabel, - maxLabel); + RAFT_EXPECTS(groundTruth.size() == predictedLabel.size(), "Size mismatch"); + RAFT_EXPECTS(groundTruth.is_exhaustive(), "groundTruth must be contiguous"); + RAFT_EXPECTS(predictedLabel.is_exhaustive(), "predictedLabel must be contiguous"); + RAFT_EXPECTS(outMat.is_exhaustive(), "outMat must be contiguous"); + + DataT* workspace_p = nullptr; + IdxType workspace_size = 0; + if (workspace.has_value()) + { + workspace_p = workspace.value().data_handle(); + workspace_size = workspace.value().size(); + } + detail::contingencyMatrix(groundTruth.data_handle(), + predictedLabel.data_handle(), + groundTruth.size(), + outMat.data_handle(), + handle.get_stream(), + workspace_p, + workspace_size, + minLabel, + maxLabel); } }; // namespace stats diff --git a/cpp/include/raft/stats/cov.cuh b/cpp/include/raft/stats/cov.cuh index fc0dc4043e..4e4abd16f6 100644 --- a/cpp/include/raft/stats/cov.cuh +++ b/cpp/include/raft/stats/cov.cuh @@ -67,8 +67,6 @@ void cov(const raft::handle_t& handle, * @tparam Type the data type * @tparam IdxT the index type * @tparam LayoutPolicy Layout type of the input data. - * @tparam AccessorPolicy Accessor for the input and output, must be valid accessor on - * device. * @param handle the raft handle * @param covar the output covariance matrix * @param data the input matrix (this will get mean-centered at the end!) @@ -80,14 +78,19 @@ void cov(const raft::handle_t& handle, * @note if stable=true, then the input data will be mean centered after this * function returns! */ -template +template void cov(const raft::handle_t& handle, - raft::mdspan, LayoutPolicy, AccessorPolicy> covar, - raft::mdspan, LayoutPolicy, AccessorPolicy> data, - raft::mdspan, LayoutPolicy, AccessorPolicy> mu, + raft::device_matrix_view covar, + raft::device_matrix_view data, + raft::device_vector_view mu, bool sample, bool stable) { + RAFT_EXPECTS(data.size() == covar.size(), "Size mismatch"); + RAFT_EXPECTS(data.is_exhaustive(), "data must be contiguous"); + RAFT_EXPECTS(covar.is_exhaustive(), "covar must be contiguous"); + RAFT_EXPECTS(mu.is_exhaustive(), "mu must be contiguous"); + detail::cov(handle, covar.data_handle(), data.data_handle(), diff --git a/cpp/include/raft/stats/dispersion.cuh b/cpp/include/raft/stats/dispersion.cuh index beaf5beee5..65d89f209d 100644 --- a/cpp/include/raft/stats/dispersion.cuh +++ b/cpp/include/raft/stats/dispersion.cuh @@ -63,8 +63,6 @@ DataT dispersion(const DataT* centroids, * @tparam DataT data type * @tparam IdxType index type * @tparam LayoutPolicy Layout type of the input data. - * @tparam AccessorPolicy Accessor for the input and output, must be valid accessor on - * device. * @tparam TPB threads block for kernels launched * @param handle the raft handle * @param centroids the cluster centroids. This is assumed to be row-major @@ -79,18 +77,23 @@ DataT dispersion(const DataT* centroids, template DataT dispersion( const raft::handle_t& handle, - raft::mdspan, LayoutPolicy, AccessorPolicy> centroids, - raft::mdspan> clusterSizes, - std::optional, LayoutPolicy, AccessorPolicy>> - globalCentroid, + raft::device_matrix_view centroids, + raft::device_vector_view clusterSizes, + std::optional> globalCentroid, const IdxType nPoints) { + RAFT_EXPECTS(clusterSizes.size() == centroids.extent(0), "Size mismatch"); + RAFT_EXPECTS(clusterSizes.is_exhaustive(), "clusterSizes must be contiguous"); + DataT* globalCentroid_ptr = nullptr; - if (globalCentroid.has_value()) { globalCentroid_ptr = globalCentroid.value().data_handle(); } + if (globalCentroid.has_value()) + { + RAFT_EXPECTS(globalCentroid.value().is_exhaustive(), "globalCentroid must be contiguous"); + globalCentroid_ptr = globalCentroid.value().data_handle(); + } return detail::dispersion(centroids.data_handle(), clusterSizes.data_handle(), globalCentroid_ptr, diff --git a/cpp/include/raft/stats/entropy.cuh b/cpp/include/raft/stats/entropy.cuh index 1f5818b98d..098ad5c433 100644 --- a/cpp/include/raft/stats/entropy.cuh +++ b/cpp/include/raft/stats/entropy.cuh @@ -50,26 +50,24 @@ double entropy(const T* clusterArray, * @brief Function to calculate entropy * more info on entropy * - * @tparam T data type + * @tparam DataT data type * @tparam IdxT index type - * @tparam LayoutPolicy Layout type of the input data. - * @tparam AccessorPolicy Accessor for the input and output, must be valid accessor on - * device. * @param handle the raft handle - * @param clusterArray: the array of classes of type T + * @param clusterArray: the array of classes of type DataT * @param lowerLabelRange: the lower bound of the range of labels * @param upperLabelRange: the upper bound of the range of labels * @return the entropy score */ -template +template double entropy( const raft::handle_t& handle, - raft::mdspan, LayoutPolicy, AccessorPolicy> clusterArray, - const T lowerLabelRange, - const T upperLabelRange) + raft::device_vector_view clusterArray, + const DataT lowerLabelRange, + const DataT upperLabelRange) { + RAFT_EXPECTS(clusterArray.is_exhaustive(), "clusterArray must be contiguous"); return detail::entropy(clusterArray.data_handle(), - clusterArray.extent(0), + clusterArray.size(), lowerLabelRange, upperLabelRange, handle.get_stream()); diff --git a/cpp/include/raft/stats/histogram.cuh b/cpp/include/raft/stats/histogram.cuh index 4c4287e52c..81122f2f59 100644 --- a/cpp/include/raft/stats/histogram.cuh +++ b/cpp/include/raft/stats/histogram.cuh @@ -70,8 +70,6 @@ void histogram(HistType type, * @tparam IdxType data type used to compute indices * @tparam BinnerOp takes the input data and computes its bin index * @tparam LayoutPolicy Layout type of the input data. - * @tparam AccessorPolicy Accessor for the input and output, must be valid accessor on - * device. * @param handle the raft handle * @param type histogram implementation type to choose * @param bins the output bins (length = ncols * nbins) @@ -83,14 +81,16 @@ void histogram(HistType type, template , - typename LayoutPolicy, - typename AccessorPolicy> + typename LayoutPolicy> void histogram(const raft::handle_t& handle, HistType type, - raft::mdspan, LayoutPolicy, AccessorPolicy> bins, - raft::mdspan, LayoutPolicy, AccessorPolicy> data, + raft::device_matrix_view bins, + raft::device_matrix_view data, BinnerOp binner = IdentityBinner()) { + RAFT_EXPECTS(bins.extent(0) == data.extent(0), "Size mismatch"); + RAFT_EXPECTS(bins.is_exhaustive(), "bins must be contiguous"); + RAFT_EXPECTS(data.is_exhaustive(), "data must be contiguous"); detail::histogram(type, bins.data_handle(), bins.extent(1), diff --git a/cpp/include/raft/stats/homogeneity_score.cuh b/cpp/include/raft/stats/homogeneity_score.cuh index 8c946b0b53..5b2d83a535 100644 --- a/cpp/include/raft/stats/homogeneity_score.cuh +++ b/cpp/include/raft/stats/homogeneity_score.cuh @@ -53,28 +53,28 @@ double homogeneity_score(const T* truthClusterArray, * more info on mutual * information * - * @tparam T data type + * @tparam DataT data type * @tparam IdxType index type - * @tparam LayoutPolicy Layout type of the input data. - * @tparam AccessorPolicy Accessor for the input and output, must be valid accessor on - * device. * @param handle the raft handle - * @param truthClusterArray: the array of truth classes of type T - * @param predClusterArray: the array of predicted classes of type T + * @param truthClusterArray: the array of truth classes of type DataT + * @param predClusterArray: the array of predicted classes of type DataT * @param lowerLabelRange: the lower bound of the range of labels * @param upperLabelRange: the upper bound of the range of labels */ -template +template double homogeneity_score( const raft::handle_t& handle, - raft::mdspan, LayoutPolicy, AccessorPolicy> truthClusterArray, - raft::mdspan, LayoutPolicy, AccessorPolicy> predClusterArray, - T lowerLabelRange, - T upperLabelRange) + raft::device_vector_view truthClusterArray, + raft::device_vector_view predClusterArray, + DataT lowerLabelRange, + DataT upperLabelRange) { + RAFT_EXPECTS(truthClusterArray.size() == predClusterArray.size(), "Size mismatch"); + RAFT_EXPECTS(truthClusterArray.is_exhaustive(), "truthClusterArray must be contiguous"); + RAFT_EXPECTS(predClusterArray.is_exhaustive(), "predClusterArray must be contiguous"); return detail::homogeneity_score(truthClusterArray.data_handle(), predClusterArray.data_handle(), - truthClusterArray.extent(0), + truthClusterArray.size(), lowerLabelRange, upperLabelRange, handle.get_stream()); diff --git a/cpp/include/raft/stats/information_criterion.cuh b/cpp/include/raft/stats/information_criterion.cuh index 0e8c3a0cf4..425d0369f8 100644 --- a/cpp/include/raft/stats/information_criterion.cuh +++ b/cpp/include/raft/stats/information_criterion.cuh @@ -70,11 +70,8 @@ void information_criterion_batched(ScalarT* d_ic, * @note: it is safe to do the computation in-place (i.e give same pointer * as input and output) * - * @tparam ScalarT data type + * @tparam DataT data type * @tparam IdxType index type - * @tparam LayoutPolicy Layout type of the input data. - * @tparam AccessorPolicy Accessor for the input and output, must be valid accessor on - * device. * @param[in] handle the raft handle * @param[out] d_ic Information criterion to be returned for each * series (device) length: batch_size @@ -83,15 +80,18 @@ void information_criterion_batched(ScalarT* d_ic, * @param[in] n_params Number of parameters in the model * @param[in] n_samples Number of samples in each series */ -template +template void information_criterion_batched( const raft::handle_t& handle, - raft::mdspan, LayoutPolicy, AccessorPolicy> d_ic, - raft::mdspan, LayoutPolicy, AccessorPolicy> d_loglikelihood, + raft::device_vector_view d_ic, + raft::device_vector_view d_loglikelihood, IC_Type ic_type, IdxType n_params, IdxType n_samples) { + RAFT_EXPECTS(d_ic.size() == d_loglikelihood.size(), "Size mismatch"); + RAFT_EXPECTS(d_ic.is_exhaustive(), "d_ic must be contiguous"); + RAFT_EXPECTS(d_loglikelihood.is_exhaustive(), "d_loglikelihood must be contiguous"); batched::detail::information_criterion(d_ic.data_handle(), d_loglikelihood.data_handle(), ic_type, diff --git a/cpp/include/raft/stats/kl_divergence.cuh b/cpp/include/raft/stats/kl_divergence.cuh index fbce44c6d9..cd9cc14c1b 100644 --- a/cpp/include/raft/stats/kl_divergence.cuh +++ b/cpp/include/raft/stats/kl_divergence.cuh @@ -49,21 +49,21 @@ DataT kl_divergence(const DataT* modelPDF, const DataT* candidatePDF, int size, * * @tparam DataT: Data type of the input array * @tparam IdxType index type - * @tparam LayoutPolicy Layout type of the input data. - * @tparam AccessorPolicy Accessor for the input and output, must be valid accessor on - * device. * @param handle the raft handle * @param modelPDF: the model array of probability density functions of type DataT * @param candidatePDF: the candidate array of probability density functions of type DataT */ -template +template DataT kl_divergence( const raft::handle_t& handle, - raft::mdspan, LayoutPolicy, AccessorPolicy> modelPDF, - raft::mdspan, LayoutPolicy, AccessorPolicy> candidatePDF) + raft::device_vector_view modelPDF, + raft::device_vector_view candidatePDF) { + RAFT_EXPECTS(modelPDF.size() == candidatePDF.size(), "Size mismatch"); + RAFT_EXPECTS(modelPDF.is_exhaustive(), "modelPDF must be contiguous"); + RAFT_EXPECTS(candidatePDF.is_exhaustive(), "candidatePDF must be contiguous"); return detail::kl_divergence( - modelPDF.data_handle(), candidatePDF.data_handle(), modelPDF.extent(0), handle.get_stream()); + modelPDF.data_handle(), candidatePDF.data_handle(), modelPDF.size(), handle.get_stream()); } }; // end namespace stats diff --git a/cpp/include/raft/stats/mean.cuh b/cpp/include/raft/stats/mean.cuh index 8ebf801ebe..a5807c4a10 100644 --- a/cpp/include/raft/stats/mean.cuh +++ b/cpp/include/raft/stats/mean.cuh @@ -55,26 +55,26 @@ void mean( * * Mean operation is assumed to be performed on a given column. * - * @tparam Type the data type + * @tparam DataT the data type * @tparam IdxType index type * @tparam LayoutPolicy Layout type of the input matrix. - * @tparam AccessorPolicy Accessor for the input and output, must be valid accessor on - * device. * @param handle the raft handle * @param mu: the output mean vector * @param data: the input matrix * @param sample: whether to evaluate sample mean or not. In other words, whether * to normalize the output using N-1 or N, for true or false, respectively */ -template +template void mean(const raft::handle_t& handle, - raft::mdspan, LayoutPolicy, AccessorPolicy> mu, - raft::mdspan, LayoutPolicy, AccessorPolicy> data, + raft::device_vector_view mu, + raft::device_matrix_view data, bool sample) { + RAFT_EXPECTS(data.extent(0) == mu.size(), "Size mismatch betwen data and mu"); + RAFT_EXPECTS(mu.is_exhaustive(), "mu must be contiguous"); + RAFT_EXPECTS(data.is_exhaustive(), "data must be contiguous"); detail::mean(mu.data_handle(), data.data_handle(), data.extent(1), diff --git a/cpp/include/raft/stats/mean_center.cuh b/cpp/include/raft/stats/mean_center.cuh index 47142d967e..2db287ad20 100644 --- a/cpp/include/raft/stats/mean_center.cuh +++ b/cpp/include/raft/stats/mean_center.cuh @@ -54,11 +54,9 @@ void meanCenter(Type* out, /** * @brief Center the input matrix wrt its mean - * @tparam Type the data type + * @tparam DataT the data type * @tparam IdxType index type * @tparam LayoutPolicy Layout type of the input matrix. - * @tparam AccessorPolicy Accessor for the input and output, must be valid accessor on - * device. * @tparam TPB threads per block of the cuda kernel launched * @param handle the raft handle * @param out the output mean-centered matrix @@ -66,18 +64,21 @@ void meanCenter(Type* out, * @param mu the mean vector * @param bcastAlongRows whether to broadcast vector along rows or columns */ -template void meanCenter(const raft::handle_t& handle, - raft::mdspan, LayoutPolicy, AccessorPolicy> out, - raft::mdspan, LayoutPolicy, AccessorPolicy> data, - raft::mdspan, LayoutPolicy, AccessorPolicy> mu, + raft::device_matrix_view out, + raft::device_matrix_view data, + raft::device_vector_view mu, bool bcastAlongRows) { - detail::meanCenter(out.data_handle(), + RAFT_EXPECTS(out.size() == data.size(), "Size mismatch"); + RAFT_EXPECTS(data.extent(0) == mu.size(), "Size mismatch betwen data and mu"); + RAFT_EXPECTS(out.is_exhaustive(), "out must be contiguous"); + RAFT_EXPECTS(data.is_exhaustive(), "data must be contiguous"); + detail::meanCenter(out.data_handle(), data.data_handle(), mu.data_handle(), data.extent(1), @@ -119,8 +120,6 @@ void meanAdd(Type* out, * @tparam Type the data type * @tparam IdxType index type * @tparam LayoutPolicy Layout type of the input matrix. - * @tparam AccessorPolicy Accessor for the input and output, must be valid accessor on - * device. * @tparam TPB threads per block of the cuda kernel launched * @param handle the raft handle * @param out the output mean-centered matrix @@ -128,18 +127,21 @@ void meanAdd(Type* out, * @param mu the mean vector * @param bcastAlongRows whether to broadcast vector along rows or columns */ -template void meanAdd(const raft::handle_t& handle, - raft::mdspan, LayoutPolicy, AccessorPolicy> out, - raft::mdspan, LayoutPolicy, AccessorPolicy> data, - raft::mdspan, LayoutPolicy, AccessorPolicy> mu, + raft::device_matrix_view out, + raft::device_matrix_view data, + raft::device_vector_view mu, bool bcastAlongRows) { - detail::meanAdd(out.data_handle(), + RAFT_EXPECTS(out.size() == data.size(), "Size mismatch"); + RAFT_EXPECTS(data.extent(0) == mu.size(), "Size mismatch betwen data and mu"); + RAFT_EXPECTS(out.is_exhaustive(), "out must be contiguous"); + RAFT_EXPECTS(data.is_exhaustive(), "data must be contiguous"); + detail::meanAdd(out.data_handle(), data.data_handle(), mu.data_handle(), data.extent(1), diff --git a/cpp/include/raft/stats/meanvar.cuh b/cpp/include/raft/stats/meanvar.cuh index 2c1a04c69e..6f19ee9581 100644 --- a/cpp/include/raft/stats/meanvar.cuh +++ b/cpp/include/raft/stats/meanvar.cuh @@ -64,11 +64,9 @@ void meanvar(Type* mean, * It's almost twice faster than running `mean` and `vars` sequentially, because all three * kernels are memory-bound. * - * @tparam Type the data type + * @tparam DataT the data type * @tparam IdxType Integer type used for addressing * @tparam LayoutPolicy Layout type of the input matrix. - * @tparam AccessorPolicy Accessor for the input and output, must be valid accessor on - * device. * @param handle the raft handle * @param [out] mean the output mean vector of size D * @param [out] var the output variance vector of size D @@ -76,16 +74,20 @@ void meanvar(Type* mean, * @param [in] sample whether to evaluate sample variance or not. In other words, whether to * normalize the variance using N-1 or N, for true or false respectively. */ -template +template void meanvar(const raft::handle_t& handle, - raft::mdspan, LayoutPolicy, AccessorPolicy> mean, - raft::mdspan, LayoutPolicy, AccessorPolicy> var, - raft::mdspan, LayoutPolicy, AccessorPolicy> data, + raft::device_vector_view mean, + raft::device_vector_view var, + raft::device_matrix_view data, bool sample) { + RAFT_EXPECTS(data.extent(0) == var.size(), "Size mismatch betwen data and var"); + RAFT_EXPECTS(mean.size() == var.size(), "Size mismatch betwen mean and var"); + RAFT_EXPECTS(mean.is_exhaustive(), "mean must be contiguous"); + RAFT_EXPECTS(var.is_exhaustive(), "var must be contiguous"); + RAFT_EXPECTS(data.is_exhaustive(), "data must be contiguous"); detail::meanvar(mean.data_handle(), var.data_handle(), data.data_handle(), diff --git a/cpp/include/raft/stats/minmax.cuh b/cpp/include/raft/stats/minmax.cuh index 11f4eb848e..7749bb2551 100644 --- a/cpp/include/raft/stats/minmax.cuh +++ b/cpp/include/raft/stats/minmax.cuh @@ -74,12 +74,8 @@ void minmax(const T* data, * @brief Computes min/max across every column of the input matrix, as well as * optionally allow to subsample based on the given row/col ID mapping vectors * - * @tparam T Data type of input matrix element. - * @tparam IndexType Index type of matrix extent. - * @tparam LayoutPolicy Layout type of the input matrix. When layout is strided, it can - * be a submatrix of a larger matrix. Arbitrary stride is not supported. - * @tparam AccessorPolicy Accessor for the input and output, must be valid accessor on - * device. + * @tparam DataT Data type of input matrix element. + * @tparam IdxType Index type of matrix extent. * @tparam TPB number of threads per block * @param handle the raft handle * @param data input data col-major of size [nrows, ncols], unless rowids or @@ -96,20 +92,18 @@ void minmax(const T* data, * 2. ncols is small enough to fit the whole of min/max values across all cols * in shared memory */ -template void minmax(const raft::handle_t& handle, - raft::mdspan, LayoutPolicy, AccessorPolicy> data, - std::optional>> rowids, - std::optional>> colids, - raft::mdspan> globalmin, - raft::mdspan> globalmax, - std::optional>> sampledcols) + raft::device_matrix_view data, + std::optional> rowids, + std::optional> colids, + raft::device_vector_view globalmin, + raft::device_vector_view globalmax, + std::optional> sampledcols) { - static_assert(std::is_same_v, "Data should be col-major"); const unsigned* rowids_ptr = nullptr; const unsigned* colids_ptr = nullptr; T* sampledcols_ptr = nullptr; diff --git a/cpp/test/stats/adjusted_rand_index.cu b/cpp/test/stats/adjusted_rand_index.cu index a39500c4e0..b7b7b4129b 100644 --- a/cpp/test/stats/adjusted_rand_index.cu +++ b/cpp/test/stats/adjusted_rand_index.cu @@ -66,8 +66,8 @@ class adjustedRandIndexTest : public ::testing::TestWithParam( handle, - raft::make_device_vector_view(firstClusterArray.data(), nElements), - raft::make_device_vector_view(secondClusterArray.data(), nElements)); + raft::make_device_vector_view(firstClusterArray.data(), nElements), + raft::make_device_vector_view(secondClusterArray.data(), nElements)); } void SetUpDifferentArrays() diff --git a/cpp/test/stats/completeness_score.cu b/cpp/test/stats/completeness_score.cu index 5bdffbed07..12d90be46b 100644 --- a/cpp/test/stats/completeness_score.cu +++ b/cpp/test/stats/completeness_score.cu @@ -93,8 +93,8 @@ class completenessTest : public ::testing::TestWithParam { // calling the completeness CUDA implementation computedCompleteness = raft::stats::completeness_score( handle, - raft::make_device_vector_view(truthClusterArray.data(), nElements), - raft::make_device_vector_view(predClusterArray.data(), nElements), + raft::make_device_vector_view(truthClusterArray.data(), nElements), + raft::make_device_vector_view(predClusterArray.data(), nElements), lowerLabelRange, upperLabelRange); } diff --git a/cpp/test/stats/entropy.cu b/cpp/test/stats/entropy.cu index 36bab5c58c..1729ea45a7 100644 --- a/cpp/test/stats/entropy.cu +++ b/cpp/test/stats/entropy.cu @@ -83,7 +83,7 @@ class entropyTest : public ::testing::TestWithParam { // calling the entropy CUDA implementation computedEntropy = raft::stats::entropy(handle, - raft::make_device_vector_view(clusterArray.data(), nElements), + raft::make_device_vector_view(clusterArray.data(), nElements), lowerLabelRange, upperLabelRange); } diff --git a/cpp/test/stats/histogram.cu b/cpp/test/stats/histogram.cu index 910bd22904..d759e6c9f4 100644 --- a/cpp/test/stats/histogram.cu +++ b/cpp/test/stats/histogram.cu @@ -87,7 +87,7 @@ class HistTest : public ::testing::TestWithParam { histogram(handle, params.type, raft::make_device_matrix_view(bins.data(), params.nbins, params.ncols), - raft::make_device_matrix_view(in.data(), params.nrows, params.ncols)); + raft::make_device_matrix_view(in.data(), params.nrows, params.ncols)); handle.sync_stream(); } diff --git a/cpp/test/stats/homogeneity_score.cu b/cpp/test/stats/homogeneity_score.cu index 1c1a9526a0..f358f3fc28 100644 --- a/cpp/test/stats/homogeneity_score.cu +++ b/cpp/test/stats/homogeneity_score.cu @@ -91,8 +91,8 @@ class homogeneityTest : public ::testing::TestWithParam { // calling the homogeneity CUDA implementation computedHomogeneity = raft::stats::homogeneity_score( handle, - raft::make_device_vector_view(truthClusterArray.data(), nElements), - raft::make_device_vector_view(predClusterArray.data(), nElements), + raft::make_device_vector_view(truthClusterArray.data(), nElements), + raft::make_device_vector_view(predClusterArray.data(), nElements), lowerLabelRange, upperLabelRange); } diff --git a/cpp/test/stats/information_criterion.cu b/cpp/test/stats/information_criterion.cu index 29039ee664..1779d6ef1b 100644 --- a/cpp/test/stats/information_criterion.cu +++ b/cpp/test/stats/information_criterion.cu @@ -92,7 +92,7 @@ class BatchedICTest : public ::testing::TestWithParam> { information_criterion_batched( handle, raft::make_device_vector_view(res_d.data(), params.batch_size), - raft::make_device_vector_view(loglike_d.data(), params.batch_size), + raft::make_device_vector_view(loglike_d.data(), params.batch_size), params.ic_type, params.n_params, params.n_samples); diff --git a/cpp/test/stats/kl_divergence.cu b/cpp/test/stats/kl_divergence.cu index ac8734f0df..ca8a6735f8 100644 --- a/cpp/test/stats/kl_divergence.cu +++ b/cpp/test/stats/kl_divergence.cu @@ -75,8 +75,8 @@ class klDivergenceTest : public ::testing::TestWithParam { // calling the kl_divergence CUDA implementation computedklDivergence = raft::stats::kl_divergence(handle, - raft::make_device_vector_view(d_modelPDF.data(), nElements), - raft::make_device_vector_view(d_candidatePDF.data(), nElements)); + raft::make_device_vector_view(d_modelPDF.data(), nElements), + raft::make_device_vector_view(d_candidatePDF.data(), nElements)); } // declaring the data values diff --git a/cpp/test/stats/mean_center.cu b/cpp/test/stats/mean_center.cu index 8c76e64fa1..251913e35e 100644 --- a/cpp/test/stats/mean_center.cu +++ b/cpp/test/stats/mean_center.cu @@ -67,15 +67,15 @@ class MeanCenterTest : public ::testing::TestWithParam(out.data(), rows, cols), - raft::make_device_matrix_view(data.data(), rows, cols), - raft::make_device_vector_view(meanVec.data(), meanVecSize), + raft::make_device_matrix_view(data.data(), rows, cols), + raft::make_device_vector_view(meanVec.data(), meanVecSize), params.bcastAlongRows); } else { using layout = raft::col_major; meanCenter(handle, raft::make_device_matrix_view(out.data(), rows, cols), - raft::make_device_matrix_view(data.data(), rows, cols), - raft::make_device_vector_view(meanVec.data(), meanVecSize), + raft::make_device_matrix_view(data.data(), rows, cols), + raft::make_device_vector_view(meanVec.data(), meanVecSize), params.bcastAlongRows); } raft::linalg::naiveMatVec(out_ref.data(), diff --git a/cpp/test/stats/meanvar.cu b/cpp/test/stats/meanvar.cu index 9b408bf1f5..d261db9e34 100644 --- a/cpp/test/stats/meanvar.cu +++ b/cpp/test/stats/meanvar.cu @@ -73,14 +73,14 @@ class MeanVarTest : public ::testing::TestWithParam> { meanvar(handle, raft::make_device_vector_view(mean_act.data(), params.cols), raft::make_device_vector_view(vars_act.data(), params.cols), - raft::make_device_matrix_view(data.data(), params.rows, params.cols), + raft::make_device_matrix_view(data.data(), params.rows, params.cols), params.sample); } else { using layout = raft::col_major; meanvar(handle, raft::make_device_vector_view(mean_act.data(), params.cols), raft::make_device_vector_view(vars_act.data(), params.cols), - raft::make_device_matrix_view(data.data(), params.rows, params.cols), + raft::make_device_matrix_view(data.data(), params.rows, params.cols), params.sample); } RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); diff --git a/cpp/test/stats/minmax.cu b/cpp/test/stats/minmax.cu index eb3e11c908..6f4c05bb72 100644 --- a/cpp/test/stats/minmax.cu +++ b/cpp/test/stats/minmax.cu @@ -119,7 +119,7 @@ class MinMaxTest : public ::testing::TestWithParam> { minmax_ref.data() + params.cols, stream); raft::stats::minmax(handle, - raft::make_device_matrix_view( + raft::make_device_matrix_view( data.data(), params.rows, params.cols), std::nullopt, std::nullopt, From 1e52d852fc0479f6dd09fe7b4e2cbb6f1cf5c93e Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Wed, 21 Sep 2022 17:53:37 +0200 Subject: [PATCH 11/40] Template fix, add static_assert and fix tests --- cpp/include/raft/stats/accuracy.cuh | 9 ++- .../raft/stats/adjusted_rand_index.cuh | 17 ++--- cpp/include/raft/stats/completeness_score.cuh | 11 ++- cpp/include/raft/stats/contingency_matrix.cuh | 68 +++++++++---------- cpp/include/raft/stats/cov.cuh | 3 + cpp/include/raft/stats/detail/histogram.cuh | 2 +- cpp/include/raft/stats/dispersion.cuh | 21 +++--- cpp/include/raft/stats/entropy.cuh | 9 ++- cpp/include/raft/stats/histogram.cuh | 17 ++--- cpp/include/raft/stats/homogeneity_score.cuh | 11 ++- .../raft/stats/information_criterion.cuh | 13 ++-- cpp/include/raft/stats/kl_divergence.cuh | 7 +- cpp/include/raft/stats/mean.cuh | 9 +-- cpp/include/raft/stats/mean_center.cuh | 52 +++++++------- cpp/include/raft/stats/meanvar.cuh | 9 +-- cpp/include/raft/stats/minmax.cuh | 31 ++++----- cpp/test/stats/contingencyMatrix.cu | 7 +- cpp/test/stats/cov.cu | 4 +- cpp/test/stats/dispersion.cu | 8 +-- cpp/test/stats/histogram.cu | 6 +- cpp/test/stats/kl_divergence.cu | 8 +-- cpp/test/stats/mean.cu | 8 +-- cpp/test/stats/mean_center.cu | 6 +- cpp/test/stats/meanvar.cu | 22 +++--- cpp/test/stats/minmax.cu | 20 +++--- 25 files changed, 187 insertions(+), 191 deletions(-) diff --git a/cpp/include/raft/stats/accuracy.cuh b/cpp/include/raft/stats/accuracy.cuh index 60e20f28f1..155ba7b527 100644 --- a/cpp/include/raft/stats/accuracy.cuh +++ b/cpp/include/raft/stats/accuracy.cuh @@ -50,15 +50,14 @@ float accuracy(const math_t* predictions, const math_t* ref_predictions, int n, * @return: Accuracy score in [0, 1]; higher is better. */ template -float accuracy( - const raft::handle_t& handle, - raft::device_vector_view predictions, - raft::device_vector_view ref_predictions) +float accuracy(const raft::handle_t& handle, + raft::device_vector_view predictions, + raft::device_vector_view ref_predictions) { RAFT_EXPECTS(predictions.size() == ref_predictions.size(), "Size mismatch"); RAFT_EXPECTS(predictions.is_exhaustive(), "predictions must be contiguous"); RAFT_EXPECTS(ref_predictions.is_exhaustive(), "ref_predictions must be contiguous"); - + return detail::accuracy_score(predictions.data_handle(), ref_predictions.data_handle(), predictions.size(), diff --git a/cpp/include/raft/stats/adjusted_rand_index.cuh b/cpp/include/raft/stats/adjusted_rand_index.cuh index 6e0094447d..fb0f534c1f 100644 --- a/cpp/include/raft/stats/adjusted_rand_index.cuh +++ b/cpp/include/raft/stats/adjusted_rand_index.cuh @@ -59,22 +59,19 @@ double adjusted_rand_index(const T* firstClusterArray, * @param firstClusterArray: the array of classes * @param secondClusterArray: the array of classes */ -template -double adjusted_rand_index( - const raft::handle_t& handle, - raft::device_vector_view firstClusterArray, - raft::device_vector_view secondClusterArray) +template +double adjusted_rand_index(const raft::handle_t& handle, + raft::device_vector_view firstClusterArray, + raft::device_vector_view secondClusterArray) { RAFT_EXPECTS(firstClusterArray.size() == secondClusterArray.size(), "Size mismatch"); RAFT_EXPECTS(firstClusterArray.is_exhaustive(), "firstClusterArray must be contiguous"); RAFT_EXPECTS(secondClusterArray.is_exhaustive(), "secondClusterArray must be contiguous"); return detail::compute_adjusted_rand_index(firstClusterArray.data_handle(), - secondClusterArray.data_handle(), - firstClusterArray.size(), - handle.get_stream()); + secondClusterArray.data_handle(), + firstClusterArray.size(), + handle.get_stream()); } }; // end namespace stats diff --git a/cpp/include/raft/stats/completeness_score.cuh b/cpp/include/raft/stats/completeness_score.cuh index e5251d39fc..2a4fed3d1c 100644 --- a/cpp/include/raft/stats/completeness_score.cuh +++ b/cpp/include/raft/stats/completeness_score.cuh @@ -59,12 +59,11 @@ double completeness_score(const T* truthClusterArray, * @param upperLabelRange: the upper bound of the range of labels */ template -double completeness_score( - const raft::handle_t& handle, - raft::device_vector_view truthClusterArray, - raft::device_vector_view predClusterArray, - DataT lowerLabelRange, - DataT upperLabelRange) +double completeness_score(const raft::handle_t& handle, + raft::device_vector_view truthClusterArray, + raft::device_vector_view predClusterArray, + DataT lowerLabelRange, + DataT upperLabelRange) { RAFT_EXPECTS(truthClusterArray.size() == predClusterArray.size(), "Size mismatch"); RAFT_EXPECTS(truthClusterArray.is_exhaustive(), "truthClusterArray must be contiguous"); diff --git a/cpp/include/raft/stats/contingency_matrix.cuh b/cpp/include/raft/stats/contingency_matrix.cuh index 6288fc36e9..4977e18fa1 100644 --- a/cpp/include/raft/stats/contingency_matrix.cuh +++ b/cpp/include/raft/stats/contingency_matrix.cuh @@ -51,11 +51,10 @@ void getInputClassCardinality( * @param maxLabel: [out] calculated max value in input array */ template -void getInputClassCardinality( - const raft::handle_t& handle, - raft::device_vector_view groundTruth, - raft::host_scalar_view minLabel, - raft::host_scalar_view maxLabel) +void getInputClassCardinality(const raft::handle_t& handle, + raft::device_vector_view groundTruth, + raft::host_scalar_view minLabel, + raft::host_scalar_view maxLabel) { detail::getInputClassCardinality(groundTruth.data_handle(), groundTruth.extent(0), @@ -93,13 +92,11 @@ size_t getContingencyMatrixWorkspaceSize(int nSamples, * @param minLabel: Optional, min value in input array * @param maxLabel: Optional, max value in input array */ -template -size_t getContingencyMatrixWorkspaceSize( - const raft::handle_t& handle, - raft::device_vector_view groundTruth, - DataT minLabel = std::numeric_limits::max(), - DataT maxLabel = std::numeric_limits::max()) +template +size_t getContingencyMatrixWorkspaceSize(const raft::handle_t& handle, + raft::device_vector_view groundTruth, + DataT minLabel = std::numeric_limits::max(), + DataT maxLabel = std::numeric_limits::max()) { return detail::getContingencyMatrixWorkspaceSize( groundTruth.extent(0), groundTruth.data_handle(), handle.get_stream(), minLabel, maxLabel); @@ -164,37 +161,38 @@ void contingencyMatrix(const T* groundTruth, template -void contingencyMatrix( - const raft::handle_t& handle, - raft::device_vector_view groundTruth, - raft::device_vector_view predictedLabel, - raft::device_matrix_view outMat, - std::optional> workspace, - DataT minLabel = std::numeric_limits::max(), - DataT maxLabel = std::numeric_limits::max()) + typename LayoutPolicy, + typename WorkspaceType, + typename = raft::enable_if_mdspan> +void contingencyMatrix(const raft::handle_t& handle, + raft::device_vector_view groundTruth, + raft::device_vector_view predictedLabel, + raft::device_matrix_view outMat, + std::optional workspace, + DataT minLabel = std::numeric_limits::max(), + DataT maxLabel = std::numeric_limits::max()) { RAFT_EXPECTS(groundTruth.size() == predictedLabel.size(), "Size mismatch"); RAFT_EXPECTS(groundTruth.is_exhaustive(), "groundTruth must be contiguous"); RAFT_EXPECTS(predictedLabel.is_exhaustive(), "predictedLabel must be contiguous"); RAFT_EXPECTS(outMat.is_exhaustive(), "outMat must be contiguous"); - DataT* workspace_p = nullptr; - IdxType workspace_size = 0; - if (workspace.has_value()) - { - workspace_p = workspace.value().data_handle(); - workspace_size = workspace.value().size(); + using workspaceElemType = typename WorkspaceType::element_type; + workspaceElemType* workspace_p = nullptr; + IdxType workspace_size = 0; + if (workspace.has_value()) { + workspace_p = workspace.value().data_handle(); + workspace_size = workspace.value().size() * sizeof(workspaceElemType); } detail::contingencyMatrix(groundTruth.data_handle(), - predictedLabel.data_handle(), - groundTruth.size(), - outMat.data_handle(), - handle.get_stream(), - workspace_p, - workspace_size, - minLabel, - maxLabel); + predictedLabel.data_handle(), + groundTruth.size(), + outMat.data_handle(), + handle.get_stream(), + workspace_p, + workspace_size, + minLabel, + maxLabel); } }; // namespace stats diff --git a/cpp/include/raft/stats/cov.cuh b/cpp/include/raft/stats/cov.cuh index 4e4abd16f6..fdc0e4acdf 100644 --- a/cpp/include/raft/stats/cov.cuh +++ b/cpp/include/raft/stats/cov.cuh @@ -86,6 +86,9 @@ void cov(const raft::handle_t& handle, bool sample, bool stable) { + static_assert( + std::is_same_v || std::is_same_v, + "Data layout not supported"); RAFT_EXPECTS(data.size() == covar.size(), "Size mismatch"); RAFT_EXPECTS(data.is_exhaustive(), "data must be contiguous"); RAFT_EXPECTS(covar.is_exhaustive(), "covar must be contiguous"); diff --git a/cpp/include/raft/stats/detail/histogram.cuh b/cpp/include/raft/stats/detail/histogram.cuh index 65241f524f..b29149d1b9 100644 --- a/cpp/include/raft/stats/detail/histogram.cuh +++ b/cpp/include/raft/stats/detail/histogram.cuh @@ -465,7 +465,7 @@ HistType selectBestHistAlgo(IdxT nbins) * @param nbins number of bins * @param data input data (length = ncols * nrows) * @param nrows data array length in each column (or batch) - * @param ncols number of columsn (or batch size) + * @param ncols number of columns (or batch size) * @param stream cuda stream * @param binner the operation that computes the bin index of the input data * diff --git a/cpp/include/raft/stats/dispersion.cuh b/cpp/include/raft/stats/dispersion.cuh index 65d89f209d..7ccd37d0ee 100644 --- a/cpp/include/raft/stats/dispersion.cuh +++ b/cpp/include/raft/stats/dispersion.cuh @@ -74,23 +74,18 @@ DataT dispersion(const DataT* centroids, * @param nPoints number of points in the dataset * @return the cluster dispersion value */ -template -DataT dispersion( - const raft::handle_t& handle, - raft::device_matrix_view centroids, - raft::device_vector_view clusterSizes, - std::optional> globalCentroid, - const IdxType nPoints) +template +DataT dispersion(const raft::handle_t& handle, + raft::device_matrix_view centroids, + raft::device_vector_view clusterSizes, + std::optional> globalCentroid, + const IdxType nPoints) { - RAFT_EXPECTS(clusterSizes.size() == centroids.extent(0), "Size mismatch"); + RAFT_EXPECTS(clusterSizes.extent(0) == centroids.extent(0), "Size mismatch"); RAFT_EXPECTS(clusterSizes.is_exhaustive(), "clusterSizes must be contiguous"); DataT* globalCentroid_ptr = nullptr; - if (globalCentroid.has_value()) - { + if (globalCentroid.has_value()) { RAFT_EXPECTS(globalCentroid.value().is_exhaustive(), "globalCentroid must be contiguous"); globalCentroid_ptr = globalCentroid.value().data_handle(); } diff --git a/cpp/include/raft/stats/entropy.cuh b/cpp/include/raft/stats/entropy.cuh index 098ad5c433..65b9dfb4b0 100644 --- a/cpp/include/raft/stats/entropy.cuh +++ b/cpp/include/raft/stats/entropy.cuh @@ -59,11 +59,10 @@ double entropy(const T* clusterArray, * @return the entropy score */ template -double entropy( - const raft::handle_t& handle, - raft::device_vector_view clusterArray, - const DataT lowerLabelRange, - const DataT upperLabelRange) +double entropy(const raft::handle_t& handle, + raft::device_vector_view clusterArray, + const DataT lowerLabelRange, + const DataT upperLabelRange) { RAFT_EXPECTS(clusterArray.is_exhaustive(), "clusterArray must be contiguous"); return detail::entropy(clusterArray.data_handle(), diff --git a/cpp/include/raft/stats/histogram.cuh b/cpp/include/raft/stats/histogram.cuh index 81122f2f59..229db6ebba 100644 --- a/cpp/include/raft/stats/histogram.cuh +++ b/cpp/include/raft/stats/histogram.cuh @@ -72,28 +72,29 @@ void histogram(HistType type, * @tparam LayoutPolicy Layout type of the input data. * @param handle the raft handle * @param type histogram implementation type to choose - * @param bins the output bins (length = ncols * nbins) - * @param data input data (length = ncols * nrows) + * @param bins the output bins col-major (length = nbins * ncols) + * @param data input data col-major (length = nrows * ncols) * @param binner the operation that computes the bin index of the input data * * @note signature of BinnerOp is `int func(DataT, IdxT);` */ template , - typename LayoutPolicy> + typename BinnerOp = IdentityBinner> void histogram(const raft::handle_t& handle, HistType type, - raft::device_matrix_view bins, - raft::device_matrix_view data, + raft::device_matrix_view bins, + raft::device_matrix_view data, BinnerOp binner = IdentityBinner()) { - RAFT_EXPECTS(bins.extent(0) == data.extent(0), "Size mismatch"); + RAFT_EXPECTS(std::is_integral_v && data.extent(0) <= std::numeric_limits::max(), + "Index type not supported"); + RAFT_EXPECTS(bins.extent(1) == data.extent(1), "Size mismatch"); RAFT_EXPECTS(bins.is_exhaustive(), "bins must be contiguous"); RAFT_EXPECTS(data.is_exhaustive(), "data must be contiguous"); detail::histogram(type, bins.data_handle(), - bins.extent(1), + bins.extent(0), data.data_handle(), data.extent(0), data.extent(1), diff --git a/cpp/include/raft/stats/homogeneity_score.cuh b/cpp/include/raft/stats/homogeneity_score.cuh index 5b2d83a535..3f55bbb832 100644 --- a/cpp/include/raft/stats/homogeneity_score.cuh +++ b/cpp/include/raft/stats/homogeneity_score.cuh @@ -62,12 +62,11 @@ double homogeneity_score(const T* truthClusterArray, * @param upperLabelRange: the upper bound of the range of labels */ template -double homogeneity_score( - const raft::handle_t& handle, - raft::device_vector_view truthClusterArray, - raft::device_vector_view predClusterArray, - DataT lowerLabelRange, - DataT upperLabelRange) +double homogeneity_score(const raft::handle_t& handle, + raft::device_vector_view truthClusterArray, + raft::device_vector_view predClusterArray, + DataT lowerLabelRange, + DataT upperLabelRange) { RAFT_EXPECTS(truthClusterArray.size() == predClusterArray.size(), "Size mismatch"); RAFT_EXPECTS(truthClusterArray.is_exhaustive(), "truthClusterArray must be contiguous"); diff --git a/cpp/include/raft/stats/information_criterion.cuh b/cpp/include/raft/stats/information_criterion.cuh index 425d0369f8..65de7d6886 100644 --- a/cpp/include/raft/stats/information_criterion.cuh +++ b/cpp/include/raft/stats/information_criterion.cuh @@ -81,13 +81,12 @@ void information_criterion_batched(ScalarT* d_ic, * @param[in] n_samples Number of samples in each series */ template -void information_criterion_batched( - const raft::handle_t& handle, - raft::device_vector_view d_ic, - raft::device_vector_view d_loglikelihood, - IC_Type ic_type, - IdxType n_params, - IdxType n_samples) +void information_criterion_batched(const raft::handle_t& handle, + raft::device_vector_view d_ic, + raft::device_vector_view d_loglikelihood, + IC_Type ic_type, + IdxType n_params, + IdxType n_samples) { RAFT_EXPECTS(d_ic.size() == d_loglikelihood.size(), "Size mismatch"); RAFT_EXPECTS(d_ic.is_exhaustive(), "d_ic must be contiguous"); diff --git a/cpp/include/raft/stats/kl_divergence.cuh b/cpp/include/raft/stats/kl_divergence.cuh index cd9cc14c1b..f42c3853d7 100644 --- a/cpp/include/raft/stats/kl_divergence.cuh +++ b/cpp/include/raft/stats/kl_divergence.cuh @@ -54,10 +54,9 @@ DataT kl_divergence(const DataT* modelPDF, const DataT* candidatePDF, int size, * @param candidatePDF: the candidate array of probability density functions of type DataT */ template -DataT kl_divergence( - const raft::handle_t& handle, - raft::device_vector_view modelPDF, - raft::device_vector_view candidatePDF) +DataT kl_divergence(const raft::handle_t& handle, + raft::device_vector_view modelPDF, + raft::device_vector_view candidatePDF) { RAFT_EXPECTS(modelPDF.size() == candidatePDF.size(), "Size mismatch"); RAFT_EXPECTS(modelPDF.is_exhaustive(), "modelPDF must be contiguous"); diff --git a/cpp/include/raft/stats/mean.cuh b/cpp/include/raft/stats/mean.cuh index a5807c4a10..d80e36d998 100644 --- a/cpp/include/raft/stats/mean.cuh +++ b/cpp/include/raft/stats/mean.cuh @@ -64,15 +64,16 @@ void mean( * @param sample: whether to evaluate sample mean or not. In other words, whether * to normalize the output using N-1 or N, for true or false, respectively */ -template +template void mean(const raft::handle_t& handle, raft::device_vector_view mu, raft::device_matrix_view data, bool sample) { - RAFT_EXPECTS(data.extent(0) == mu.size(), "Size mismatch betwen data and mu"); + static_assert( + std::is_same_v || std::is_same_v, + "Data layout not supported"); + RAFT_EXPECTS(data.extent(0) == mu.extent(0), "Size mismatch betwen data and mu"); RAFT_EXPECTS(mu.is_exhaustive(), "mu must be contiguous"); RAFT_EXPECTS(data.is_exhaustive(), "data must be contiguous"); detail::mean(mu.data_handle(), diff --git a/cpp/include/raft/stats/mean_center.cuh b/cpp/include/raft/stats/mean_center.cuh index 2db287ad20..8b2b1e5afd 100644 --- a/cpp/include/raft/stats/mean_center.cuh +++ b/cpp/include/raft/stats/mean_center.cuh @@ -64,28 +64,29 @@ void meanCenter(Type* out, * @param mu the mean vector * @param bcastAlongRows whether to broadcast vector along rows or columns */ -template +template void meanCenter(const raft::handle_t& handle, raft::device_matrix_view out, raft::device_matrix_view data, raft::device_vector_view mu, - bool bcastAlongRows) + bool bcastAlongRows, + std::integral_constant) { + static_assert( + std::is_same_v || std::is_same_v, + "Data layout not supported"); RAFT_EXPECTS(out.size() == data.size(), "Size mismatch"); - RAFT_EXPECTS(data.extent(0) == mu.size(), "Size mismatch betwen data and mu"); + RAFT_EXPECTS(data.extent(0) == mu.extent(0), "Size mismatch betwen data and mu"); RAFT_EXPECTS(out.is_exhaustive(), "out must be contiguous"); RAFT_EXPECTS(data.is_exhaustive(), "data must be contiguous"); detail::meanCenter(out.data_handle(), - data.data_handle(), - mu.data_handle(), - data.extent(1), - data.extent(0), - std::is_same_v, - bcastAlongRows, - handle.get_stream()); + data.data_handle(), + mu.data_handle(), + data.extent(1), + data.extent(0), + std::is_same_v, + bcastAlongRows, + handle.get_stream()); } /** @@ -127,28 +128,29 @@ void meanAdd(Type* out, * @param mu the mean vector * @param bcastAlongRows whether to broadcast vector along rows or columns */ -template +template void meanAdd(const raft::handle_t& handle, raft::device_matrix_view out, raft::device_matrix_view data, raft::device_vector_view mu, - bool bcastAlongRows) + bool bcastAlongRows, + std::integral_constant) { + static_assert( + std::is_same_v || std::is_same_v, + "Data layout not supported"); RAFT_EXPECTS(out.size() == data.size(), "Size mismatch"); RAFT_EXPECTS(data.extent(0) == mu.size(), "Size mismatch betwen data and mu"); RAFT_EXPECTS(out.is_exhaustive(), "out must be contiguous"); RAFT_EXPECTS(data.is_exhaustive(), "data must be contiguous"); detail::meanAdd(out.data_handle(), - data.data_handle(), - mu.data_handle(), - data.extent(1), - data.extent(0), - std::is_same_v, - bcastAlongRows, - handle.get_stream()); + data.data_handle(), + mu.data_handle(), + data.extent(1), + data.extent(0), + std::is_same_v, + bcastAlongRows, + handle.get_stream()); } }; // end namespace stats }; // end namespace raft diff --git a/cpp/include/raft/stats/meanvar.cuh b/cpp/include/raft/stats/meanvar.cuh index 6f19ee9581..b37da4478c 100644 --- a/cpp/include/raft/stats/meanvar.cuh +++ b/cpp/include/raft/stats/meanvar.cuh @@ -74,16 +74,17 @@ void meanvar(Type* mean, * @param [in] sample whether to evaluate sample variance or not. In other words, whether to * normalize the variance using N-1 or N, for true or false respectively. */ -template +template void meanvar(const raft::handle_t& handle, raft::device_vector_view mean, raft::device_vector_view var, raft::device_matrix_view data, bool sample) { - RAFT_EXPECTS(data.extent(0) == var.size(), "Size mismatch betwen data and var"); + static_assert( + std::is_same_v || std::is_same_v, + "Data layout not supported"); + RAFT_EXPECTS(data.extent(0) == var.extent(0), "Size mismatch betwen data and var"); RAFT_EXPECTS(mean.size() == var.size(), "Size mismatch betwen mean and var"); RAFT_EXPECTS(mean.is_exhaustive(), "mean must be contiguous"); RAFT_EXPECTS(var.is_exhaustive(), "var must be contiguous"); diff --git a/cpp/include/raft/stats/minmax.cuh b/cpp/include/raft/stats/minmax.cuh index 7749bb2551..a972c513b3 100644 --- a/cpp/include/raft/stats/minmax.cuh +++ b/cpp/include/raft/stats/minmax.cuh @@ -87,26 +87,25 @@ void minmax(const T* data, * @param globalmin final col-wise global minimum (size = ncols) * @param globalmax final col-wise global maximum (size = ncols) * @param sampledcols output sampled data. Pass nullptr if you don't need this + * @param TPB threads_pre_block * @note This method makes the following assumptions: * 1. input and output matrices are assumed to be col-major * 2. ncols is small enough to fit the whole of min/max values across all cols * in shared memory */ -template +template void minmax(const raft::handle_t& handle, raft::device_matrix_view data, std::optional> rowids, std::optional> colids, raft::device_vector_view globalmin, raft::device_vector_view globalmax, - std::optional> sampledcols) + std::optional> sampledcols, + std::integral_constant) { const unsigned* rowids_ptr = nullptr; const unsigned* colids_ptr = nullptr; - T* sampledcols_ptr = nullptr; + DataT* sampledcols_ptr = nullptr; auto nrows = data.extent(0); auto ncols = data.extent(1); auto row_stride = data.stride(1); @@ -119,16 +118,16 @@ void minmax(const raft::handle_t& handle, ncols = colids.value().extent(0); } if (sampledcols.has_value()) { sampledcols_ptr = sampledcols.value().data_handle(); } - detail::minmax(data.data_handle(), - rowids_ptr, - colids_ptr, - nrows, - ncols, - row_stride, - globalmin.data_handle(), - globalmax.data_handle(), - sampledcols_ptr, - handle.get_stream()); + detail::minmax(data.data_handle(), + rowids_ptr, + colids_ptr, + nrows, + ncols, + row_stride, + globalmin.data_handle(), + globalmax.data_handle(), + sampledcols_ptr, + handle.get_stream()); } }; // namespace stats diff --git a/cpp/test/stats/contingencyMatrix.cu b/cpp/test/stats/contingencyMatrix.cu index df09ad2523..c07aab4392 100644 --- a/cpp/test/stats/contingencyMatrix.cu +++ b/cpp/test/stats/contingencyMatrix.cu @@ -123,11 +123,10 @@ class ContingencyMatrixTest : public ::testing::TestWithParam(dY.data(), numElements), + raft::make_device_vector_view(dYHat.data(), numElements), raft::make_device_matrix_view(dComputedOutput.data(), numUniqueClasses, numUniqueClasses), - (void*)pWorkspace.data(), - workspaceSz, + std::make_optional(raft::make_device_vector_view(pWorkspace.data(), workspaceSz)), minLabel, maxLabel); diff --git a/cpp/test/stats/cov.cu b/cpp/test/stats/cov.cu index dd782f0600..97caffc7a9 100644 --- a/cpp/test/stats/cov.cu +++ b/cpp/test/stats/cov.cu @@ -74,7 +74,7 @@ class CovTest : public ::testing::TestWithParam> { cov(handle, raft::make_device_matrix_view(cov_act.data(), cols, cols), raft::make_device_matrix_view(data.data(), rows, cols), - raft::make_device_vector_view(mean_act.data(), cols), + raft::make_device_vector_view(mean_act.data(), cols), params.sample, params.stable); } else { @@ -82,7 +82,7 @@ class CovTest : public ::testing::TestWithParam> { cov(handle, raft::make_device_matrix_view(cov_act.data(), cols, cols), raft::make_device_matrix_view(data.data(), rows, cols), - raft::make_device_vector_view(mean_act.data(), cols), + raft::make_device_vector_view(mean_act.data(), cols), params.sample, params.stable); } diff --git a/cpp/test/stats/dispersion.cu b/cpp/test/stats/dispersion.cu index 75369f794c..53ecec780e 100644 --- a/cpp/test/stats/dispersion.cu +++ b/cpp/test/stats/dispersion.cu @@ -64,11 +64,11 @@ class DispersionTest : public ::testing::TestWithParam> { for (const auto& val : h_counts) { npoints += val; } - actualVal = dispersion( + actualVal = dispersion( handle, - raft::make_device_matrix_view(data.data(), params.clusters, params.dim), - raft::make_device_vector_view(counts.data(), params.clusters), - std::make_optional(raft::make_device_vector_view(act_mean.data(), params.dim)), + raft::make_device_matrix_view(data.data(), params.clusters, params.dim), + raft::make_device_vector_view(counts.data(), params.clusters), + std::make_optional(raft::make_device_vector_view(act_mean.data(), params.dim)), npoints); expectedVal = T(0); std::vector h_data(len, T(0)); diff --git a/cpp/test/stats/histogram.cu b/cpp/test/stats/histogram.cu index d759e6c9f4..cbf528d700 100644 --- a/cpp/test/stats/histogram.cu +++ b/cpp/test/stats/histogram.cu @@ -86,8 +86,10 @@ class HistTest : public ::testing::TestWithParam { naiveHist(ref_bins.data(), params.nbins, in.data(), params.nrows, params.ncols, stream); histogram(handle, params.type, - raft::make_device_matrix_view(bins.data(), params.nbins, params.ncols), - raft::make_device_matrix_view(in.data(), params.nrows, params.ncols)); + raft::make_device_matrix_view( + bins.data(), params.nbins, params.ncols), + raft::make_device_matrix_view( + in.data(), params.nrows, params.ncols)); handle.sync_stream(); } diff --git a/cpp/test/stats/kl_divergence.cu b/cpp/test/stats/kl_divergence.cu index ca8a6735f8..0199ba8b9a 100644 --- a/cpp/test/stats/kl_divergence.cu +++ b/cpp/test/stats/kl_divergence.cu @@ -73,10 +73,10 @@ class klDivergenceTest : public ::testing::TestWithParam { } // calling the kl_divergence CUDA implementation - computedklDivergence = - raft::stats::kl_divergence(handle, - raft::make_device_vector_view(d_modelPDF.data(), nElements), - raft::make_device_vector_view(d_candidatePDF.data(), nElements)); + computedklDivergence = raft::stats::kl_divergence( + handle, + raft::make_device_vector_view(d_modelPDF.data(), nElements), + raft::make_device_vector_view(d_candidatePDF.data(), nElements)); } // declaring the data values diff --git a/cpp/test/stats/mean.cu b/cpp/test/stats/mean.cu index a485ae18c8..1fcc72d773 100644 --- a/cpp/test/stats/mean.cu +++ b/cpp/test/stats/mean.cu @@ -68,14 +68,14 @@ class MeanTest : public ::testing::TestWithParam> { if (params.rowMajor) { using layout = raft::row_major; mean(handle, - raft::make_device_vector_view(mean_act.data(), rows * cols), - raft::make_device_matrix_view(data, rows, cols), + raft::make_device_vector_view(mean_act.data(), rows * cols), + raft::make_device_matrix_view(data, rows, cols), params.sample); } else { using layout = raft::col_major; mean(handle, - raft::make_device_vector_view(mean_act.data(), rows * cols), - raft::make_device_matrix_view(data, rows, cols), + raft::make_device_vector_view(mean_act.data(), rows * cols), + raft::make_device_matrix_view(data, rows, cols), params.sample); } } diff --git a/cpp/test/stats/mean_center.cu b/cpp/test/stats/mean_center.cu index 251913e35e..a4a34bf3cc 100644 --- a/cpp/test/stats/mean_center.cu +++ b/cpp/test/stats/mean_center.cu @@ -69,14 +69,16 @@ class MeanCenterTest : public ::testing::TestWithParam(out.data(), rows, cols), raft::make_device_matrix_view(data.data(), rows, cols), raft::make_device_vector_view(meanVec.data(), meanVecSize), - params.bcastAlongRows); + params.bcastAlongRows, + std::integral_constant{}); } else { using layout = raft::col_major; meanCenter(handle, raft::make_device_matrix_view(out.data(), rows, cols), raft::make_device_matrix_view(data.data(), rows, cols), raft::make_device_vector_view(meanVec.data(), meanVecSize), - params.bcastAlongRows); + params.bcastAlongRows, + std::integral_constant{}); } raft::linalg::naiveMatVec(out_ref.data(), data.data(), diff --git a/cpp/test/stats/meanvar.cu b/cpp/test/stats/meanvar.cu index d261db9e34..9e0f6013df 100644 --- a/cpp/test/stats/meanvar.cu +++ b/cpp/test/stats/meanvar.cu @@ -70,18 +70,20 @@ class MeanVarTest : public ::testing::TestWithParam> { if (params.rowMajor) { using layout = raft::row_major; - meanvar(handle, - raft::make_device_vector_view(mean_act.data(), params.cols), - raft::make_device_vector_view(vars_act.data(), params.cols), - raft::make_device_matrix_view(data.data(), params.rows, params.cols), - params.sample); + meanvar( + handle, + raft::make_device_vector_view(mean_act.data(), params.cols), + raft::make_device_vector_view(vars_act.data(), params.cols), + raft::make_device_matrix_view(data.data(), params.rows, params.cols), + params.sample); } else { using layout = raft::col_major; - meanvar(handle, - raft::make_device_vector_view(mean_act.data(), params.cols), - raft::make_device_vector_view(vars_act.data(), params.cols), - raft::make_device_matrix_view(data.data(), params.rows, params.cols), - params.sample); + meanvar( + handle, + raft::make_device_vector_view(mean_act.data(), params.cols), + raft::make_device_vector_view(vars_act.data(), params.cols), + raft::make_device_matrix_view(data.data(), params.rows, params.cols), + params.sample); } RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); } diff --git a/cpp/test/stats/minmax.cu b/cpp/test/stats/minmax.cu index 6f4c05bb72..dde73c9744 100644 --- a/cpp/test/stats/minmax.cu +++ b/cpp/test/stats/minmax.cu @@ -118,16 +118,16 @@ class MinMaxTest : public ::testing::TestWithParam> { minmax_ref.data(), minmax_ref.data() + params.cols, stream); - raft::stats::minmax(handle, - raft::make_device_matrix_view( - data.data(), params.rows, params.cols), - std::nullopt, - std::nullopt, - raft::make_device_vector_view( - minmax_act.data(), params.cols), - raft::make_device_vector_view( - minmax_act.data() + params.cols, params.cols), - std::nullopt); + raft::stats::minmax( + handle, + raft::make_device_matrix_view( + data.data(), params.rows, params.cols), + std::nullopt, + std::nullopt, + raft::make_device_vector_view(minmax_act.data(), params.cols), + raft::make_device_vector_view(minmax_act.data() + params.cols, params.cols), + std::nullopt, + std::integral_constant{}); } protected: From 37894d7a556b26f06b4b0718bd2136c0326550c1 Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Wed, 21 Sep 2022 18:03:33 +0200 Subject: [PATCH 12/40] Add optional argument to contingency matrix --- cpp/include/raft/stats/contingency_matrix.cuh | 12 ++++++++---- cpp/test/stats/contingencyMatrix.cu | 4 ++-- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/cpp/include/raft/stats/contingency_matrix.cuh b/cpp/include/raft/stats/contingency_matrix.cuh index 4977e18fa1..f067f100d4 100644 --- a/cpp/include/raft/stats/contingency_matrix.cuh +++ b/cpp/include/raft/stats/contingency_matrix.cuh @@ -169,8 +169,8 @@ void contingencyMatrix(const raft::handle_t& handle, raft::device_vector_view predictedLabel, raft::device_matrix_view outMat, std::optional workspace, - DataT minLabel = std::numeric_limits::max(), - DataT maxLabel = std::numeric_limits::max()) + std::optional minLabel = std::nullopt, + std::optional maxLabel = std::nullopt) { RAFT_EXPECTS(groundTruth.size() == predictedLabel.size(), "Size mismatch"); RAFT_EXPECTS(groundTruth.is_exhaustive(), "groundTruth must be contiguous"); @@ -184,6 +184,10 @@ void contingencyMatrix(const raft::handle_t& handle, workspace_p = workspace.value().data_handle(); workspace_size = workspace.value().size() * sizeof(workspaceElemType); } + DataT minLabelValue = std::numeric_limits::max(); + DataT maxLabelValue = std::numeric_limits::max(); + if (minLabel.has_value()) { minLabelValue = minLabel.value(); } + if (maxLabel.has_value()) { maxLabelValue = maxLabel.value(); } detail::contingencyMatrix(groundTruth.data_handle(), predictedLabel.data_handle(), groundTruth.size(), @@ -191,8 +195,8 @@ void contingencyMatrix(const raft::handle_t& handle, handle.get_stream(), workspace_p, workspace_size, - minLabel, - maxLabel); + minLabelValue, + maxLabelValue); } }; // namespace stats diff --git a/cpp/test/stats/contingencyMatrix.cu b/cpp/test/stats/contingencyMatrix.cu index c07aab4392..2a59d709d6 100644 --- a/cpp/test/stats/contingencyMatrix.cu +++ b/cpp/test/stats/contingencyMatrix.cu @@ -127,8 +127,8 @@ class ContingencyMatrixTest : public ::testing::TestWithParam(dYHat.data(), numElements), raft::make_device_matrix_view(dComputedOutput.data(), numUniqueClasses, numUniqueClasses), std::make_optional(raft::make_device_vector_view(pWorkspace.data(), workspaceSz)), - minLabel, - maxLabel); + std::make_optional(minLabel), + std::make_optional(maxLabel)); raft::interruptible::synchronize(stream); ASSERT_TRUE(raft::devArrMatch(dComputedOutput.data(), From 8e95d2ff3efc0581ab26808310d20a8c53fb78a8 Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Wed, 21 Sep 2022 18:38:43 +0200 Subject: [PATCH 13/40] Prefer extent over size, change workspace type of contingency --- cpp/include/raft/stats/accuracy.cuh | 2 +- .../raft/stats/adjusted_rand_index.cuh | 2 +- cpp/include/raft/stats/completeness_score.cuh | 2 +- cpp/include/raft/stats/contingency_matrix.cuh | 25 +++++++++---------- cpp/include/raft/stats/entropy.cuh | 2 +- cpp/include/raft/stats/homogeneity_score.cuh | 2 +- cpp/include/raft/stats/kl_divergence.cuh | 2 +- cpp/test/stats/contingencyMatrix.cu | 2 +- 8 files changed, 19 insertions(+), 20 deletions(-) diff --git a/cpp/include/raft/stats/accuracy.cuh b/cpp/include/raft/stats/accuracy.cuh index 155ba7b527..feb31688fb 100644 --- a/cpp/include/raft/stats/accuracy.cuh +++ b/cpp/include/raft/stats/accuracy.cuh @@ -60,7 +60,7 @@ float accuracy(const raft::handle_t& handle, return detail::accuracy_score(predictions.data_handle(), ref_predictions.data_handle(), - predictions.size(), + predictions.extent(0), handle.get_stream()); } } // namespace stats diff --git a/cpp/include/raft/stats/adjusted_rand_index.cuh b/cpp/include/raft/stats/adjusted_rand_index.cuh index fb0f534c1f..579800fdc9 100644 --- a/cpp/include/raft/stats/adjusted_rand_index.cuh +++ b/cpp/include/raft/stats/adjusted_rand_index.cuh @@ -70,7 +70,7 @@ double adjusted_rand_index(const raft::handle_t& handle, return detail::compute_adjusted_rand_index(firstClusterArray.data_handle(), secondClusterArray.data_handle(), - firstClusterArray.size(), + firstClusterArray.extent(0), handle.get_stream()); } diff --git a/cpp/include/raft/stats/completeness_score.cuh b/cpp/include/raft/stats/completeness_score.cuh index 2a4fed3d1c..50780309f0 100644 --- a/cpp/include/raft/stats/completeness_score.cuh +++ b/cpp/include/raft/stats/completeness_score.cuh @@ -70,7 +70,7 @@ double completeness_score(const raft::handle_t& handle, RAFT_EXPECTS(predClusterArray.is_exhaustive(), "predClusterArray must be contiguous"); return detail::homogeneity_score(predClusterArray.data_handle(), truthClusterArray.data_handle(), - truthClusterArray.size(), + truthClusterArray.extent(0), lowerLabelRange, upperLabelRange, handle.get_stream()); diff --git a/cpp/include/raft/stats/contingency_matrix.cuh b/cpp/include/raft/stats/contingency_matrix.cuh index f067f100d4..db404ff45e 100644 --- a/cpp/include/raft/stats/contingency_matrix.cuh +++ b/cpp/include/raft/stats/contingency_matrix.cuh @@ -162,27 +162,26 @@ template > -void contingencyMatrix(const raft::handle_t& handle, - raft::device_vector_view groundTruth, - raft::device_vector_view predictedLabel, - raft::device_matrix_view outMat, - std::optional workspace, - std::optional minLabel = std::nullopt, - std::optional maxLabel = std::nullopt) + typename WorkspaceDataType> +void contingencyMatrix( + const raft::handle_t& handle, + raft::device_vector_view groundTruth, + raft::device_vector_view predictedLabel, + raft::device_matrix_view outMat, + std::optional> workspace, + std::optional minLabel = std::nullopt, + std::optional maxLabel = std::nullopt) { RAFT_EXPECTS(groundTruth.size() == predictedLabel.size(), "Size mismatch"); RAFT_EXPECTS(groundTruth.is_exhaustive(), "groundTruth must be contiguous"); RAFT_EXPECTS(predictedLabel.is_exhaustive(), "predictedLabel must be contiguous"); RAFT_EXPECTS(outMat.is_exhaustive(), "outMat must be contiguous"); - using workspaceElemType = typename WorkspaceType::element_type; - workspaceElemType* workspace_p = nullptr; + WorkspaceDataType* workspace_p = nullptr; IdxType workspace_size = 0; if (workspace.has_value()) { workspace_p = workspace.value().data_handle(); - workspace_size = workspace.value().size() * sizeof(workspaceElemType); + workspace_size = workspace.value().size() * sizeof(WorkspaceDataType); } DataT minLabelValue = std::numeric_limits::max(); DataT maxLabelValue = std::numeric_limits::max(); @@ -190,7 +189,7 @@ void contingencyMatrix(const raft::handle_t& handle, if (maxLabel.has_value()) { maxLabelValue = maxLabel.value(); } detail::contingencyMatrix(groundTruth.data_handle(), predictedLabel.data_handle(), - groundTruth.size(), + groundTruth.extent(0), outMat.data_handle(), handle.get_stream(), workspace_p, diff --git a/cpp/include/raft/stats/entropy.cuh b/cpp/include/raft/stats/entropy.cuh index 65b9dfb4b0..a3800a0628 100644 --- a/cpp/include/raft/stats/entropy.cuh +++ b/cpp/include/raft/stats/entropy.cuh @@ -66,7 +66,7 @@ double entropy(const raft::handle_t& handle, { RAFT_EXPECTS(clusterArray.is_exhaustive(), "clusterArray must be contiguous"); return detail::entropy(clusterArray.data_handle(), - clusterArray.size(), + clusterArray.extent(0), lowerLabelRange, upperLabelRange, handle.get_stream()); diff --git a/cpp/include/raft/stats/homogeneity_score.cuh b/cpp/include/raft/stats/homogeneity_score.cuh index 3f55bbb832..641f16ab9e 100644 --- a/cpp/include/raft/stats/homogeneity_score.cuh +++ b/cpp/include/raft/stats/homogeneity_score.cuh @@ -73,7 +73,7 @@ double homogeneity_score(const raft::handle_t& handle, RAFT_EXPECTS(predClusterArray.is_exhaustive(), "predClusterArray must be contiguous"); return detail::homogeneity_score(truthClusterArray.data_handle(), predClusterArray.data_handle(), - truthClusterArray.size(), + truthClusterArray.extent(0), lowerLabelRange, upperLabelRange, handle.get_stream()); diff --git a/cpp/include/raft/stats/kl_divergence.cuh b/cpp/include/raft/stats/kl_divergence.cuh index f42c3853d7..eabf95454b 100644 --- a/cpp/include/raft/stats/kl_divergence.cuh +++ b/cpp/include/raft/stats/kl_divergence.cuh @@ -62,7 +62,7 @@ DataT kl_divergence(const raft::handle_t& handle, RAFT_EXPECTS(modelPDF.is_exhaustive(), "modelPDF must be contiguous"); RAFT_EXPECTS(candidatePDF.is_exhaustive(), "candidatePDF must be contiguous"); return detail::kl_divergence( - modelPDF.data_handle(), candidatePDF.data_handle(), modelPDF.size(), handle.get_stream()); + modelPDF.data_handle(), candidatePDF.data_handle(), modelPDF.extent(0), handle.get_stream()); } }; // end namespace stats diff --git a/cpp/test/stats/contingencyMatrix.cu b/cpp/test/stats/contingencyMatrix.cu index 2a59d709d6..76672f2c37 100644 --- a/cpp/test/stats/contingencyMatrix.cu +++ b/cpp/test/stats/contingencyMatrix.cu @@ -126,7 +126,7 @@ class ContingencyMatrixTest : public ::testing::TestWithParam(dY.data(), numElements), raft::make_device_vector_view(dYHat.data(), numElements), raft::make_device_matrix_view(dComputedOutput.data(), numUniqueClasses, numUniqueClasses), - std::make_optional(raft::make_device_vector_view(pWorkspace.data(), workspaceSz)), + std::make_optional(raft::make_device_vector_view(pWorkspace.data(), workspaceSz)), std::make_optional(minLabel), std::make_optional(maxLabel)); From e9a929c71c6c15978abd62ab5126d0474c7ad721 Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Thu, 22 Sep 2022 16:09:02 +0200 Subject: [PATCH 14/40] Add device_mdspan include. Fix parameter order --- cpp/include/raft/stats/accuracy.cuh | 2 +- .../raft/stats/adjusted_rand_index.cuh | 2 +- cpp/include/raft/stats/completeness_score.cuh | 2 +- cpp/include/raft/stats/contingency_matrix.cuh | 27 ++++++++++++++----- cpp/include/raft/stats/cov.cuh | 8 +++--- cpp/include/raft/stats/dispersion.cuh | 6 ++--- cpp/include/raft/stats/entropy.cuh | 4 +-- cpp/include/raft/stats/histogram.cuh | 7 +++-- cpp/include/raft/stats/homogeneity_score.cuh | 2 +- .../raft/stats/information_criterion.cuh | 7 ++--- cpp/include/raft/stats/kl_divergence.cuh | 2 +- cpp/include/raft/stats/mean.cuh | 6 ++--- cpp/include/raft/stats/mean_center.cuh | 10 +++---- cpp/include/raft/stats/meanvar.cuh | 6 ++--- cpp/include/raft/stats/minmax.cuh | 2 +- cpp/test/nvtx.cpp | 2 +- cpp/test/stats/adjusted_rand_index.cu | 2 +- cpp/test/stats/cov.cu | 4 +-- cpp/test/stats/dispersion.cu | 3 ++- cpp/test/stats/histogram.cu | 12 ++++----- cpp/test/stats/information_criterion.cu | 2 +- cpp/test/stats/mean.cu | 4 +-- cpp/test/stats/mean_center.cu | 4 +-- cpp/test/stats/meanvar.cu | 4 +-- cpp/test/stats/minmax.cu | 2 +- 25 files changed, 73 insertions(+), 59 deletions(-) diff --git a/cpp/include/raft/stats/accuracy.cuh b/cpp/include/raft/stats/accuracy.cuh index feb31688fb..ef856724aa 100644 --- a/cpp/include/raft/stats/accuracy.cuh +++ b/cpp/include/raft/stats/accuracy.cuh @@ -19,7 +19,7 @@ #pragma once -#include +#include #include namespace raft { diff --git a/cpp/include/raft/stats/adjusted_rand_index.cuh b/cpp/include/raft/stats/adjusted_rand_index.cuh index 579800fdc9..4ec8c0efa0 100644 --- a/cpp/include/raft/stats/adjusted_rand_index.cuh +++ b/cpp/include/raft/stats/adjusted_rand_index.cuh @@ -24,7 +24,7 @@ #pragma once -#include +#include #include namespace raft { diff --git a/cpp/include/raft/stats/completeness_score.cuh b/cpp/include/raft/stats/completeness_score.cuh index 50780309f0..ea72ed86cd 100644 --- a/cpp/include/raft/stats/completeness_score.cuh +++ b/cpp/include/raft/stats/completeness_score.cuh @@ -19,7 +19,7 @@ #pragma once -#include +#include #include namespace raft { diff --git a/cpp/include/raft/stats/contingency_matrix.cuh b/cpp/include/raft/stats/contingency_matrix.cuh index db404ff45e..1179ce9404 100644 --- a/cpp/include/raft/stats/contingency_matrix.cuh +++ b/cpp/include/raft/stats/contingency_matrix.cuh @@ -19,7 +19,9 @@ #pragma once -#include +#include +#include +#include #include namespace raft { @@ -44,9 +46,10 @@ void getInputClassCardinality( /** * @brief use this to allocate output matrix size * size of matrix = (maxLabel - minLabel + 1)^2 * sizeof(int) + * @tparam DataT label type + * @tparam IdxType Index type of matrix extent. * @param handle: the raft handle. * @param groundTruth: device 1-d array for ground truth (num of rows) - * @param nSamples: number of elements in input array * @param minLabel: [out] calculated min value in input array * @param maxLabel: [out] calculated max value in input array */ @@ -56,6 +59,8 @@ void getInputClassCardinality(const raft::handle_t& handle, raft::host_scalar_view minLabel, raft::host_scalar_view maxLabel) { + RAFT_EXPECTS(minLabel.data_handle() != nullptr, "Invalid minLabel pointer"); + RAFT_EXPECTS(maxLabel.data_handle() != nullptr, "Invalid maxLabel pointer"); detail::getInputClassCardinality(groundTruth.data_handle(), groundTruth.extent(0), handle.get_stream(), @@ -86,7 +91,8 @@ size_t getContingencyMatrixWorkspaceSize(int nSamples, /** * @brief Calculate workspace size for running contingency matrix calculations - * @tparam T label type + * @tparam DataT label type + * @tparam IdxType Index type of matrix extent. * @param handle: the raft handle. * @param groundTruth: device 1-d array for ground truth (num of rows) * @param minLabel: Optional, min value in input array @@ -95,11 +101,18 @@ size_t getContingencyMatrixWorkspaceSize(int nSamples, template size_t getContingencyMatrixWorkspaceSize(const raft::handle_t& handle, raft::device_vector_view groundTruth, - DataT minLabel = std::numeric_limits::max(), - DataT maxLabel = std::numeric_limits::max()) + std::optional minLabel = std::nullopt, + std::optional maxLabel = std::nullopt) { - return detail::getContingencyMatrixWorkspaceSize( - groundTruth.extent(0), groundTruth.data_handle(), handle.get_stream(), minLabel, maxLabel); + DataT minLabelValue = std::numeric_limits::max(); + DataT maxLabelValue = std::numeric_limits::max(); + if (minLabel.has_value()) { minLabelValue = minLabel.value(); } + if (maxLabel.has_value()) { maxLabelValue = maxLabel.value(); } + return detail::getContingencyMatrixWorkspaceSize(groundTruth.extent(0), + groundTruth.data_handle(), + handle.get_stream(), + minLabelValue, + maxLabelValue); } /** diff --git a/cpp/include/raft/stats/cov.cuh b/cpp/include/raft/stats/cov.cuh index fdc0e4acdf..6d8c8683f6 100644 --- a/cpp/include/raft/stats/cov.cuh +++ b/cpp/include/raft/stats/cov.cuh @@ -19,7 +19,7 @@ #pragma once -#include +#include #include namespace raft { namespace stats { @@ -64,13 +64,13 @@ void cov(const raft::handle_t& handle, * * Mean operation is assumed to be performed on a given column. * - * @tparam Type the data type + * @tparam DataT the data type * @tparam IdxT the index type * @tparam LayoutPolicy Layout type of the input data. * @param handle the raft handle - * @param covar the output covariance matrix * @param data the input matrix (this will get mean-centered at the end!) * @param mu mean vector of the input matrix + * @param covar the output covariance matrix * @param sample whether to evaluate sample covariance or not. In other words, * whether to normalize the output using N-1 or N, for true or false, * respectively @@ -80,9 +80,9 @@ void cov(const raft::handle_t& handle, */ template void cov(const raft::handle_t& handle, - raft::device_matrix_view covar, raft::device_matrix_view data, raft::device_vector_view mu, + raft::device_matrix_view covar, bool sample, bool stable) { diff --git a/cpp/include/raft/stats/dispersion.cuh b/cpp/include/raft/stats/dispersion.cuh index 7ccd37d0ee..c6e8eab3d5 100644 --- a/cpp/include/raft/stats/dispersion.cuh +++ b/cpp/include/raft/stats/dispersion.cuh @@ -20,7 +20,7 @@ #pragma once #include -#include +#include #include namespace raft { @@ -62,7 +62,6 @@ DataT dispersion(const DataT* centroids, * automatically finding the 'k' (in kmeans) that improves this metric. * @tparam DataT data type * @tparam IdxType index type - * @tparam LayoutPolicy Layout type of the input data. * @tparam TPB threads block for kernels launched * @param handle the raft handle * @param centroids the cluster centroids. This is assumed to be row-major @@ -79,7 +78,8 @@ DataT dispersion(const raft::handle_t& handle, raft::device_matrix_view centroids, raft::device_vector_view clusterSizes, std::optional> globalCentroid, - const IdxType nPoints) + const IdxType nPoints, + std::integral_constant) { RAFT_EXPECTS(clusterSizes.extent(0) == centroids.extent(0), "Size mismatch"); RAFT_EXPECTS(clusterSizes.is_exhaustive(), "clusterSizes must be contiguous"); diff --git a/cpp/include/raft/stats/entropy.cuh b/cpp/include/raft/stats/entropy.cuh index a3800a0628..5e474f909f 100644 --- a/cpp/include/raft/stats/entropy.cuh +++ b/cpp/include/raft/stats/entropy.cuh @@ -18,7 +18,7 @@ #define __ENTROPY_H #pragma once -#include +#include #include namespace raft { @@ -51,7 +51,7 @@ double entropy(const T* clusterArray, * more info on entropy * * @tparam DataT data type - * @tparam IdxT index type + * @tparam IdxType index type * @param handle the raft handle * @param clusterArray: the array of classes of type DataT * @param lowerLabelRange: the lower bound of the range of labels diff --git a/cpp/include/raft/stats/histogram.cuh b/cpp/include/raft/stats/histogram.cuh index 229db6ebba..1d9e2b2194 100644 --- a/cpp/include/raft/stats/histogram.cuh +++ b/cpp/include/raft/stats/histogram.cuh @@ -19,7 +19,7 @@ #pragma once -#include +#include #include #include @@ -69,11 +69,10 @@ void histogram(HistType type, * @tparam DataT input data type * @tparam IdxType data type used to compute indices * @tparam BinnerOp takes the input data and computes its bin index - * @tparam LayoutPolicy Layout type of the input data. * @param handle the raft handle * @param type histogram implementation type to choose - * @param bins the output bins col-major (length = nbins * ncols) * @param data input data col-major (length = nrows * ncols) + * @param bins the output bins col-major (length = nbins * ncols) * @param binner the operation that computes the bin index of the input data * * @note signature of BinnerOp is `int func(DataT, IdxT);` @@ -83,8 +82,8 @@ template > void histogram(const raft::handle_t& handle, HistType type, - raft::device_matrix_view bins, raft::device_matrix_view data, + raft::device_matrix_view bins, BinnerOp binner = IdentityBinner()) { RAFT_EXPECTS(std::is_integral_v && data.extent(0) <= std::numeric_limits::max(), diff --git a/cpp/include/raft/stats/homogeneity_score.cuh b/cpp/include/raft/stats/homogeneity_score.cuh index 641f16ab9e..f7f90e928b 100644 --- a/cpp/include/raft/stats/homogeneity_score.cuh +++ b/cpp/include/raft/stats/homogeneity_score.cuh @@ -19,7 +19,7 @@ #pragma once -#include +#include #include namespace raft { diff --git a/cpp/include/raft/stats/information_criterion.cuh b/cpp/include/raft/stats/information_criterion.cuh index 65de7d6886..7ec1cd8812 100644 --- a/cpp/include/raft/stats/information_criterion.cuh +++ b/cpp/include/raft/stats/information_criterion.cuh @@ -29,7 +29,8 @@ #pragma once -#include +#include +#include #include #include @@ -73,17 +74,17 @@ void information_criterion_batched(ScalarT* d_ic, * @tparam DataT data type * @tparam IdxType index type * @param[in] handle the raft handle + * @param[in] d_loglikelihood Log-likelihood for each series (device) length: batch_size * @param[out] d_ic Information criterion to be returned for each * series (device) length: batch_size - * @param[in] d_loglikelihood Log-likelihood for each series (device) length: batch_size * @param[in] ic_type Type of criterion to compute. See IC_Type * @param[in] n_params Number of parameters in the model * @param[in] n_samples Number of samples in each series */ template void information_criterion_batched(const raft::handle_t& handle, - raft::device_vector_view d_ic, raft::device_vector_view d_loglikelihood, + raft::device_vector_view d_ic, IC_Type ic_type, IdxType n_params, IdxType n_samples) diff --git a/cpp/include/raft/stats/kl_divergence.cuh b/cpp/include/raft/stats/kl_divergence.cuh index eabf95454b..d81ece33eb 100644 --- a/cpp/include/raft/stats/kl_divergence.cuh +++ b/cpp/include/raft/stats/kl_divergence.cuh @@ -19,7 +19,7 @@ #pragma once -#include +#include #include namespace raft { diff --git a/cpp/include/raft/stats/mean.cuh b/cpp/include/raft/stats/mean.cuh index b2175f9884..b5e7284129 100644 --- a/cpp/include/raft/stats/mean.cuh +++ b/cpp/include/raft/stats/mean.cuh @@ -19,8 +19,8 @@ #pragma once +#include #include -#include #include namespace raft { @@ -59,15 +59,15 @@ void mean( * @tparam IdxType index type * @tparam LayoutPolicy Layout type of the input matrix. * @param handle the raft handle - * @param mu: the output mean vector * @param data: the input matrix + * @param mu: the output mean vector * @param sample: whether to evaluate sample mean or not. In other words, whether * to normalize the output using N-1 or N, for true or false, respectively */ template void mean(const raft::handle_t& handle, - raft::device_vector_view mu, raft::device_matrix_view data, + raft::device_vector_view mu, bool sample) { static_assert( diff --git a/cpp/include/raft/stats/mean_center.cuh b/cpp/include/raft/stats/mean_center.cuh index 8b2b1e5afd..3d14c7fa86 100644 --- a/cpp/include/raft/stats/mean_center.cuh +++ b/cpp/include/raft/stats/mean_center.cuh @@ -19,7 +19,7 @@ #pragma once -#include +#include #include namespace raft { @@ -59,16 +59,16 @@ void meanCenter(Type* out, * @tparam LayoutPolicy Layout type of the input matrix. * @tparam TPB threads per block of the cuda kernel launched * @param handle the raft handle - * @param out the output mean-centered matrix * @param data input matrix * @param mu the mean vector + * @param out the output mean-centered matrix * @param bcastAlongRows whether to broadcast vector along rows or columns */ template void meanCenter(const raft::handle_t& handle, - raft::device_matrix_view out, raft::device_matrix_view data, raft::device_vector_view mu, + raft::device_matrix_view out, bool bcastAlongRows, std::integral_constant) { @@ -123,16 +123,16 @@ void meanAdd(Type* out, * @tparam LayoutPolicy Layout type of the input matrix. * @tparam TPB threads per block of the cuda kernel launched * @param handle the raft handle - * @param out the output mean-centered matrix * @param data input matrix * @param mu the mean vector + * @param out the output mean-centered matrix * @param bcastAlongRows whether to broadcast vector along rows or columns */ template void meanAdd(const raft::handle_t& handle, - raft::device_matrix_view out, raft::device_matrix_view data, raft::device_vector_view mu, + raft::device_matrix_view out, bool bcastAlongRows, std::integral_constant) { diff --git a/cpp/include/raft/stats/meanvar.cuh b/cpp/include/raft/stats/meanvar.cuh index b37da4478c..f2e9b56bea 100644 --- a/cpp/include/raft/stats/meanvar.cuh +++ b/cpp/include/raft/stats/meanvar.cuh @@ -18,7 +18,7 @@ #pragma once -#include +#include #include namespace raft::stats { @@ -68,17 +68,17 @@ void meanvar(Type* mean, * @tparam IdxType Integer type used for addressing * @tparam LayoutPolicy Layout type of the input matrix. * @param handle the raft handle + * @param [in] data the input matrix of size [N, D] * @param [out] mean the output mean vector of size D * @param [out] var the output variance vector of size D - * @param [in] data the input matrix of size [N, D] * @param [in] sample whether to evaluate sample variance or not. In other words, whether to * normalize the variance using N-1 or N, for true or false respectively. */ template void meanvar(const raft::handle_t& handle, + raft::device_matrix_view data, raft::device_vector_view mean, raft::device_vector_view var, - raft::device_matrix_view data, bool sample) { static_assert( diff --git a/cpp/include/raft/stats/minmax.cuh b/cpp/include/raft/stats/minmax.cuh index 31f75f131e..fdc47a785d 100644 --- a/cpp/include/raft/stats/minmax.cuh +++ b/cpp/include/raft/stats/minmax.cuh @@ -19,7 +19,7 @@ #pragma once #include -#include +#include #include #include #include diff --git a/cpp/test/nvtx.cpp b/cpp/test/nvtx.cpp index 81f692a215..d982642929 100644 --- a/cpp/test/nvtx.cpp +++ b/cpp/test/nvtx.cpp @@ -15,7 +15,7 @@ */ #ifdef NVTX_ENABLED #include -#include +#include /** * tests for the functionality of generating next color based on string * entered in the NVTX Range marker wrappers diff --git a/cpp/test/stats/adjusted_rand_index.cu b/cpp/test/stats/adjusted_rand_index.cu index 7eabb9b035..e7e892b7fd 100644 --- a/cpp/test/stats/adjusted_rand_index.cu +++ b/cpp/test/stats/adjusted_rand_index.cu @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/cpp/test/stats/cov.cu b/cpp/test/stats/cov.cu index a799610339..890c5b7826 100644 --- a/cpp/test/stats/cov.cu +++ b/cpp/test/stats/cov.cu @@ -72,17 +72,17 @@ class CovTest : public ::testing::TestWithParam> { if (params.rowMajor) { using layout = raft::row_major; cov(handle, - raft::make_device_matrix_view(cov_act.data(), cols, cols), raft::make_device_matrix_view(data.data(), rows, cols), raft::make_device_vector_view(mean_act.data(), cols), + raft::make_device_matrix_view(cov_act.data(), cols, cols), params.sample, params.stable); } else { using layout = raft::col_major; cov(handle, - raft::make_device_matrix_view(cov_act.data(), cols, cols), raft::make_device_matrix_view(data.data(), rows, cols), raft::make_device_vector_view(mean_act.data(), cols), + raft::make_device_matrix_view(cov_act.data(), cols, cols), params.sample, params.stable); } diff --git a/cpp/test/stats/dispersion.cu b/cpp/test/stats/dispersion.cu index 485b80364e..5cb353a69a 100644 --- a/cpp/test/stats/dispersion.cu +++ b/cpp/test/stats/dispersion.cu @@ -69,7 +69,8 @@ class DispersionTest : public ::testing::TestWithParam> { raft::make_device_matrix_view(data.data(), params.clusters, params.dim), raft::make_device_vector_view(counts.data(), params.clusters), std::make_optional(raft::make_device_vector_view(act_mean.data(), params.dim)), - npoints); + npoints, + std::integral_constant{}); expectedVal = T(0); std::vector h_data(len, T(0)); raft::update_host(&(h_data[0]), data.data(), len, stream); diff --git a/cpp/test/stats/histogram.cu b/cpp/test/stats/histogram.cu index 76b5d27a5b..8581cf9492 100644 --- a/cpp/test/stats/histogram.cu +++ b/cpp/test/stats/histogram.cu @@ -84,12 +84,12 @@ class HistTest : public ::testing::TestWithParam { RAFT_CUDA_TRY( cudaMemsetAsync(ref_bins.data(), 0, sizeof(int) * params.nbins * params.ncols, stream)); naiveHist(ref_bins.data(), params.nbins, in.data(), params.nrows, params.ncols, stream); - histogram(handle, - params.type, - raft::make_device_matrix_view( - bins.data(), params.nbins, params.ncols), - raft::make_device_matrix_view( - in.data(), params.nrows, params.ncols)); + histogram(handle, + params.type, + raft::make_device_matrix_view( + in.data(), params.nrows, params.ncols), + raft::make_device_matrix_view( + bins.data(), params.nbins, params.ncols)); handle.sync_stream(); } diff --git a/cpp/test/stats/information_criterion.cu b/cpp/test/stats/information_criterion.cu index 0cc5cb78d8..1fb0d0ad30 100644 --- a/cpp/test/stats/information_criterion.cu +++ b/cpp/test/stats/information_criterion.cu @@ -91,8 +91,8 @@ class BatchedICTest : public ::testing::TestWithParam> { // Compute the tested results information_criterion_batched( handle, - raft::make_device_vector_view(res_d.data(), params.batch_size), raft::make_device_vector_view(loglike_d.data(), params.batch_size), + raft::make_device_vector_view(res_d.data(), params.batch_size), params.ic_type, params.n_params, params.n_samples); diff --git a/cpp/test/stats/mean.cu b/cpp/test/stats/mean.cu index ec9f69c817..ab06697005 100644 --- a/cpp/test/stats/mean.cu +++ b/cpp/test/stats/mean.cu @@ -68,14 +68,14 @@ class MeanTest : public ::testing::TestWithParam> { if (params.rowMajor) { using layout = raft::row_major; mean(handle, - raft::make_device_vector_view(mean_act.data(), rows * cols), raft::make_device_matrix_view(data, rows, cols), + raft::make_device_vector_view(mean_act.data(), rows * cols), params.sample); } else { using layout = raft::col_major; mean(handle, - raft::make_device_vector_view(mean_act.data(), rows * cols), raft::make_device_matrix_view(data, rows, cols), + raft::make_device_vector_view(mean_act.data(), rows * cols), params.sample); } } diff --git a/cpp/test/stats/mean_center.cu b/cpp/test/stats/mean_center.cu index 03058e73bc..2cedf9944e 100644 --- a/cpp/test/stats/mean_center.cu +++ b/cpp/test/stats/mean_center.cu @@ -66,17 +66,17 @@ class MeanCenterTest : public ::testing::TestWithParam(out.data(), rows, cols), raft::make_device_matrix_view(data.data(), rows, cols), raft::make_device_vector_view(meanVec.data(), meanVecSize), + raft::make_device_matrix_view(out.data(), rows, cols), params.bcastAlongRows, std::integral_constant{}); } else { using layout = raft::col_major; meanCenter(handle, - raft::make_device_matrix_view(out.data(), rows, cols), raft::make_device_matrix_view(data.data(), rows, cols), raft::make_device_vector_view(meanVec.data(), meanVecSize), + raft::make_device_matrix_view(out.data(), rows, cols), params.bcastAlongRows, std::integral_constant{}); } diff --git a/cpp/test/stats/meanvar.cu b/cpp/test/stats/meanvar.cu index d71cd51a3f..424395c5e8 100644 --- a/cpp/test/stats/meanvar.cu +++ b/cpp/test/stats/meanvar.cu @@ -72,17 +72,17 @@ class MeanVarTest : public ::testing::TestWithParam> { using layout = raft::row_major; meanvar( handle, + raft::make_device_matrix_view(data.data(), params.rows, params.cols), raft::make_device_vector_view(mean_act.data(), params.cols), raft::make_device_vector_view(vars_act.data(), params.cols), - raft::make_device_matrix_view(data.data(), params.rows, params.cols), params.sample); } else { using layout = raft::col_major; meanvar( handle, + raft::make_device_matrix_view(data.data(), params.rows, params.cols), raft::make_device_vector_view(mean_act.data(), params.cols), raft::make_device_vector_view(vars_act.data(), params.cols), - raft::make_device_matrix_view(data.data(), params.rows, params.cols), params.sample); } RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); diff --git a/cpp/test/stats/minmax.cu b/cpp/test/stats/minmax.cu index 80fd9a4bc4..d3e0aa2637 100644 --- a/cpp/test/stats/minmax.cu +++ b/cpp/test/stats/minmax.cu @@ -17,7 +17,7 @@ #include "../test_utils.h" #include #include -#include +#include #include #include #include From bbdf6dd39d3aa41a92e271bbb84e06c985cd3012 Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Thu, 22 Sep 2022 16:11:53 +0200 Subject: [PATCH 15/40] Fix copyright --- cpp/test/nvtx.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/test/nvtx.cpp b/cpp/test/nvtx.cpp index d982642929..635fe55012 100644 --- a/cpp/test/nvtx.cpp +++ b/cpp/test/nvtx.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 55c0c91eed2aaaf5be83bc1d716a3f4b8b97fbb6 Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Fri, 23 Sep 2022 14:17:21 +0200 Subject: [PATCH 16/40] Fix tests --- cpp/include/raft/stats/cov.cuh | 8 +++++--- cpp/include/raft/stats/mean.cuh | 2 +- cpp/include/raft/stats/mean_center.cuh | 6 ++++-- cpp/include/raft/stats/meanvar.cuh | 2 +- cpp/test/stats/mean.cu | 6 +++--- 5 files changed, 14 insertions(+), 10 deletions(-) diff --git a/cpp/include/raft/stats/cov.cuh b/cpp/include/raft/stats/cov.cuh index 6d8c8683f6..db99b7e31b 100644 --- a/cpp/include/raft/stats/cov.cuh +++ b/cpp/include/raft/stats/cov.cuh @@ -69,8 +69,9 @@ void cov(const raft::handle_t& handle, * @tparam LayoutPolicy Layout type of the input data. * @param handle the raft handle * @param data the input matrix (this will get mean-centered at the end!) - * @param mu mean vector of the input matrix - * @param covar the output covariance matrix + * (length = nrows * ncols) + * @param mu mean vector of the input matrix (length = ncols) + * @param covar the output covariance matrix (length = ncols * ncols) * @param sample whether to evaluate sample covariance or not. In other words, * whether to normalize the output using N-1 or N, for true or false, * respectively @@ -89,7 +90,8 @@ void cov(const raft::handle_t& handle, static_assert( std::is_same_v || std::is_same_v, "Data layout not supported"); - RAFT_EXPECTS(data.size() == covar.size(), "Size mismatch"); + RAFT_EXPECTS(data.extent(1) == covar.extent(0) && data.extent(1) == covar.extent(1), + "Size mismatch"); RAFT_EXPECTS(data.is_exhaustive(), "data must be contiguous"); RAFT_EXPECTS(covar.is_exhaustive(), "covar must be contiguous"); RAFT_EXPECTS(mu.is_exhaustive(), "mu must be contiguous"); diff --git a/cpp/include/raft/stats/mean.cuh b/cpp/include/raft/stats/mean.cuh index b5e7284129..24c293abb6 100644 --- a/cpp/include/raft/stats/mean.cuh +++ b/cpp/include/raft/stats/mean.cuh @@ -73,7 +73,7 @@ void mean(const raft::handle_t& handle, static_assert( std::is_same_v || std::is_same_v, "Data layout not supported"); - RAFT_EXPECTS(data.extent(0) == mu.extent(0), "Size mismatch betwen data and mu"); + RAFT_EXPECTS(data.extent(1) == mu.extent(0), "Size mismatch betwen data and mu"); RAFT_EXPECTS(mu.is_exhaustive(), "mu must be contiguous"); RAFT_EXPECTS(data.is_exhaustive(), "data must be contiguous"); detail::mean(mu.data_handle(), diff --git a/cpp/include/raft/stats/mean_center.cuh b/cpp/include/raft/stats/mean_center.cuh index 3d14c7fa86..988d88ff48 100644 --- a/cpp/include/raft/stats/mean_center.cuh +++ b/cpp/include/raft/stats/mean_center.cuh @@ -75,8 +75,9 @@ void meanCenter(const raft::handle_t& handle, static_assert( std::is_same_v || std::is_same_v, "Data layout not supported"); + auto meanVecSize = bcastAlongRows ? data.extent(1) : data.extent(0); RAFT_EXPECTS(out.size() == data.size(), "Size mismatch"); - RAFT_EXPECTS(data.extent(0) == mu.extent(0), "Size mismatch betwen data and mu"); + RAFT_EXPECTS(meanVecSize == mu.extent(0), "Size mismatch betwen data and mu"); RAFT_EXPECTS(out.is_exhaustive(), "out must be contiguous"); RAFT_EXPECTS(data.is_exhaustive(), "data must be contiguous"); detail::meanCenter(out.data_handle(), @@ -139,8 +140,9 @@ void meanAdd(const raft::handle_t& handle, static_assert( std::is_same_v || std::is_same_v, "Data layout not supported"); + auto meanVecSize = bcastAlongRows ? data.extent(1) : data.extent(0); RAFT_EXPECTS(out.size() == data.size(), "Size mismatch"); - RAFT_EXPECTS(data.extent(0) == mu.size(), "Size mismatch betwen data and mu"); + RAFT_EXPECTS(meanVecSize == mu.extent(0), "Size mismatch betwen data and mu"); RAFT_EXPECTS(out.is_exhaustive(), "out must be contiguous"); RAFT_EXPECTS(data.is_exhaustive(), "data must be contiguous"); detail::meanAdd(out.data_handle(), diff --git a/cpp/include/raft/stats/meanvar.cuh b/cpp/include/raft/stats/meanvar.cuh index f2e9b56bea..b7533caf97 100644 --- a/cpp/include/raft/stats/meanvar.cuh +++ b/cpp/include/raft/stats/meanvar.cuh @@ -84,7 +84,7 @@ void meanvar(const raft::handle_t& handle, static_assert( std::is_same_v || std::is_same_v, "Data layout not supported"); - RAFT_EXPECTS(data.extent(0) == var.extent(0), "Size mismatch betwen data and var"); + RAFT_EXPECTS(data.extent(1) == var.extent(0), "Size mismatch betwen data and var"); RAFT_EXPECTS(mean.size() == var.size(), "Size mismatch betwen mean and var"); RAFT_EXPECTS(mean.is_exhaustive(), "mean must be contiguous"); RAFT_EXPECTS(var.is_exhaustive(), "var must be contiguous"); diff --git a/cpp/test/stats/mean.cu b/cpp/test/stats/mean.cu index ab06697005..b299f81f68 100644 --- a/cpp/test/stats/mean.cu +++ b/cpp/test/stats/mean.cu @@ -49,7 +49,7 @@ class MeanTest : public ::testing::TestWithParam> { rows(params.rows), cols(params.cols), data(rows * cols, stream), - mean_act(rows * cols, stream) + mean_act(cols, stream) { } @@ -69,13 +69,13 @@ class MeanTest : public ::testing::TestWithParam> { using layout = raft::row_major; mean(handle, raft::make_device_matrix_view(data, rows, cols), - raft::make_device_vector_view(mean_act.data(), rows * cols), + raft::make_device_vector_view(mean_act.data(), cols), params.sample); } else { using layout = raft::col_major; mean(handle, raft::make_device_matrix_view(data, rows, cols), - raft::make_device_vector_view(mean_act.data(), rows * cols), + raft::make_device_vector_view(mean_act.data(), cols), params.sample); } } From b5cf18bf50c23bdcf7fec1383587c5b8bf03298f Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Mon, 26 Sep 2022 16:42:46 +0200 Subject: [PATCH 17/40] Update remaining stats function and their tests with mdspan --- cpp/include/raft/stats/mutual_info_score.cuh | 30 +++++++ cpp/include/raft/stats/r2_score.cuh | 28 +++++++ cpp/include/raft/stats/rand_index.cuh | 25 ++++++ cpp/include/raft/stats/regression_metrics.cuh | 43 ++++++++++ cpp/include/raft/stats/silhouette_score.cuh | 78 +++++++++++++++++ cpp/include/raft/stats/stddev.cuh | 84 ++++++++++++++++++- cpp/include/raft/stats/sum.cuh | 35 +++++++- .../raft/stats/trustworthiness_score.cuh | 33 ++++++++ cpp/include/raft/stats/v_measure.cuh | 36 ++++++++ cpp/include/raft/stats/weighted_mean.cuh | 84 +++++++++++++++++++ cpp/test/stats/mutual_info_score.cu | 19 ++--- cpp/test/stats/rand_index.cu | 11 +-- cpp/test/stats/silhouette_score.cu | 34 ++++---- cpp/test/stats/stddev.cu | 45 ++++++++-- cpp/test/stats/sum.cu | 4 +- cpp/test/stats/trustworthiness.cu | 10 ++- cpp/test/stats/v_measure.cu | 20 ++--- 17 files changed, 559 insertions(+), 60 deletions(-) diff --git a/cpp/include/raft/stats/mutual_info_score.cuh b/cpp/include/raft/stats/mutual_info_score.cuh index 9e48168e74..2af8a939fc 100644 --- a/cpp/include/raft/stats/mutual_info_score.cuh +++ b/cpp/include/raft/stats/mutual_info_score.cuh @@ -19,6 +19,7 @@ #pragma once +#include #include namespace raft { @@ -46,6 +47,35 @@ double mutual_info_score(const T* firstClusterArray, firstClusterArray, secondClusterArray, size, lowerLabelRange, upperLabelRange, stream); } +/** + * @brief Function to calculate the mutual information between two clusters + * more info on mutual information + * @tparam value_t the data type + * @tparam idx_t index type + * @param handle the raft handle + * @param first_cluster_array: the array of classes of type value_t + * @param second_cluster_array: the array of classes of type value_t + * @param lower_label_range: the lower bound of the range of labels + * @param upper_label_range: the upper bound of the range of labels + */ +template +double mutual_info_score(const raft::handle_t& handle, + raft::device_vector_view first_cluster_array, + raft::device_vector_view second_cluster_array, + value_t lower_label_range, + value_t upper_label_range) +{ + RAFT_EXPECTS(first_cluster_array.extent(0) == second_cluster_array.extent(0), + "Size mismatch betwen first_cluster_array and second_cluster_array"); + RAFT_EXPECTS(first_cluster_array.is_exhaustive(), "first_cluster_array must be contiguous"); + RAFT_EXPECTS(second_cluster_array.is_exhaustive(), "second_cluster_array must be contiguous"); + return detail::mutual_info_score(first_cluster_array.data_handle(), + second_cluster_array.data_handle(), + first_cluster_array.extent(0), + lower_label_range, + upper_label_range, + handle.get_stream()); +} }; // end namespace stats }; // end namespace raft diff --git a/cpp/include/raft/stats/r2_score.cuh b/cpp/include/raft/stats/r2_score.cuh index 88fac5aaa6..44774d710c 100644 --- a/cpp/include/raft/stats/r2_score.cuh +++ b/cpp/include/raft/stats/r2_score.cuh @@ -19,6 +19,7 @@ #pragma once +#include #include namespace raft { @@ -45,6 +46,33 @@ math_t r2_score(math_t* y, math_t* y_hat, int n, cudaStream_t stream) return detail::r2_score(y, y_hat, n, stream); } +/** + * Calculates the "Coefficient of Determination" (R-Squared) score + * normalizing the sum of squared errors by the total sum of squares. + * + * This score indicates the proportionate amount of variation in an + * expected response variable is explained by the independent variables + * in a linear regression model. The larger the R-squared value, the + * more variability is explained by the linear regression model. + * + * @tparam value_t the data type + * @tparam idx_t index type + * @param handle the raft handle + * @param y: Array of ground-truth response variables + * @param y_hat: Array of predicted response variables + * @return: The R-squared value. + */ +template +value_t r2_score(const raft::handle_t& handle, + raft::device_vector_view y, + raft::device_vector_view y_hat) +{ + RAFT_EXPECTS(y.extent(0) == y_hat.extent(0), "Size mismatch betwen y and y_hat"); + RAFT_EXPECTS(y.is_exhaustive(), "y must be contiguous"); + RAFT_EXPECTS(y_hat.is_exhaustive(), "y_hat must be contiguous"); + return detail::r2_score(y.data_handle(), y_hat.data_handle(), y.extent(0), handle.get_stream()); +} + } // namespace stats } // namespace raft diff --git a/cpp/include/raft/stats/rand_index.cuh b/cpp/include/raft/stats/rand_index.cuh index 82bf046c4e..d689dace87 100644 --- a/cpp/include/raft/stats/rand_index.cuh +++ b/cpp/include/raft/stats/rand_index.cuh @@ -18,6 +18,8 @@ #pragma once +#include +#include #include namespace raft { @@ -37,6 +39,29 @@ double rand_index(T* firstClusterArray, T* secondClusterArray, uint64_t size, cu return detail::compute_rand_index(firstClusterArray, secondClusterArray, size, stream); } +/** + * @brief Function to calculate RandIndex + * more info on rand index + * @tparam value_t the data type + * @tparam idx_t index type + * @param handle the raft handle + * @param first_cluster_array: the array of classes of type value_t + * @param second_cluster_array: the array of classes of type value_t + */ +template +double rand_index(const raft::handle_t& handle, + raft::device_vector_view first_cluster_array, + raft::device_vector_view second_cluster_array) +{ + RAFT_EXPECTS(first_cluster_array.extent(0) == second_cluster_array.extent(0), + "Size mismatch betwen first_cluster_array and second_cluster_array"); + RAFT_EXPECTS(first_cluster_array.is_exhaustive(), "first_cluster_array must be contiguous"); + RAFT_EXPECTS(second_cluster_array.is_exhaustive(), "second_cluster_array must be contiguous"); + return detail::compute_rand_index(first_cluster_array.data_handle(), + second_cluster_array.data_handle(), + second_cluster_array.extent(0), + handle.get_stream()); +} }; // end namespace stats }; // end namespace raft diff --git a/cpp/include/raft/stats/regression_metrics.cuh b/cpp/include/raft/stats/regression_metrics.cuh index 0fb6d39967..3c1ef094d0 100644 --- a/cpp/include/raft/stats/regression_metrics.cuh +++ b/cpp/include/raft/stats/regression_metrics.cuh @@ -18,6 +18,9 @@ #pragma once +#include +#include +#include #include namespace raft { @@ -49,6 +52,46 @@ void regression_metrics(const T* predictions, detail::regression_metrics( predictions, ref_predictions, n, stream, mean_abs_error, mean_squared_error, median_abs_error); } + +/** + * @brief Compute regression metrics mean absolute error, mean squared error, median absolute error + * @tparam value_t the data type for predictions (e.g., float or double for regression). + * @tparam idx_t index type + * @param[in] handle the raft handle + * @param[in] predictions: array of predictions. + * @param[in] ref_predictions: array of reference (ground-truth) predictions. + * @param[out] mean_abs_error: Mean Absolute Error. Sum over n of (|predictions[i] - + * ref_predictions[i]|) / n. + * @param[out] mean_squared_error: Mean Squared Error. Sum over n of ((predictions[i] - + * ref_predictions[i])^2) / n. + * @param[out] median_abs_error: Median Absolute Error. Median of |predictions[i] - + * ref_predictions[i]| for i in [0, n). + */ +template +void regression_metrics(const raft::handle_t& handle, + raft::device_vector_view predictions, + raft::device_vector_view ref_predictions, + raft::host_scalar_view mean_abs_error, + raft::host_scalar_view mean_squared_error, + raft::host_scalar_view median_abs_error) +{ + RAFT_EXPECTS(predictions.extent(0) == ref_predictions.extent(0), + "Size mismatch betwen predictions and ref_predictions"); + RAFT_EXPECTS(predictions.is_exhaustive(), "predictions must be contiguous"); + RAFT_EXPECTS(ref_predictions.is_exhaustive(), "ref_predictions must be contiguous"); + RAFT_EXPECTS(mean_abs_error.data_handle() != nullptr, "mean_abs_error view must not be empty"); + RAFT_EXPECTS(mean_squared_error.data_handle() != nullptr, + "mean_squared_error view must not be empty"); + RAFT_EXPECTS(median_abs_error.data_handle() != nullptr, + "median_abs_error view must not be empty"); + detail::regression_metrics(predictions.data_handle(), + ref_predictions.data_handle(), + predictions.extent(0), + handle.get_stream(), + *mean_abs_error.data_handle(), + *mean_squared_error.data_handle(), + *median_abs_error.data_handle()); +} } // namespace stats } // namespace raft diff --git a/cpp/include/raft/stats/silhouette_score.cuh b/cpp/include/raft/stats/silhouette_score.cuh index 9f02cf6d74..86f6be5040 100644 --- a/cpp/include/raft/stats/silhouette_score.cuh +++ b/cpp/include/raft/stats/silhouette_score.cuh @@ -18,6 +18,7 @@ #pragma once +#include #include #include @@ -73,6 +74,83 @@ value_t silhouette_score_batched( handle, X, n_rows, n_cols, y, n_labels, scores, chunk, metric); } +/** + * @brief main function that returns the average silhouette score for a given set of data and its + * clusterings + * @tparam value_t: type of the data samples + * @tparam label_t: type of the labels + * @tparam idx_t index type + * @param[in] handle: raft handle for managing expensive resources + * @param[in] X_in: input matrix Data in row-major format (nRows x nCols) + * @param[in] labels: the pointer to the array containing labels for every data sample (length: + * nRows) + * @param[out] silhouette_score_per_sample: optional array populated with the silhouette score + * for every sample (length: nRows) + * @param n_unique_labels: number of unique labels in the labels array + * @param metric: the numerical value that maps to the type of distance metric to be used in the + * calculations + */ +template +value_t silhouette_score( + const raft::handle_t& handle, + raft::device_matrix_view X_in, + raft::device_vector_view labels, + std::optional> silhouette_score_per_sample, + idx_t n_unique_labels, + raft::distance::DistanceType metric = raft::distance::DistanceType::L2Unexpanded) +{ + RAFT_EXPECTS(labels.extent(0) == X_in.extent(0), "Size mismatch betwen labels and data"); + + value_t* silhouette_score_per_sample_ptr = nullptr; + if (silhouette_score_per_sample.has_value()) { + silhouette_score_per_sample_ptr = silhouette_score_per_sample.value().data_handle(); + RAFT_EXPECTS(silhouette_score_per_sample.value().extent(0) == X_in.extent(0), + "Size mismatch betwen silhouette_score_per_sample and data"); + } + return detail::silhouette_score(handle, + X_in.data_handle(), + X_in.extent(0), + X_in.extent(1), + labels.data_handle(), + n_unique_labels, + silhouette_score_per_sample_ptr, + handle.get_stream(), + metric); +} + +template +value_t silhouette_score_batched( + const raft::handle_t& handle, + raft::device_matrix_view X, + raft::device_vector_view y, + std::optional> scores, + idx_t n_unique_labels, + raft::distance::DistanceType metric = raft::distance::DistanceType::L2Unexpanded) +{ + static_assert(std::is_integral_v, + "silhouette_score_batched: The index type " + "of each mdspan argument must be an integral type."); + static_assert(std::is_integral_v, + "silhouette_score_batched: The label type must be an integral type."); + RAFT_EXPECTS(y.extent(0) == X.extent(0), "Size mismatch betwen y and X"); + + value_t* scores_ptr = nullptr; + idx_t nscores = 0; + if (scores.has_value()) { + scores_ptr = scores.value().data_handle(); + nscores = scores.value().extent(0); + } + return batched::detail::silhouette_score(handle, + X.data_handle(), + X.extent(0), + X.extent(1), + y.data_handle(), + n_unique_labels, + scores_ptr, + nscores, + metric); +} + }; // namespace stats }; // namespace raft diff --git a/cpp/include/raft/stats/stddev.cuh b/cpp/include/raft/stats/stddev.cuh index 3fc41ebc8c..5520599226 100644 --- a/cpp/include/raft/stats/stddev.cuh +++ b/cpp/include/raft/stats/stddev.cuh @@ -18,9 +18,9 @@ #pragma once -#include "detail/stddev.cuh" - +#include #include +#include namespace raft { namespace stats { @@ -87,6 +87,86 @@ void vars(Type* var, detail::vars(var, data, mu, D, N, sample, rowMajor, stream); } +/** + * @brief Compute stddev of the input matrix + * + * Stddev operation is assumed to be performed on a given column. + * + * @tparam value_t the data type + * @tparam idx_t Integer type used to for addressing + * @tparam layout_t Layout type of the input matrix. + * @param handle the raft handle + * @param data the input matrix + * @param mu the mean vector + * @param std the output stddev vector + * @param sample whether to evaluate sample stddev or not. In other words, + * whether + * to normalize the output using N-1 or N, for true or false, respectively + */ +template +void stddev(const raft::handle_t& handle, + raft::device_matrix_view data, + raft::device_vector_view mu, + raft::device_vector_view std, + bool sample) +{ + constexpr bool is_row_major = std::is_same_v; + constexpr bool is_col_major = std::is_same_v; + static_assert(is_row_major || is_col_major, + "stddev: Layout must be either " + "raft::row_major or raft::col_major (or one of their aliases)"); + RAFT_EXPECTS(mu.size() == std.size(), "Size mismatch between mu and std"); + RAFT_EXPECTS(mu.extent(0) == data.extent(1), "Size mismatch between data and mu"); + detail::stddev(std.data_handle(), + data.data_handle(), + mu.data_handle(), + data.extent(1), + data.extent(0), + sample, + is_row_major, + handle.get_stream()); +} + +/** + * @brief Compute variance of the input matrix + * + * Variance operation is assumed to be performed on a given column. + * + * @tparam value_t the data type + * @tparam idx_t Integer type used to for addressing + * @tparam layout_t Layout type of the input matrix. + * @param handle the raft handle + * @param data the input matrix + * @param mu the mean vector + * @param var the output stddev vector + * @param sample whether to evaluate sample stddev or not. In other words, + * whether + * to normalize the output using N-1 or N, for true or false, respectively + */ +template +void vars(const raft::handle_t& handle, + raft::device_matrix_view data, + raft::device_vector_view mu, + raft::device_vector_view var, + bool sample) +{ + constexpr bool is_row_major = std::is_same_v; + constexpr bool is_col_major = std::is_same_v; + static_assert(is_row_major || is_col_major, + "vars: Layout must be either " + "raft::row_major or raft::col_major (or one of their aliases)"); + RAFT_EXPECTS(mu.size() == var.size(), "Size mismatch between mu and std"); + RAFT_EXPECTS(mu.extent(0) == data.extent(1), "Size mismatch between data and mu"); + detail::vars(var.data_handle(), + data.data_handle(), + mu.data_handle(), + data.extent(1), + data.extent(0), + sample, + is_row_major, + handle.get_stream()); +} + }; // namespace stats }; // namespace raft diff --git a/cpp/include/raft/stats/sum.cuh b/cpp/include/raft/stats/sum.cuh index 89135dd076..1156890095 100644 --- a/cpp/include/raft/stats/sum.cuh +++ b/cpp/include/raft/stats/sum.cuh @@ -19,8 +19,8 @@ #pragma once -#include "detail/sum.cuh" - +#include +#include #include namespace raft { @@ -46,6 +46,37 @@ void sum(Type* output, const Type* input, IdxType D, IdxType N, bool rowMajor, c detail::sum(output, input, D, N, rowMajor, stream); } +/** + * @brief Compute sum of the input matrix + * + * Sum operation is assumed to be performed on a given column. + * + * @tparam value_t the data type + * @tparam idx_t Integer type used to for addressing + * @tparam layout_t Layout type of the input matrix. + * @param handle the raft handle + * @param input the input matrix + * @param output the output mean vector + */ +template +void sum(const raft::handle_t& handle, + raft::device_matrix_view input, + raft::device_vector_view output) +{ + constexpr bool is_row_major = std::is_same_v; + constexpr bool is_col_major = std::is_same_v; + static_assert(is_row_major || is_col_major, + "sum: Layout must be either " + "raft::row_major or raft::col_major (or one of their aliases)"); + RAFT_EXPECTS(input.extent(1) == output.extent(0), "Size mismatch between input and output"); + detail::sum(output.data_handle(), + input.data_handle(), + input.extent(1), + input.extent(0), + is_row_major, + handle.get_stream()); +} + }; // end namespace stats }; // end namespace raft diff --git a/cpp/include/raft/stats/trustworthiness_score.cuh b/cpp/include/raft/stats/trustworthiness_score.cuh index c89eab8d2b..03c9e38e99 100644 --- a/cpp/include/raft/stats/trustworthiness_score.cuh +++ b/cpp/include/raft/stats/trustworthiness_score.cuh @@ -18,6 +18,8 @@ #define __TRUSTWORTHINESS_SCORE_H #pragma once +#include +#include #include namespace raft { @@ -48,6 +50,37 @@ double trustworthiness_score(const raft::handle_t& h, return detail::trustworthiness_score( h, X, X_embedded, n, m, d, n_neighbors, batchSize); } + +/** + * @brief Compute the trustworthiness score + * @tparam value_t the data type + * @tparam idx_t Integer type used to for addressing + * @param[in] handle the raft handle + * @param[in] X: Data in original dimension + * @param[in] X_embedded: Data in target dimension (embedding) + * @param[in] n_neighbors Number of neighbors considered by trustworthiness score + * @param[in] batch_size Batch size + * @return[out] Trustworthiness score + */ +template +double trustworthiness_score(const raft::handle_t& handle, + raft::device_matrix_view X, + raft::device_matrix_view X_embedded, + int n_neighbors, + int batch_size = 512) +{ + RAFT_EXPECTS(X.extent(0) == X_embedded.extent(0), "Size mismatch between X and X_embedded"); + RAFT_EXPECTS(std::is_integral_v && X.extent(0) <= std::numeric_limits::max(), + "Index type not supported"); + return detail::trustworthiness_score(handle, + X.data_handle(), + X_embedded.data_handle(), + X.extent(0), + X.extent(1), + X_embedded.extent(1), + n_neighbors, + batch_size); +} } // namespace stats } // namespace raft diff --git a/cpp/include/raft/stats/v_measure.cuh b/cpp/include/raft/stats/v_measure.cuh index dd6ebd9b15..c0d97915a7 100644 --- a/cpp/include/raft/stats/v_measure.cuh +++ b/cpp/include/raft/stats/v_measure.cuh @@ -18,6 +18,8 @@ #define __V_MEASURE_H #pragma once +#include +#include #include namespace raft { @@ -47,6 +49,40 @@ double v_measure(const T* truthClusterArray, truthClusterArray, predClusterArray, size, lowerLabelRange, upperLabelRange, stream, beta); } +/** + * @brief Function to calculate the v-measure between two clusters + * + * @tparam value_t the data type + * @tparam idx_t Integer type used to for addressing + * @param handle the raft handle + * @param truth_cluster_array: the array of truth classes of type T + * @param pred_cluster_array: the array of predicted classes of type T + * @param lower_label_range: the lower bound of the range of labels + * @param upper_label_range: the upper bound of the range of labels + * @param beta: v_measure parameter + */ +template +double v_measure(const raft::handle_t& handle, + raft::device_vector_view truth_cluster_array, + raft::device_vector_view pred_cluster_array, + value_t lower_label_range, + value_t upper_label_range, + double beta = 1.0) +{ + RAFT_EXPECTS(truth_cluster_array.extent(0) == pred_cluster_array.extent(0), + "Size mismatch betwen truth_cluster_array and pred_cluster_array"); + RAFT_EXPECTS(truth_cluster_array.is_exhaustive(), "truth_cluster_array must be contiguous"); + RAFT_EXPECTS(pred_cluster_array.is_exhaustive(), "pred_cluster_array must be contiguous"); + + return detail::v_measure(truth_cluster_array.data_handle(), + pred_cluster_array.data_handle(), + truth_cluster_array.extent(0), + lower_label_range, + upper_label_range, + handle.get_stream(), + beta); +} + }; // end namespace stats }; // end namespace raft diff --git a/cpp/include/raft/stats/weighted_mean.cuh b/cpp/include/raft/stats/weighted_mean.cuh index 0e8338fe84..6b234f8bd4 100644 --- a/cpp/include/raft/stats/weighted_mean.cuh +++ b/cpp/include/raft/stats/weighted_mean.cuh @@ -19,6 +19,7 @@ #pragma once +#include #include namespace raft { @@ -91,6 +92,89 @@ void colWeightedMean( { weightedMean(mu, data, weights, D, N, true, false, stream); } + +/** + * @brief Compute the weighted mean of the input matrix with a + * vector of weights, along rows or along columns + * + * @tparam value_t the data type + * @tparam idx_t Integer type used to for addressing + * @tparam layout_t Layout type of the input matrix. + * @param handle the raft handle + * @param data the input matrix of size nrows * ncols + * @param weights weight of size ncols if along_row is true, else of size nrows + * @param mu the output mean vector of size ncols if along_row is true, else of size nrows + * @param along_rows whether to reduce along rows or columns + */ +template +void weighted_mean(const raft::handle_t& handle, + raft::device_matrix_view data, + raft::device_vector_view weights, + raft::device_vector_view mu, + bool along_rows) +{ + constexpr bool is_row_major = std::is_same_v; + constexpr bool is_col_major = std::is_same_v; + static_assert(is_row_major || is_col_major, + "weighted_mean: Layout must be either " + "raft::row_major or raft::col_major (or one of their aliases)"); + auto mean_vec_size = along_rows ? data.extent(1) : data.extent(0); + + RAFT_EXPECTS(weights.extent(0) == mean_vec_size, + "Size mismatch betwen weights and mean_vec_size"); + RAFT_EXPECTS(mu.extent(0) == mean_vec_size, "Size mismatch betwen mu and mean_vec_size"); + RAFT_EXPECTS(weights.is_exhaustive(), "weights must be contiguous"); + RAFT_EXPECTS(mu.is_exhaustive(), "mu must be contiguous"); + + detail::weightedMean(mu.data_handle(), + data.data_handle(), + weights.data_handle(), + data.extent(1), + data.extent(0), + is_row_major, + along_rows, + handle.get_stream()); +} + +/** + * @brief Compute the row-wise weighted mean of the input matrix with a + * vector of column weights + * + * @tparam value_t the data type + * @tparam idx_t Integer type used to for addressing + * @param handle the raft handle + * @param data the input matrix of size nrows * ncols + * @param weights per-col weight + * @param mu the output mean vector of size ncols + */ +template +void rowWeightedMean(const raft::handle_t& handle, + raft::device_matrix_view data, + raft::device_vector_view weights, + raft::device_vector_view mu) +{ + weightedMean(handle, data, weights, mu, true); +} + +/** + * @brief Compute the column-wise weighted mean of the input matrix with a + * vector of row weights + * + * @tparam value_t the data type + * @tparam idx_t Integer type used to for addressing + * @param handle the raft handle + * @param data the input matrix of size nrows * ncols + * @param weights per-row weight + * @param mu the output mean vector of size nrows + */ +template +void colWeightedMean(const raft::handle_t& handle, + raft::device_matrix_view data, + raft::device_vector_view weights, + raft::device_vector_view mu) +{ + weightedMean(handle, data, weights, mu, false); +} }; // end namespace stats }; // end namespace raft diff --git a/cpp/test/stats/mutual_info_score.cu b/cpp/test/stats/mutual_info_score.cu index 6bf3e6623f..1912366586 100644 --- a/cpp/test/stats/mutual_info_score.cu +++ b/cpp/test/stats/mutual_info_score.cu @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -104,7 +105,7 @@ class mutualInfoTest : public ::testing::TestWithParam { truthmutualInfo /= nElements; // allocating and initializing memory to the GPU - RAFT_CUDA_TRY(cudaStreamCreate(&stream)); + stream = handle.get_stream(); rmm::device_uvector firstClusterArray(nElements, stream); rmm::device_uvector secondClusterArray(nElements, stream); @@ -117,18 +118,16 @@ class mutualInfoTest : public ::testing::TestWithParam { raft::update_device(secondClusterArray.data(), &arr2[0], (int)nElements, stream); // calling the mutualInfo CUDA implementation - computedmutualInfo = raft::stats::mutual_info_score(firstClusterArray.data(), - secondClusterArray.data(), - nElements, - lowerLabelRange, - upperLabelRange, - stream); + computedmutualInfo = raft::stats::mutual_info_score( + handle, + raft::make_device_vector_view(firstClusterArray.data(), nElements), + raft::make_device_vector_view(secondClusterArray.data(), nElements), + lowerLabelRange, + upperLabelRange); } - // the destructor - void TearDown() override { RAFT_CUDA_TRY(cudaStreamDestroy(stream)); } - // declaring the data values + raft::handle_t handle; mutualInfoParam params; T lowerLabelRange, upperLabelRange; int nElements = 0; diff --git a/cpp/test/stats/rand_index.cu b/cpp/test/stats/rand_index.cu index ca1c4dd5e8..01196bf852 100644 --- a/cpp/test/stats/rand_index.cu +++ b/cpp/test/stats/rand_index.cu @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -77,7 +78,7 @@ class randIndexTest : public ::testing::TestWithParam { truthRandIndex = (double)(((double)(a_truth + b_truth)) / (double)nChooseTwo); // allocating and initializing memory to the GPU - RAFT_CUDA_TRY(cudaStreamCreate(&stream)); + stream = handle.get_stream(); rmm::device_uvector firstClusterArray(size, stream); rmm::device_uvector secondClusterArray(size, stream); @@ -91,13 +92,13 @@ class randIndexTest : public ::testing::TestWithParam { // calling the rand_index CUDA implementation computedRandIndex = - raft::stats::rand_index(firstClusterArray.data(), secondClusterArray.data(), size, stream); + raft::stats::rand_index(handle, + raft::make_device_vector_view(firstClusterArray.data(), size), + raft::make_device_vector_view(secondClusterArray.data(), size)); } - // the destructor - void TearDown() override { RAFT_CUDA_TRY(cudaStreamDestroy(stream)); } - // declaring the data values + raft::handle_t handle; randIndexParam params; int lowerLabelRange = 0, upperLabelRange = 2; uint64_t size = 0; diff --git a/cpp/test/stats/silhouette_score.cu b/cpp/test/stats/silhouette_score.cu index f885c1034f..03f3874b40 100644 --- a/cpp/test/stats/silhouette_score.cu +++ b/cpp/test/stats/silhouette_score.cu @@ -173,25 +173,21 @@ class silhouetteScoreTest : public ::testing::TestWithParam> { { int rows = params.rows, cols = params.cols; - mean(mean_act.data(), data, cols, rows, params.sample, params.rowMajor, stream); - - stddev( - stddev_act.data(), data, mean_act.data(), cols, rows, params.sample, params.rowMajor, stream); - - vars( - vars_act.data(), data, mean_act.data(), cols, rows, params.sample, params.rowMajor, stream); - + if (params.rowMajor) { + using layout_t = raft::row_major; + mean(handle, + raft::make_device_matrix_view(data, rows, cols), + raft::make_device_vector_view(mean_act.data(), cols), + params.sample); + + stddev(handle, + raft::make_device_matrix_view(data, rows, cols), + raft::make_device_vector_view(mean_act.data(), cols), + raft::make_device_vector_view(stddev_act.data(), cols), + params.sample); + + vars(handle, + raft::make_device_matrix_view(data, rows, cols), + raft::make_device_vector_view(mean_act.data(), cols), + raft::make_device_vector_view(vars_act.data(), cols), + params.sample); + } else { + using layout_t = raft::col_major; + mean(handle, + raft::make_device_matrix_view(data, rows, cols), + raft::make_device_vector_view(mean_act.data(), cols), + params.sample); + + stddev(handle, + raft::make_device_matrix_view(data, rows, cols), + raft::make_device_vector_view(mean_act.data(), cols), + raft::make_device_vector_view(stddev_act.data(), cols), + params.sample); + + vars(handle, + raft::make_device_matrix_view(data, rows, cols), + raft::make_device_vector_view(mean_act.data(), cols), + raft::make_device_vector_view(vars_act.data(), cols), + params.sample); + } raft::matrix::seqRoot(vars_act.data(), T(1), cols, stream); } diff --git a/cpp/test/stats/sum.cu b/cpp/test/stats/sum.cu index 7a16dbde4a..e67988abb0 100644 --- a/cpp/test/stats/sum.cu +++ b/cpp/test/stats/sum.cu @@ -65,7 +65,9 @@ class SumTest : public ::testing::TestWithParam> { } raft::update_device(data.data(), data_h, len, stream); - sum(sum_act.data(), data.data(), cols, rows, false, stream); + sum(handle, + raft::make_device_matrix_view(data.data(), rows, cols), + raft::make_device_vector_view(sum_act.data(), cols)); handle.sync_stream(stream); } diff --git a/cpp/test/stats/trustworthiness.cu b/cpp/test/stats/trustworthiness.cu index ae596d0535..97c77c00e1 100644 --- a/cpp/test/stats/trustworthiness.cu +++ b/cpp/test/stats/trustworthiness.cu @@ -320,10 +320,16 @@ class TrustworthinessScoreTest : public ::testing::Test { raft::update_device(d_X.data(), X.data(), X.size(), stream); raft::update_device(d_X_embedded.data(), X_embedded.data(), X_embedded.size(), stream); + auto n_sample = 50; + auto n_features_origin = 30; + auto n_features_embedded = 8; // euclidean test - score = trustworthiness_score( - handle, d_X.data(), d_X_embedded.data(), 50, 30, 8, 5); + score = trustworthiness_score( + handle, + raft::make_device_matrix_view(d_X.data(), n_sample, n_features_origin), + raft::make_device_matrix_view(d_X_embedded.data(), n_sample, n_features_embedded), + 5); } void SetUp() override { basicTest(); } diff --git a/cpp/test/stats/v_measure.cu b/cpp/test/stats/v_measure.cu index 22dcefba0c..0cbc2da7d9 100644 --- a/cpp/test/stats/v_measure.cu +++ b/cpp/test/stats/v_measure.cu @@ -65,7 +65,7 @@ class vMeasureTest : public ::testing::TestWithParam { // allocating and initializing memory to the GPU - RAFT_CUDA_TRY(cudaStreamCreate(&stream)); + stream = handle.get_stream(); rmm::device_uvector truthClusterArray(nElements, stream); rmm::device_uvector predClusterArray(nElements, stream); raft::update_device(truthClusterArray.data(), &arr1[0], (int)nElements, stream); @@ -93,19 +93,17 @@ class vMeasureTest : public ::testing::TestWithParam { truthVMeasure = ((1 + params.beta) * truthHomogeity * truthCompleteness / (params.beta * truthHomogeity + truthCompleteness)); // calling the v_measure CUDA implementation - computedVMeasure = raft::stats::v_measure(truthClusterArray.data(), - predClusterArray.data(), - nElements, - lowerLabelRange, - upperLabelRange, - stream, - params.beta); + computedVMeasure = raft::stats::v_measure( + handle, + raft::make_device_vector_view(truthClusterArray.data(), nElements), + raft::make_device_vector_view(predClusterArray.data(), nElements), + lowerLabelRange, + upperLabelRange, + params.beta); } - // the destructor - void TearDown() override { RAFT_CUDA_TRY(cudaStreamDestroy(stream)); } - // declaring the data values + raft::handle_t handle; vMeasureParam params; T lowerLabelRange, upperLabelRange; int nElements = 0; From ef94359fd1bccbd5175f1d29cd19a038cdf7128f Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Tue, 27 Sep 2022 14:50:42 +0200 Subject: [PATCH 18/40] Use snake case for variables, parameters and templates --- cpp/include/raft/stats/accuracy.cuh | 10 +-- .../raft/stats/adjusted_rand_index.cuh | 28 +++--- cpp/include/raft/stats/completeness_score.cuh | 48 +++++----- cpp/include/raft/stats/contingency_matrix.cuh | 90 +++++++++---------- cpp/include/raft/stats/cov.cuh | 18 ++-- cpp/include/raft/stats/dispersion.cuh | 54 ++++++----- cpp/include/raft/stats/entropy.cuh | 28 +++--- cpp/include/raft/stats/histogram.cuh | 38 ++++---- cpp/include/raft/stats/homogeneity_score.cuh | 38 ++++---- .../raft/stats/information_criterion.cuh | 14 +-- cpp/include/raft/stats/kl_divergence.cuh | 16 ++-- cpp/include/raft/stats/mean.cuh | 16 ++-- cpp/include/raft/stats/mean_center.cuh | 85 +++++++++--------- cpp/include/raft/stats/meanvar.cuh | 20 ++--- cpp/include/raft/stats/minmax.cuh | 43 +++++---- cpp/test/stats/adjusted_rand_index.cu | 1 - cpp/test/stats/contingencyMatrix.cu | 2 +- cpp/test/stats/dispersion.cu | 3 +- cpp/test/stats/mean_center.cu | 10 +-- cpp/test/stats/minmax.cu | 3 +- cpp/test/stats/mutual_info_score.cu | 1 - 21 files changed, 276 insertions(+), 290 deletions(-) diff --git a/cpp/include/raft/stats/accuracy.cuh b/cpp/include/raft/stats/accuracy.cuh index ef856724aa..37cdc280f9 100644 --- a/cpp/include/raft/stats/accuracy.cuh +++ b/cpp/include/raft/stats/accuracy.cuh @@ -42,17 +42,17 @@ float accuracy(const math_t* predictions, const math_t* ref_predictions, int n, /** * @brief Compute accuracy of predictions. Useful for classification. - * @tparam DataT: data type for predictions (e.g., int for classification) - * @tparam IdxType Index type of matrix extent. + * @tparam value_t: data type for predictions (e.g., int for classification) + * @tparam idx_t Index type of matrix extent. * @param[in] handle: the raft handle. * @param[in] predictions: array of predictions (GPU pointer). * @param[in] ref_predictions: array of reference (ground-truth) predictions (GPU pointer). * @return: Accuracy score in [0, 1]; higher is better. */ -template +template float accuracy(const raft::handle_t& handle, - raft::device_vector_view predictions, - raft::device_vector_view ref_predictions) + raft::device_vector_view predictions, + raft::device_vector_view ref_predictions) { RAFT_EXPECTS(predictions.size() == ref_predictions.size(), "Size mismatch"); RAFT_EXPECTS(predictions.is_exhaustive(), "predictions must be contiguous"); diff --git a/cpp/include/raft/stats/adjusted_rand_index.cuh b/cpp/include/raft/stats/adjusted_rand_index.cuh index 4ec8c0efa0..c6fac83c4d 100644 --- a/cpp/include/raft/stats/adjusted_rand_index.cuh +++ b/cpp/include/raft/stats/adjusted_rand_index.cuh @@ -52,26 +52,26 @@ double adjusted_rand_index(const T* firstClusterArray, /** * @brief Function to calculate Adjusted RandIndex as described * here - * @tparam DataT data-type for input label arrays + * @tparam value_t data-type for input label arrays * @tparam MathT integral data-type used for computing n-choose-r - * @tparam IdxType Index type of matrix extent. + * @tparam idx_t Index type of matrix extent. * @param handle: the raft handle. - * @param firstClusterArray: the array of classes - * @param secondClusterArray: the array of classes + * @param first_cluster_array: the array of classes + * @param second_cluster_array: the array of classes */ -template +template double adjusted_rand_index(const raft::handle_t& handle, - raft::device_vector_view firstClusterArray, - raft::device_vector_view secondClusterArray) + raft::device_vector_view first_cluster_array, + raft::device_vector_view second_cluster_array) { - RAFT_EXPECTS(firstClusterArray.size() == secondClusterArray.size(), "Size mismatch"); - RAFT_EXPECTS(firstClusterArray.is_exhaustive(), "firstClusterArray must be contiguous"); - RAFT_EXPECTS(secondClusterArray.is_exhaustive(), "secondClusterArray must be contiguous"); + RAFT_EXPECTS(first_cluster_array.size() == second_cluster_array.size(), "Size mismatch"); + RAFT_EXPECTS(first_cluster_array.is_exhaustive(), "first_cluster_array must be contiguous"); + RAFT_EXPECTS(second_cluster_array.is_exhaustive(), "second_cluster_array must be contiguous"); - return detail::compute_adjusted_rand_index(firstClusterArray.data_handle(), - secondClusterArray.data_handle(), - firstClusterArray.extent(0), - handle.get_stream()); + return detail::compute_adjusted_rand_index(first_cluster_array.data_handle(), + second_cluster_array.data_handle(), + first_cluster_array.extent(0), + handle.get_stream()); } }; // end namespace stats diff --git a/cpp/include/raft/stats/completeness_score.cuh b/cpp/include/raft/stats/completeness_score.cuh index ea72ed86cd..7ae963e12c 100644 --- a/cpp/include/raft/stats/completeness_score.cuh +++ b/cpp/include/raft/stats/completeness_score.cuh @@ -31,48 +31,48 @@ namespace stats { * @param truthClusterArray: the array of truth classes of type T * @param predClusterArray: the array of predicted classes of type T * @param size: the size of the data points of type int - * @param lowerLabelRange: the lower bound of the range of labels - * @param upperLabelRange: the upper bound of the range of labels + * @param lower_label_range: the lower bound of the range of labels + * @param upper_label_range: the upper bound of the range of labels * @param stream: the cudaStream object */ template double completeness_score(const T* truthClusterArray, const T* predClusterArray, int size, - T lowerLabelRange, - T upperLabelRange, + T lower_label_range, + T upper_label_range, cudaStream_t stream) { return detail::homogeneity_score( - predClusterArray, truthClusterArray, size, lowerLabelRange, upperLabelRange, stream); + predClusterArray, truthClusterArray, size, lower_label_range, upper_label_range, stream); } /** * @brief Function to calculate the completeness score between two clusters * - * @tparam DataT the data type - * @tparam IdxType Index type of matrix extent. + * @tparam value_t the data type + * @tparam idx_t Index type of matrix extent. * @param handle: the raft handle. - * @param truthClusterArray: the array of truth classes of type DataT - * @param predClusterArray: the array of predicted classes of type DataT - * @param lowerLabelRange: the lower bound of the range of labels - * @param upperLabelRange: the upper bound of the range of labels + * @param truth_cluster_array: the array of truth classes of type value_t + * @param pred_cluster_array: the array of predicted classes of type value_t + * @param lower_label_range: the lower bound of the range of labels + * @param upper_label_range: the upper bound of the range of labels */ -template +template double completeness_score(const raft::handle_t& handle, - raft::device_vector_view truthClusterArray, - raft::device_vector_view predClusterArray, - DataT lowerLabelRange, - DataT upperLabelRange) + raft::device_vector_view truth_cluster_array, + raft::device_vector_view pred_cluster_array, + value_t lower_label_range, + value_t upper_label_range) { - RAFT_EXPECTS(truthClusterArray.size() == predClusterArray.size(), "Size mismatch"); - RAFT_EXPECTS(truthClusterArray.is_exhaustive(), "truthClusterArray must be contiguous"); - RAFT_EXPECTS(predClusterArray.is_exhaustive(), "predClusterArray must be contiguous"); - return detail::homogeneity_score(predClusterArray.data_handle(), - truthClusterArray.data_handle(), - truthClusterArray.extent(0), - lowerLabelRange, - upperLabelRange, + RAFT_EXPECTS(truth_cluster_array.size() == pred_cluster_array.size(), "Size mismatch"); + RAFT_EXPECTS(truth_cluster_array.is_exhaustive(), "truth_cluster_array must be contiguous"); + RAFT_EXPECTS(pred_cluster_array.is_exhaustive(), "pred_cluster_array must be contiguous"); + return detail::homogeneity_score(pred_cluster_array.data_handle(), + truth_cluster_array.data_handle(), + truth_cluster_array.extent(0), + lower_label_range, + upper_label_range, handle.get_stream()); } diff --git a/cpp/include/raft/stats/contingency_matrix.cuh b/cpp/include/raft/stats/contingency_matrix.cuh index 1179ce9404..39586930b0 100644 --- a/cpp/include/raft/stats/contingency_matrix.cuh +++ b/cpp/include/raft/stats/contingency_matrix.cuh @@ -46,18 +46,18 @@ void getInputClassCardinality( /** * @brief use this to allocate output matrix size * size of matrix = (maxLabel - minLabel + 1)^2 * sizeof(int) - * @tparam DataT label type - * @tparam IdxType Index type of matrix extent. + * @tparam value_t label type + * @tparam idx_t Index type of matrix extent. * @param handle: the raft handle. * @param groundTruth: device 1-d array for ground truth (num of rows) * @param minLabel: [out] calculated min value in input array * @param maxLabel: [out] calculated max value in input array */ -template +template void getInputClassCardinality(const raft::handle_t& handle, - raft::device_vector_view groundTruth, - raft::host_scalar_view minLabel, - raft::host_scalar_view maxLabel) + raft::device_vector_view groundTruth, + raft::host_scalar_view minLabel, + raft::host_scalar_view maxLabel) { RAFT_EXPECTS(minLabel.data_handle() != nullptr, "Invalid minLabel pointer"); RAFT_EXPECTS(maxLabel.data_handle() != nullptr, "Invalid maxLabel pointer"); @@ -159,56 +159,56 @@ void contingencyMatrix(const T* groundTruth, * labels. Users should call function getInputClassCardinality to find * and allocate memory for output. Similarly workspace requirements * should be checked using function getContingencyMatrixWorkspaceSize - * @tparam DataT label type - * @tparam OutType output matrix type - * @tparam IdxType Index type of matrix extent. - * @tparam LayoutPolicy Layout type of the input data. + * @tparam value_t label type + * @tparam out_t output matrix type + * @tparam idx_t Index type of matrix extent. + * @tparam layout_t Layout type of the input data. + * @tparam workspace_value_t Value type of the workspace data data. * @param handle: the raft handle. - * @param groundTruth: device 1-d array for ground truth (num of rows) - * @param predictedLabel: device 1-d array for prediction (num of columns) - * @param outMat: output buffer for contingency matrix + * @param ground_truth: device 1-d array for ground truth (num of rows) + * @param predicted_label: device 1-d array for prediction (num of columns) + * @param out_mat: output buffer for contingency matrix * @param workspace: Optional, workspace memory allocation - * @param minLabel: Optional, min value in input ground truth array - * @param maxLabel: Optional, max value in input ground truth array + * @param min_label: Optional, min value in input ground truth array + * @param max_label: Optional, max value in input ground truth array */ -template -void contingencyMatrix( - const raft::handle_t& handle, - raft::device_vector_view groundTruth, - raft::device_vector_view predictedLabel, - raft::device_matrix_view outMat, - std::optional> workspace, - std::optional minLabel = std::nullopt, - std::optional maxLabel = std::nullopt) +template +void contingency_matrix(const raft::handle_t& handle, + raft::device_vector_view ground_truth, + raft::device_vector_view predicted_label, + raft::device_matrix_view out_mat, + std::optional> workspace, + std::optional min_label = std::nullopt, + std::optional max_label = std::nullopt) { - RAFT_EXPECTS(groundTruth.size() == predictedLabel.size(), "Size mismatch"); - RAFT_EXPECTS(groundTruth.is_exhaustive(), "groundTruth must be contiguous"); - RAFT_EXPECTS(predictedLabel.is_exhaustive(), "predictedLabel must be contiguous"); - RAFT_EXPECTS(outMat.is_exhaustive(), "outMat must be contiguous"); + RAFT_EXPECTS(ground_truth.size() == predicted_label.size(), "Size mismatch"); + RAFT_EXPECTS(ground_truth.is_exhaustive(), "ground_truth must be contiguous"); + RAFT_EXPECTS(predicted_label.is_exhaustive(), "predicted_label must be contiguous"); + RAFT_EXPECTS(out_mat.is_exhaustive(), "out_mat must be contiguous"); - WorkspaceDataType* workspace_p = nullptr; - IdxType workspace_size = 0; + workspace_value_t* workspace_p = nullptr; + idx_t workspace_size = 0; if (workspace.has_value()) { workspace_p = workspace.value().data_handle(); - workspace_size = workspace.value().size() * sizeof(WorkspaceDataType); + workspace_size = workspace.value().size() * sizeof(workspace_value_t); } - DataT minLabelValue = std::numeric_limits::max(); - DataT maxLabelValue = std::numeric_limits::max(); - if (minLabel.has_value()) { minLabelValue = minLabel.value(); } - if (maxLabel.has_value()) { maxLabelValue = maxLabel.value(); } - detail::contingencyMatrix(groundTruth.data_handle(), - predictedLabel.data_handle(), - groundTruth.extent(0), - outMat.data_handle(), + value_t min_label_value = std::numeric_limits::max(); + value_t max_label_value = std::numeric_limits::max(); + if (min_label.has_value()) { min_label_value = min_label.value(); } + if (max_label.has_value()) { max_label_value = max_label.value(); } + detail::contingencyMatrix(ground_truth.data_handle(), + predicted_label.data_handle(), + ground_truth.extent(0), + out_mat.data_handle(), handle.get_stream(), workspace_p, workspace_size, - minLabelValue, - maxLabelValue); + min_label_value, + max_label_value); } }; // namespace stats diff --git a/cpp/include/raft/stats/cov.cuh b/cpp/include/raft/stats/cov.cuh index db99b7e31b..89e0efbfa7 100644 --- a/cpp/include/raft/stats/cov.cuh +++ b/cpp/include/raft/stats/cov.cuh @@ -64,9 +64,9 @@ void cov(const raft::handle_t& handle, * * Mean operation is assumed to be performed on a given column. * - * @tparam DataT the data type - * @tparam IdxT the index type - * @tparam LayoutPolicy Layout type of the input data. + * @tparam value_t the data type + * @tparam idx_t the index type + * @tparam layout_t Layout type of the input data. * @param handle the raft handle * @param data the input matrix (this will get mean-centered at the end!) * (length = nrows * ncols) @@ -79,16 +79,16 @@ void cov(const raft::handle_t& handle, * @note if stable=true, then the input data will be mean centered after this * function returns! */ -template +template void cov(const raft::handle_t& handle, - raft::device_matrix_view data, - raft::device_vector_view mu, - raft::device_matrix_view covar, + raft::device_matrix_view data, + raft::device_vector_view mu, + raft::device_matrix_view covar, bool sample, bool stable) { static_assert( - std::is_same_v || std::is_same_v, + std::is_same_v || std::is_same_v, "Data layout not supported"); RAFT_EXPECTS(data.extent(1) == covar.extent(0) && data.extent(1) == covar.extent(1), "Size mismatch"); @@ -102,7 +102,7 @@ void cov(const raft::handle_t& handle, mu.data_handle(), data.extent(1), data.extent(0), - std::is_same_v, + std::is_same_v, sample, stable, handle.get_stream()); diff --git a/cpp/include/raft/stats/dispersion.cuh b/cpp/include/raft/stats/dispersion.cuh index c6e8eab3d5..c651318003 100644 --- a/cpp/include/raft/stats/dispersion.cuh +++ b/cpp/include/raft/stats/dispersion.cuh @@ -60,42 +60,40 @@ DataT dispersion(const DataT* centroids, /** * @brief Compute cluster dispersion metric. This is very useful for * automatically finding the 'k' (in kmeans) that improves this metric. - * @tparam DataT data type - * @tparam IdxType index type - * @tparam TPB threads block for kernels launched + * @tparam value_t data type + * @tparam idx_t index type * @param handle the raft handle * @param centroids the cluster centroids. This is assumed to be row-major - * and of dimension (nClusters x dim) - * @param clusterSizes number of points in the dataset which belong to each - * cluster. This is of length nClusters - * @param globalCentroid compute the global weighted centroid of all cluster + * and of dimension (n_clusters x dim) + * @param cluster_sizes number of points in the dataset which belong to each + * cluster. This is of length n_clusters + * @param global_centroid compute the global weighted centroid of all cluster * centroids. This is of length dim. Use std::nullopt to not return it. - * @param nPoints number of points in the dataset + * @param n_points number of points in the dataset * @return the cluster dispersion value */ -template -DataT dispersion(const raft::handle_t& handle, - raft::device_matrix_view centroids, - raft::device_vector_view clusterSizes, - std::optional> globalCentroid, - const IdxType nPoints, - std::integral_constant) +template +value_t dispersion(const raft::handle_t& handle, + raft::device_matrix_view centroids, + raft::device_vector_view cluster_sizes, + std::optional> global_centroid, + const idx_t n_points) { - RAFT_EXPECTS(clusterSizes.extent(0) == centroids.extent(0), "Size mismatch"); - RAFT_EXPECTS(clusterSizes.is_exhaustive(), "clusterSizes must be contiguous"); + RAFT_EXPECTS(cluster_sizes.extent(0) == centroids.extent(0), "Size mismatch"); + RAFT_EXPECTS(cluster_sizes.is_exhaustive(), "cluster_sizes must be contiguous"); - DataT* globalCentroid_ptr = nullptr; - if (globalCentroid.has_value()) { - RAFT_EXPECTS(globalCentroid.value().is_exhaustive(), "globalCentroid must be contiguous"); - globalCentroid_ptr = globalCentroid.value().data_handle(); + value_t* global_centroid_ptr = nullptr; + if (global_centroid.has_value()) { + RAFT_EXPECTS(global_centroid.value().is_exhaustive(), "global_centroid must be contiguous"); + global_centroid_ptr = global_centroid.value().data_handle(); } - return detail::dispersion(centroids.data_handle(), - clusterSizes.data_handle(), - globalCentroid_ptr, - centroids.extent(0), - nPoints, - centroids.extent(1), - handle.get_stream()); + return detail::dispersion(centroids.data_handle(), + cluster_sizes.data_handle(), + global_centroid_ptr, + centroids.extent(0), + n_points, + centroids.extent(1), + handle.get_stream()); } } // end namespace stats diff --git a/cpp/include/raft/stats/entropy.cuh b/cpp/include/raft/stats/entropy.cuh index 5e474f909f..50e09942d6 100644 --- a/cpp/include/raft/stats/entropy.cuh +++ b/cpp/include/raft/stats/entropy.cuh @@ -50,25 +50,25 @@ double entropy(const T* clusterArray, * @brief Function to calculate entropy * more info on entropy * - * @tparam DataT data type - * @tparam IdxType index type + * @tparam value_t data type + * @tparam idx_t index type * @param handle the raft handle - * @param clusterArray: the array of classes of type DataT - * @param lowerLabelRange: the lower bound of the range of labels - * @param upperLabelRange: the upper bound of the range of labels + * @param cluster_array: the array of classes of type value_t + * @param lower_label_range: the lower bound of the range of labels + * @param upper_label_range: the upper bound of the range of labels * @return the entropy score */ -template +template double entropy(const raft::handle_t& handle, - raft::device_vector_view clusterArray, - const DataT lowerLabelRange, - const DataT upperLabelRange) + raft::device_vector_view cluster_array, + const value_t lower_label_range, + const value_t upper_label_range) { - RAFT_EXPECTS(clusterArray.is_exhaustive(), "clusterArray must be contiguous"); - return detail::entropy(clusterArray.data_handle(), - clusterArray.extent(0), - lowerLabelRange, - upperLabelRange, + RAFT_EXPECTS(cluster_array.is_exhaustive(), "cluster_array must be contiguous"); + return detail::entropy(cluster_array.data_handle(), + cluster_array.extent(0), + lower_label_range, + upper_label_range, handle.get_stream()); } }; // end namespace stats diff --git a/cpp/include/raft/stats/histogram.cuh b/cpp/include/raft/stats/histogram.cuh index 1d9e2b2194..be88a3fbbd 100644 --- a/cpp/include/raft/stats/histogram.cuh +++ b/cpp/include/raft/stats/histogram.cuh @@ -66,39 +66,39 @@ void histogram(HistType type, * @brief Perform histogram on the input data. It chooses the right load size * based on the input data vector length. It also supports large-bin cases * using a specialized smem-based hashing technique. - * @tparam DataT input data type - * @tparam IdxType data type used to compute indices - * @tparam BinnerOp takes the input data and computes its bin index + * @tparam value_t input data type + * @tparam idx_t data type used to compute indices + * @tparam binner_op takes the input data and computes its bin index * @param handle the raft handle * @param type histogram implementation type to choose * @param data input data col-major (length = nrows * ncols) * @param bins the output bins col-major (length = nbins * ncols) * @param binner the operation that computes the bin index of the input data * - * @note signature of BinnerOp is `int func(DataT, IdxT);` + * @note signature of binner_op is `int func(value_t, IdxT);` */ -template > +template > void histogram(const raft::handle_t& handle, HistType type, - raft::device_matrix_view data, - raft::device_matrix_view bins, - BinnerOp binner = IdentityBinner()) + raft::device_matrix_view data, + raft::device_matrix_view bins, + binner_op binner = IdentityBinner()) { - RAFT_EXPECTS(std::is_integral_v && data.extent(0) <= std::numeric_limits::max(), + RAFT_EXPECTS(std::is_integral_v && data.extent(0) <= std::numeric_limits::max(), "Index type not supported"); RAFT_EXPECTS(bins.extent(1) == data.extent(1), "Size mismatch"); RAFT_EXPECTS(bins.is_exhaustive(), "bins must be contiguous"); RAFT_EXPECTS(data.is_exhaustive(), "data must be contiguous"); - detail::histogram(type, - bins.data_handle(), - bins.extent(0), - data.data_handle(), - data.extent(0), - data.extent(1), - handle.get_stream(), - binner); + detail::histogram(type, + bins.data_handle(), + bins.extent(0), + data.data_handle(), + data.extent(0), + data.extent(1), + handle.get_stream(), + binner); } }; // end namespace stats }; // end namespace raft diff --git a/cpp/include/raft/stats/homogeneity_score.cuh b/cpp/include/raft/stats/homogeneity_score.cuh index f7f90e928b..333a57ee2a 100644 --- a/cpp/include/raft/stats/homogeneity_score.cuh +++ b/cpp/include/raft/stats/homogeneity_score.cuh @@ -53,29 +53,29 @@ double homogeneity_score(const T* truthClusterArray, * more info on mutual * information * - * @tparam DataT data type - * @tparam IdxType index type + * @tparam value_t data type + * @tparam idx_t index type * @param handle the raft handle - * @param truthClusterArray: the array of truth classes of type DataT - * @param predClusterArray: the array of predicted classes of type DataT - * @param lowerLabelRange: the lower bound of the range of labels - * @param upperLabelRange: the upper bound of the range of labels + * @param truth_cluster_array: the array of truth classes of type value_t + * @param pred_cluster_array: the array of predicted classes of type value_t + * @param lower_label_range: the lower bound of the range of labels + * @param upper_label_range: the upper bound of the range of labels */ -template +template double homogeneity_score(const raft::handle_t& handle, - raft::device_vector_view truthClusterArray, - raft::device_vector_view predClusterArray, - DataT lowerLabelRange, - DataT upperLabelRange) + raft::device_vector_view truth_cluster_array, + raft::device_vector_view pred_cluster_array, + value_t lower_label_range, + value_t upper_label_range) { - RAFT_EXPECTS(truthClusterArray.size() == predClusterArray.size(), "Size mismatch"); - RAFT_EXPECTS(truthClusterArray.is_exhaustive(), "truthClusterArray must be contiguous"); - RAFT_EXPECTS(predClusterArray.is_exhaustive(), "predClusterArray must be contiguous"); - return detail::homogeneity_score(truthClusterArray.data_handle(), - predClusterArray.data_handle(), - truthClusterArray.extent(0), - lowerLabelRange, - upperLabelRange, + RAFT_EXPECTS(truth_cluster_array.size() == pred_cluster_array.size(), "Size mismatch"); + RAFT_EXPECTS(truth_cluster_array.is_exhaustive(), "truth_cluster_array must be contiguous"); + RAFT_EXPECTS(pred_cluster_array.is_exhaustive(), "pred_cluster_array must be contiguous"); + return detail::homogeneity_score(truth_cluster_array.data_handle(), + pred_cluster_array.data_handle(), + truth_cluster_array.extent(0), + lower_label_range, + upper_label_range, handle.get_stream()); } }; // end namespace stats diff --git a/cpp/include/raft/stats/information_criterion.cuh b/cpp/include/raft/stats/information_criterion.cuh index 7ec1cd8812..8ab4723d01 100644 --- a/cpp/include/raft/stats/information_criterion.cuh +++ b/cpp/include/raft/stats/information_criterion.cuh @@ -71,8 +71,8 @@ void information_criterion_batched(ScalarT* d_ic, * @note: it is safe to do the computation in-place (i.e give same pointer * as input and output) * - * @tparam DataT data type - * @tparam IdxType index type + * @tparam value_t data type + * @tparam idx_t index type * @param[in] handle the raft handle * @param[in] d_loglikelihood Log-likelihood for each series (device) length: batch_size * @param[out] d_ic Information criterion to be returned for each @@ -81,13 +81,13 @@ void information_criterion_batched(ScalarT* d_ic, * @param[in] n_params Number of parameters in the model * @param[in] n_samples Number of samples in each series */ -template +template void information_criterion_batched(const raft::handle_t& handle, - raft::device_vector_view d_loglikelihood, - raft::device_vector_view d_ic, + raft::device_vector_view d_loglikelihood, + raft::device_vector_view d_ic, IC_Type ic_type, - IdxType n_params, - IdxType n_samples) + idx_t n_params, + idx_t n_samples) { RAFT_EXPECTS(d_ic.size() == d_loglikelihood.size(), "Size mismatch"); RAFT_EXPECTS(d_ic.is_exhaustive(), "d_ic must be contiguous"); diff --git a/cpp/include/raft/stats/kl_divergence.cuh b/cpp/include/raft/stats/kl_divergence.cuh index d81ece33eb..8cbe28c864 100644 --- a/cpp/include/raft/stats/kl_divergence.cuh +++ b/cpp/include/raft/stats/kl_divergence.cuh @@ -47,16 +47,16 @@ DataT kl_divergence(const DataT* modelPDF, const DataT* candidatePDF, int size, * more info on KL * Divergence * - * @tparam DataT: Data type of the input array - * @tparam IdxType index type + * @tparam value_t: Data type of the input array + * @tparam idx_t index type * @param handle the raft handle - * @param modelPDF: the model array of probability density functions of type DataT - * @param candidatePDF: the candidate array of probability density functions of type DataT + * @param modelPDF: the model array of probability density functions of type value_t + * @param candidatePDF: the candidate array of probability density functions of type value_t */ -template -DataT kl_divergence(const raft::handle_t& handle, - raft::device_vector_view modelPDF, - raft::device_vector_view candidatePDF) +template +value_t kl_divergence(const raft::handle_t& handle, + raft::device_vector_view modelPDF, + raft::device_vector_view candidatePDF) { RAFT_EXPECTS(modelPDF.size() == candidatePDF.size(), "Size mismatch"); RAFT_EXPECTS(modelPDF.is_exhaustive(), "modelPDF must be contiguous"); diff --git a/cpp/include/raft/stats/mean.cuh b/cpp/include/raft/stats/mean.cuh index 24c293abb6..e4e3526179 100644 --- a/cpp/include/raft/stats/mean.cuh +++ b/cpp/include/raft/stats/mean.cuh @@ -55,23 +55,23 @@ void mean( * * Mean operation is assumed to be performed on a given column. * - * @tparam DataT the data type - * @tparam IdxType index type - * @tparam LayoutPolicy Layout type of the input matrix. + * @tparam value_t the data type + * @tparam idx_t index type + * @tparam layout_t Layout type of the input matrix. * @param handle the raft handle * @param data: the input matrix * @param mu: the output mean vector * @param sample: whether to evaluate sample mean or not. In other words, whether * to normalize the output using N-1 or N, for true or false, respectively */ -template +template void mean(const raft::handle_t& handle, - raft::device_matrix_view data, - raft::device_vector_view mu, + raft::device_matrix_view data, + raft::device_vector_view mu, bool sample) { static_assert( - std::is_same_v || std::is_same_v, + std::is_same_v || std::is_same_v, "Data layout not supported"); RAFT_EXPECTS(data.extent(1) == mu.extent(0), "Size mismatch betwen data and mu"); RAFT_EXPECTS(mu.is_exhaustive(), "mu must be contiguous"); @@ -81,7 +81,7 @@ void mean(const raft::handle_t& handle, data.extent(1), data.extent(0), sample, - std::is_same_v, + std::is_same_v, handle.get_stream()); } diff --git a/cpp/include/raft/stats/mean_center.cuh b/cpp/include/raft/stats/mean_center.cuh index 988d88ff48..e9c24595fc 100644 --- a/cpp/include/raft/stats/mean_center.cuh +++ b/cpp/include/raft/stats/mean_center.cuh @@ -54,40 +54,38 @@ void meanCenter(Type* out, /** * @brief Center the input matrix wrt its mean - * @tparam DataT the data type - * @tparam IdxType index type - * @tparam LayoutPolicy Layout type of the input matrix. - * @tparam TPB threads per block of the cuda kernel launched + * @tparam value_t the data type + * @tparam idx_t index type + * @tparam layout_t Layout type of the input matrix. * @param handle the raft handle * @param data input matrix * @param mu the mean vector * @param out the output mean-centered matrix - * @param bcastAlongRows whether to broadcast vector along rows or columns + * @param bcast_along_rows whether to broadcast vector along rows or columns */ -template -void meanCenter(const raft::handle_t& handle, - raft::device_matrix_view data, - raft::device_vector_view mu, - raft::device_matrix_view out, - bool bcastAlongRows, - std::integral_constant) +template +void mean_center(const raft::handle_t& handle, + raft::device_matrix_view data, + raft::device_vector_view mu, + raft::device_matrix_view out, + bool bcast_along_rows) { static_assert( - std::is_same_v || std::is_same_v, + std::is_same_v || std::is_same_v, "Data layout not supported"); - auto meanVecSize = bcastAlongRows ? data.extent(1) : data.extent(0); + auto mean_vec_size = bcast_along_rows ? data.extent(1) : data.extent(0); RAFT_EXPECTS(out.size() == data.size(), "Size mismatch"); - RAFT_EXPECTS(meanVecSize == mu.extent(0), "Size mismatch betwen data and mu"); + RAFT_EXPECTS(mean_vec_size == mu.extent(0), "Size mismatch betwen data and mu"); RAFT_EXPECTS(out.is_exhaustive(), "out must be contiguous"); RAFT_EXPECTS(data.is_exhaustive(), "data must be contiguous"); - detail::meanCenter(out.data_handle(), - data.data_handle(), - mu.data_handle(), - data.extent(1), - data.extent(0), - std::is_same_v, - bcastAlongRows, - handle.get_stream()); + detail::meanCenter(out.data_handle(), + data.data_handle(), + mu.data_handle(), + data.extent(1), + data.extent(0), + std::is_same_v, + bcast_along_rows, + handle.get_stream()); } /** @@ -120,39 +118,38 @@ void meanAdd(Type* out, /** * @brief Add the input matrix wrt its mean * @tparam Type the data type - * @tparam IdxType index type - * @tparam LayoutPolicy Layout type of the input matrix. + * @tparam idx_t index type + * @tparam layout_t Layout type of the input matrix. * @tparam TPB threads per block of the cuda kernel launched * @param handle the raft handle * @param data input matrix * @param mu the mean vector * @param out the output mean-centered matrix - * @param bcastAlongRows whether to broadcast vector along rows or columns + * @param bcast_along_rows whether to broadcast vector along rows or columns */ -template -void meanAdd(const raft::handle_t& handle, - raft::device_matrix_view data, - raft::device_vector_view mu, - raft::device_matrix_view out, - bool bcastAlongRows, - std::integral_constant) +template +void mean_add(const raft::handle_t& handle, + raft::device_matrix_view data, + raft::device_vector_view mu, + raft::device_matrix_view out, + bool bcast_along_rows) { static_assert( - std::is_same_v || std::is_same_v, + std::is_same_v || std::is_same_v, "Data layout not supported"); - auto meanVecSize = bcastAlongRows ? data.extent(1) : data.extent(0); + auto mean_vec_size = bcast_along_rows ? data.extent(1) : data.extent(0); RAFT_EXPECTS(out.size() == data.size(), "Size mismatch"); - RAFT_EXPECTS(meanVecSize == mu.extent(0), "Size mismatch betwen data and mu"); + RAFT_EXPECTS(mean_vec_size == mu.extent(0), "Size mismatch betwen data and mu"); RAFT_EXPECTS(out.is_exhaustive(), "out must be contiguous"); RAFT_EXPECTS(data.is_exhaustive(), "data must be contiguous"); - detail::meanAdd(out.data_handle(), - data.data_handle(), - mu.data_handle(), - data.extent(1), - data.extent(0), - std::is_same_v, - bcastAlongRows, - handle.get_stream()); + detail::meanAdd(out.data_handle(), + data.data_handle(), + mu.data_handle(), + data.extent(1), + data.extent(0), + std::is_same_v, + bcast_along_rows, + handle.get_stream()); } }; // end namespace stats }; // end namespace raft diff --git a/cpp/include/raft/stats/meanvar.cuh b/cpp/include/raft/stats/meanvar.cuh index b7533caf97..72c243f194 100644 --- a/cpp/include/raft/stats/meanvar.cuh +++ b/cpp/include/raft/stats/meanvar.cuh @@ -64,25 +64,25 @@ void meanvar(Type* mean, * It's almost twice faster than running `mean` and `vars` sequentially, because all three * kernels are memory-bound. * - * @tparam DataT the data type - * @tparam IdxType Integer type used for addressing - * @tparam LayoutPolicy Layout type of the input matrix. + * @tparam value_t the data type + * @tparam idx_t Integer type used for addressing + * @tparam layout_t Layout type of the input matrix. * @param handle the raft handle * @param [in] data the input matrix of size [N, D] * @param [out] mean the output mean vector of size D * @param [out] var the output variance vector of size D - * @param [in] sample whether to evaluate sample variance or not. In other words, whether to + * @param sample whether to evaluate sample variance or not. In other words, whether to * normalize the variance using N-1 or N, for true or false respectively. */ -template +template void meanvar(const raft::handle_t& handle, - raft::device_matrix_view data, - raft::device_vector_view mean, - raft::device_vector_view var, + raft::device_matrix_view data, + raft::device_vector_view mean, + raft::device_vector_view var, bool sample) { static_assert( - std::is_same_v || std::is_same_v, + std::is_same_v || std::is_same_v, "Data layout not supported"); RAFT_EXPECTS(data.extent(1) == var.extent(0), "Size mismatch betwen data and var"); RAFT_EXPECTS(mean.size() == var.size(), "Size mismatch betwen mean and var"); @@ -95,7 +95,7 @@ void meanvar(const raft::handle_t& handle, data.extent(1), data.extent(0), sample, - std::is_same_v, + std::is_same_v, handle.get_stream()); } diff --git a/cpp/include/raft/stats/minmax.cuh b/cpp/include/raft/stats/minmax.cuh index fdc47a785d..e1b18185b3 100644 --- a/cpp/include/raft/stats/minmax.cuh +++ b/cpp/include/raft/stats/minmax.cuh @@ -74,9 +74,8 @@ void minmax(const T* data, * @brief Computes min/max across every column of the input matrix, as well as * optionally allow to subsample based on the given row/col ID mapping vectors * - * @tparam DataT Data type of input matrix element. - * @tparam IdxType Index type of matrix extent. - * @tparam TPB number of threads per block + * @tparam value_t Data type of input matrix element. + * @tparam idx_t Index type of matrix extent. * @param handle the raft handle * @param data input data col-major of size [nrows, ncols], unless rowids or * colids length is smaller @@ -87,25 +86,23 @@ void minmax(const T* data, * @param globalmin final col-wise global minimum (size = ncols) * @param globalmax final col-wise global maximum (size = ncols) * @param sampledcols output sampled data. Pass nullptr if you don't need this - * @param TPB threads_pre_block * @note This method makes the following assumptions: * 1. input and output matrices are assumed to be col-major * 2. ncols is small enough to fit the whole of min/max values across all cols * in shared memory */ -template +template void minmax(const raft::handle_t& handle, - raft::device_matrix_view data, - std::optional> rowids, - std::optional> colids, - raft::device_vector_view globalmin, - raft::device_vector_view globalmax, - std::optional> sampledcols, - std::integral_constant) + raft::device_matrix_view data, + std::optional> rowids, + std::optional> colids, + raft::device_vector_view globalmin, + raft::device_vector_view globalmax, + std::optional> sampledcols) { const unsigned* rowids_ptr = nullptr; const unsigned* colids_ptr = nullptr; - DataT* sampledcols_ptr = nullptr; + value_t* sampledcols_ptr = nullptr; auto nrows = data.extent(0); auto ncols = data.extent(1); auto row_stride = data.stride(1); @@ -118,16 +115,16 @@ void minmax(const raft::handle_t& handle, ncols = colids.value().extent(0); } if (sampledcols.has_value()) { sampledcols_ptr = sampledcols.value().data_handle(); } - detail::minmax(data.data_handle(), - rowids_ptr, - colids_ptr, - nrows, - ncols, - row_stride, - globalmin.data_handle(), - globalmax.data_handle(), - sampledcols_ptr, - handle.get_stream()); + detail::minmax(data.data_handle(), + rowids_ptr, + colids_ptr, + nrows, + ncols, + row_stride, + globalmin.data_handle(), + globalmax.data_handle(), + sampledcols_ptr, + handle.get_stream()); } }; // namespace stats diff --git a/cpp/test/stats/adjusted_rand_index.cu b/cpp/test/stats/adjusted_rand_index.cu index e7e892b7fd..f113af821d 100644 --- a/cpp/test/stats/adjusted_rand_index.cu +++ b/cpp/test/stats/adjusted_rand_index.cu @@ -20,7 +20,6 @@ #include #include #include -#include #include #include diff --git a/cpp/test/stats/contingencyMatrix.cu b/cpp/test/stats/contingencyMatrix.cu index 3e2250540d..37b4dccc0b 100644 --- a/cpp/test/stats/contingencyMatrix.cu +++ b/cpp/test/stats/contingencyMatrix.cu @@ -121,7 +121,7 @@ class ContingencyMatrixTest : public ::testing::TestWithParam(dY.data(), numElements), raft::make_device_vector_view(dYHat.data(), numElements), diff --git a/cpp/test/stats/dispersion.cu b/cpp/test/stats/dispersion.cu index 5cb353a69a..485b80364e 100644 --- a/cpp/test/stats/dispersion.cu +++ b/cpp/test/stats/dispersion.cu @@ -69,8 +69,7 @@ class DispersionTest : public ::testing::TestWithParam> { raft::make_device_matrix_view(data.data(), params.clusters, params.dim), raft::make_device_vector_view(counts.data(), params.clusters), std::make_optional(raft::make_device_vector_view(act_mean.data(), params.dim)), - npoints, - std::integral_constant{}); + npoints); expectedVal = T(0); std::vector h_data(len, T(0)); raft::update_host(&(h_data[0]), data.data(), len, stream); diff --git a/cpp/test/stats/mean_center.cu b/cpp/test/stats/mean_center.cu index 2cedf9944e..980d4d5426 100644 --- a/cpp/test/stats/mean_center.cu +++ b/cpp/test/stats/mean_center.cu @@ -65,20 +65,18 @@ class MeanCenterTest : public ::testing::TestWithParam(data.data(), rows, cols), raft::make_device_vector_view(meanVec.data(), meanVecSize), raft::make_device_matrix_view(out.data(), rows, cols), - params.bcastAlongRows, - std::integral_constant{}); + params.bcastAlongRows); } else { using layout = raft::col_major; - meanCenter(handle, + mean_center(handle, raft::make_device_matrix_view(data.data(), rows, cols), raft::make_device_vector_view(meanVec.data(), meanVecSize), raft::make_device_matrix_view(out.data(), rows, cols), - params.bcastAlongRows, - std::integral_constant{}); + params.bcastAlongRows); } raft::linalg::naiveMatVec(out_ref.data(), data.data(), diff --git a/cpp/test/stats/minmax.cu b/cpp/test/stats/minmax.cu index d3e0aa2637..a2ba6bfc9e 100644 --- a/cpp/test/stats/minmax.cu +++ b/cpp/test/stats/minmax.cu @@ -126,8 +126,7 @@ class MinMaxTest : public ::testing::TestWithParam> { std::nullopt, raft::make_device_vector_view(minmax_act.data(), params.cols), raft::make_device_vector_view(minmax_act.data() + params.cols, params.cols), - std::nullopt, - std::integral_constant{}); + std::nullopt); } protected: diff --git a/cpp/test/stats/mutual_info_score.cu b/cpp/test/stats/mutual_info_score.cu index 1912366586..fb9362df52 100644 --- a/cpp/test/stats/mutual_info_score.cu +++ b/cpp/test/stats/mutual_info_score.cu @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include From 0479c11f666600e5e36f96a4d9e018440334811f Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Tue, 27 Sep 2022 15:07:14 +0200 Subject: [PATCH 19/40] fix style --- cpp/test/stats/mean_center.cu | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/cpp/test/stats/mean_center.cu b/cpp/test/stats/mean_center.cu index 980d4d5426..30dcdd475b 100644 --- a/cpp/test/stats/mean_center.cu +++ b/cpp/test/stats/mean_center.cu @@ -66,17 +66,17 @@ class MeanCenterTest : public ::testing::TestWithParam(data.data(), rows, cols), - raft::make_device_vector_view(meanVec.data(), meanVecSize), - raft::make_device_matrix_view(out.data(), rows, cols), - params.bcastAlongRows); + raft::make_device_matrix_view(data.data(), rows, cols), + raft::make_device_vector_view(meanVec.data(), meanVecSize), + raft::make_device_matrix_view(out.data(), rows, cols), + params.bcastAlongRows); } else { using layout = raft::col_major; mean_center(handle, - raft::make_device_matrix_view(data.data(), rows, cols), - raft::make_device_vector_view(meanVec.data(), meanVecSize), - raft::make_device_matrix_view(out.data(), rows, cols), - params.bcastAlongRows); + raft::make_device_matrix_view(data.data(), rows, cols), + raft::make_device_vector_view(meanVec.data(), meanVecSize), + raft::make_device_matrix_view(out.data(), rows, cols), + params.bcastAlongRows); } raft::linalg::naiveMatVec(out_ref.data(), data.data(), From f8ae9e1060d3debcd889904d81223f7f9162bcfe Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Wed, 28 Sep 2022 14:33:18 +0200 Subject: [PATCH 20/40] Remove workspace from public api --- cpp/include/raft/stats/contingency_matrix.cuh | 54 ++++++------------- cpp/test/stats/contingencyMatrix.cu | 14 ++--- 2 files changed, 20 insertions(+), 48 deletions(-) diff --git a/cpp/include/raft/stats/contingency_matrix.cuh b/cpp/include/raft/stats/contingency_matrix.cuh index 39586930b0..b9ec275dea 100644 --- a/cpp/include/raft/stats/contingency_matrix.cuh +++ b/cpp/include/raft/stats/contingency_matrix.cuh @@ -20,6 +20,7 @@ #pragma once #include +#include #include #include #include @@ -54,7 +55,7 @@ void getInputClassCardinality( * @param maxLabel: [out] calculated max value in input array */ template -void getInputClassCardinality(const raft::handle_t& handle, +void get_input_class_cardinality(const raft::handle_t& handle, raft::device_vector_view groundTruth, raft::host_scalar_view minLabel, raft::host_scalar_view maxLabel) @@ -89,32 +90,6 @@ size_t getContingencyMatrixWorkspaceSize(int nSamples, nSamples, groundTruth, stream, minLabel, maxLabel); } -/** - * @brief Calculate workspace size for running contingency matrix calculations - * @tparam DataT label type - * @tparam IdxType Index type of matrix extent. - * @param handle: the raft handle. - * @param groundTruth: device 1-d array for ground truth (num of rows) - * @param minLabel: Optional, min value in input array - * @param maxLabel: Optional, max value in input array - */ -template -size_t getContingencyMatrixWorkspaceSize(const raft::handle_t& handle, - raft::device_vector_view groundTruth, - std::optional minLabel = std::nullopt, - std::optional maxLabel = std::nullopt) -{ - DataT minLabelValue = std::numeric_limits::max(); - DataT maxLabelValue = std::numeric_limits::max(); - if (minLabel.has_value()) { minLabelValue = minLabel.value(); } - if (maxLabel.has_value()) { maxLabelValue = maxLabel.value(); } - return detail::getContingencyMatrixWorkspaceSize(groundTruth.extent(0), - groundTruth.data_handle(), - handle.get_stream(), - minLabelValue, - maxLabelValue); -} - /** * @brief contruct contingency matrix given input ground truth and prediction * labels. Users should call function getInputClassCardinality to find @@ -175,13 +150,11 @@ void contingencyMatrix(const T* groundTruth, template + typename layout_t> void contingency_matrix(const raft::handle_t& handle, raft::device_vector_view ground_truth, raft::device_vector_view predicted_label, - raft::device_matrix_view out_mat, - std::optional> workspace, + raft::device_matrix_view out_mat, std::optional min_label = std::nullopt, std::optional max_label = std::nullopt) { @@ -190,23 +163,26 @@ void contingency_matrix(const raft::handle_t& handle, RAFT_EXPECTS(predicted_label.is_exhaustive(), "predicted_label must be contiguous"); RAFT_EXPECTS(out_mat.is_exhaustive(), "out_mat must be contiguous"); - workspace_value_t* workspace_p = nullptr; - idx_t workspace_size = 0; - if (workspace.has_value()) { - workspace_p = workspace.value().data_handle(); - workspace_size = workspace.value().size() * sizeof(workspace_value_t); - } value_t min_label_value = std::numeric_limits::max(); value_t max_label_value = std::numeric_limits::max(); if (min_label.has_value()) { min_label_value = min_label.value(); } if (max_label.has_value()) { max_label_value = max_label.value(); } + + auto workspace_sz = detail::getContingencyMatrixWorkspaceSize( + ground_truth.extent(0), + ground_truth.data_handle(), + handle.get_stream(), + min_label_value, + max_label_value); + auto workspace = raft::make_device_vector(handle, workspace_sz); + detail::contingencyMatrix(ground_truth.data_handle(), predicted_label.data_handle(), ground_truth.extent(0), out_mat.data_handle(), handle.get_stream(), - workspace_p, - workspace_size, + workspace.data_handle(), + workspace_sz, min_label_value, max_label_value); } diff --git a/cpp/test/stats/contingencyMatrix.cu b/cpp/test/stats/contingencyMatrix.cu index 37b4dccc0b..7943610689 100644 --- a/cpp/test/stats/contingencyMatrix.cu +++ b/cpp/test/stats/contingencyMatrix.cu @@ -41,7 +41,6 @@ class ContingencyMatrixTest : public ::testing::TestWithParam(dY.data(), numElements), + raft::make_host_scalar_view(&minLabel), + raft::make_host_scalar_view(&maxLabel)); } else { minLabel = lowerLabelRange; maxLabel = upperLabelRange; @@ -111,10 +114,6 @@ class ContingencyMatrixTest : public ::testing::TestWithParam(dY.data(), numElements), raft::make_device_vector_view(dYHat.data(), numElements), raft::make_device_matrix_view(dComputedOutput.data(), numUniqueClasses, numUniqueClasses), - std::make_optional(raft::make_device_vector_view(pWorkspace.data(), workspaceSz)), std::make_optional(minLabel), std::make_optional(maxLabel)); @@ -142,8 +140,6 @@ class ContingencyMatrixTest : public ::testing::TestWithParam pWorkspace; rmm::device_uvector dY, dYHat; rmm::device_uvector dComputedOutput, dGoldenOutput; }; From 093dc4c5c3c352426137341d58698b3f7971427e Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Wed, 28 Sep 2022 17:32:36 +0200 Subject: [PATCH 21/40] Add [in] [out] to parameter documentation --- .../raft/stats/adjusted_rand_index.cuh | 7 +-- cpp/include/raft/stats/completeness_score.cuh | 11 +++-- cpp/include/raft/stats/contingency_matrix.cuh | 48 ++++++++----------- cpp/include/raft/stats/cov.cuh | 12 ++--- cpp/include/raft/stats/dispersion.cuh | 12 +++-- cpp/include/raft/stats/entropy.cuh | 8 ++-- cpp/include/raft/stats/histogram.cuh | 10 ++-- cpp/include/raft/stats/homogeneity_score.cuh | 11 +++-- cpp/include/raft/stats/kl_divergence.cuh | 7 +-- cpp/include/raft/stats/mean.cuh | 8 ++-- cpp/include/raft/stats/mean_center.cuh | 20 ++++---- cpp/include/raft/stats/meanvar.cuh | 10 ++-- cpp/include/raft/stats/minmax.cuh | 26 +++++----- cpp/include/raft/stats/mutual_info_score.cuh | 11 +++-- cpp/include/raft/stats/r2_score.cuh | 6 +-- cpp/include/raft/stats/rand_index.cuh | 7 +-- cpp/include/raft/stats/regression_metrics.cuh | 6 +-- cpp/include/raft/stats/silhouette_score.cuh | 13 ++--- cpp/include/raft/stats/stddev.cuh | 20 ++++---- cpp/include/raft/stats/sum.cuh | 6 +-- .../raft/stats/trustworthiness_score.cuh | 2 +- cpp/include/raft/stats/v_measure.cuh | 13 ++--- cpp/include/raft/stats/weighted_mean.cuh | 26 +++++----- 23 files changed, 154 insertions(+), 146 deletions(-) diff --git a/cpp/include/raft/stats/adjusted_rand_index.cuh b/cpp/include/raft/stats/adjusted_rand_index.cuh index c6fac83c4d..33058e3a76 100644 --- a/cpp/include/raft/stats/adjusted_rand_index.cuh +++ b/cpp/include/raft/stats/adjusted_rand_index.cuh @@ -55,9 +55,10 @@ double adjusted_rand_index(const T* firstClusterArray, * @tparam value_t data-type for input label arrays * @tparam MathT integral data-type used for computing n-choose-r * @tparam idx_t Index type of matrix extent. - * @param handle: the raft handle. - * @param first_cluster_array: the array of classes - * @param second_cluster_array: the array of classes + * @param[in] handle: the raft handle. + * @param[in] first_cluster_array: the array of classes + * @param[in] second_cluster_array: the array of classes + * @return the Adjusted RandIndex */ template double adjusted_rand_index(const raft::handle_t& handle, diff --git a/cpp/include/raft/stats/completeness_score.cuh b/cpp/include/raft/stats/completeness_score.cuh index 7ae963e12c..fd535e77d5 100644 --- a/cpp/include/raft/stats/completeness_score.cuh +++ b/cpp/include/raft/stats/completeness_score.cuh @@ -52,11 +52,12 @@ double completeness_score(const T* truthClusterArray, * * @tparam value_t the data type * @tparam idx_t Index type of matrix extent. - * @param handle: the raft handle. - * @param truth_cluster_array: the array of truth classes of type value_t - * @param pred_cluster_array: the array of predicted classes of type value_t - * @param lower_label_range: the lower bound of the range of labels - * @param upper_label_range: the upper bound of the range of labels + * @param[in] handle: the raft handle. + * @param[in] truth_cluster_array: the array of truth classes of type value_t + * @param[in] pred_cluster_array: the array of predicted classes of type value_t + * @param[in] lower_label_range: the lower bound of the range of labels + * @param[in] upper_label_range: the upper bound of the range of labels + * @return the cluster completeness score */ template double completeness_score(const raft::handle_t& handle, diff --git a/cpp/include/raft/stats/contingency_matrix.cuh b/cpp/include/raft/stats/contingency_matrix.cuh index b9ec275dea..8de27cc51a 100644 --- a/cpp/include/raft/stats/contingency_matrix.cuh +++ b/cpp/include/raft/stats/contingency_matrix.cuh @@ -19,8 +19,8 @@ #pragma once -#include #include +#include #include #include #include @@ -49,16 +49,16 @@ void getInputClassCardinality( * size of matrix = (maxLabel - minLabel + 1)^2 * sizeof(int) * @tparam value_t label type * @tparam idx_t Index type of matrix extent. - * @param handle: the raft handle. - * @param groundTruth: device 1-d array for ground truth (num of rows) - * @param minLabel: [out] calculated min value in input array - * @param maxLabel: [out] calculated max value in input array + * @param[in] handle: the raft handle. + * @param[in] groundTruth: device 1-d array for ground truth (num of rows) + * @param[out] minLabel: calculated min value in input array + * @param[out] maxLabel: calculated max value in input array */ template void get_input_class_cardinality(const raft::handle_t& handle, - raft::device_vector_view groundTruth, - raft::host_scalar_view minLabel, - raft::host_scalar_view maxLabel) + raft::device_vector_view groundTruth, + raft::host_scalar_view minLabel, + raft::host_scalar_view maxLabel) { RAFT_EXPECTS(minLabel.data_handle() != nullptr, "Invalid minLabel pointer"); RAFT_EXPECTS(maxLabel.data_handle() != nullptr, "Invalid maxLabel pointer"); @@ -138,19 +138,14 @@ void contingencyMatrix(const T* groundTruth, * @tparam out_t output matrix type * @tparam idx_t Index type of matrix extent. * @tparam layout_t Layout type of the input data. - * @tparam workspace_value_t Value type of the workspace data data. - * @param handle: the raft handle. - * @param ground_truth: device 1-d array for ground truth (num of rows) - * @param predicted_label: device 1-d array for prediction (num of columns) - * @param out_mat: output buffer for contingency matrix - * @param workspace: Optional, workspace memory allocation - * @param min_label: Optional, min value in input ground truth array - * @param max_label: Optional, max value in input ground truth array + * @param[in] handle: the raft handle. + * @param[in] ground_truth: device 1-d array for ground truth (num of rows) + * @param[in] predicted_label: device 1-d array for prediction (num of columns) + * @param[out] out_mat: output buffer for contingency matrix + * @param[in] min_label: Optional, min value in input ground truth array + * @param[in] max_label: Optional, max value in input ground truth array */ -template +template void contingency_matrix(const raft::handle_t& handle, raft::device_vector_view ground_truth, raft::device_vector_view predicted_label, @@ -168,13 +163,12 @@ void contingency_matrix(const raft::handle_t& handle, if (min_label.has_value()) { min_label_value = min_label.value(); } if (max_label.has_value()) { max_label_value = max_label.value(); } - auto workspace_sz = detail::getContingencyMatrixWorkspaceSize( - ground_truth.extent(0), - ground_truth.data_handle(), - handle.get_stream(), - min_label_value, - max_label_value); - auto workspace = raft::make_device_vector(handle, workspace_sz); + auto workspace_sz = detail::getContingencyMatrixWorkspaceSize(ground_truth.extent(0), + ground_truth.data_handle(), + handle.get_stream(), + min_label_value, + max_label_value); + auto workspace = raft::make_device_vector(handle, workspace_sz); detail::contingencyMatrix(ground_truth.data_handle(), predicted_label.data_handle(), diff --git a/cpp/include/raft/stats/cov.cuh b/cpp/include/raft/stats/cov.cuh index 89e0efbfa7..a0c2ed2090 100644 --- a/cpp/include/raft/stats/cov.cuh +++ b/cpp/include/raft/stats/cov.cuh @@ -67,15 +67,15 @@ void cov(const raft::handle_t& handle, * @tparam value_t the data type * @tparam idx_t the index type * @tparam layout_t Layout type of the input data. - * @param handle the raft handle - * @param data the input matrix (this will get mean-centered at the end!) + * @param[in] handle the raft handle + * @param[in] data the input matrix (this will get mean-centered at the end!) * (length = nrows * ncols) - * @param mu mean vector of the input matrix (length = ncols) - * @param covar the output covariance matrix (length = ncols * ncols) - * @param sample whether to evaluate sample covariance or not. In other words, + * @param[in] mu mean vector of the input matrix (length = ncols) + * @param[out] covar the output covariance matrix (length = ncols * ncols) + * @param[in] sample whether to evaluate sample covariance or not. In other words, * whether to normalize the output using N-1 or N, for true or false, * respectively - * @param stable whether to run the slower-but-numerically-stable version or not + * @param[in] stable whether to run the slower-but-numerically-stable version or not * @note if stable=true, then the input data will be mean centered after this * function returns! */ diff --git a/cpp/include/raft/stats/dispersion.cuh b/cpp/include/raft/stats/dispersion.cuh index c651318003..80974cae2a 100644 --- a/cpp/include/raft/stats/dispersion.cuh +++ b/cpp/include/raft/stats/dispersion.cuh @@ -62,14 +62,14 @@ DataT dispersion(const DataT* centroids, * automatically finding the 'k' (in kmeans) that improves this metric. * @tparam value_t data type * @tparam idx_t index type - * @param handle the raft handle - * @param centroids the cluster centroids. This is assumed to be row-major + * @param[in] handle the raft handle + * @param[in] centroids the cluster centroids. This is assumed to be row-major * and of dimension (n_clusters x dim) - * @param cluster_sizes number of points in the dataset which belong to each + * @param[in] cluster_sizes number of points in the dataset which belong to each * cluster. This is of length n_clusters - * @param global_centroid compute the global weighted centroid of all cluster + * @param[out] global_centroid compute the global weighted centroid of all cluster * centroids. This is of length dim. Use std::nullopt to not return it. - * @param n_points number of points in the dataset + * @param[in] n_points number of points in the dataset * @return the cluster dispersion value */ template @@ -84,6 +84,8 @@ value_t dispersion(const raft::handle_t& handle, value_t* global_centroid_ptr = nullptr; if (global_centroid.has_value()) { + RAFT_EXPECTS(global_centroid.value().extent(0) == centroids.extent(1), + "Size mismatch between global_centroid and centroids"); RAFT_EXPECTS(global_centroid.value().is_exhaustive(), "global_centroid must be contiguous"); global_centroid_ptr = global_centroid.value().data_handle(); } diff --git a/cpp/include/raft/stats/entropy.cuh b/cpp/include/raft/stats/entropy.cuh index 50e09942d6..8a98a03c6b 100644 --- a/cpp/include/raft/stats/entropy.cuh +++ b/cpp/include/raft/stats/entropy.cuh @@ -52,10 +52,10 @@ double entropy(const T* clusterArray, * * @tparam value_t data type * @tparam idx_t index type - * @param handle the raft handle - * @param cluster_array: the array of classes of type value_t - * @param lower_label_range: the lower bound of the range of labels - * @param upper_label_range: the upper bound of the range of labels + * @param[in] handle the raft handle + * @param[in] cluster_array: the array of classes of type value_t + * @param[in] lower_label_range: the lower bound of the range of labels + * @param[in] upper_label_range: the upper bound of the range of labels * @return the entropy score */ template diff --git a/cpp/include/raft/stats/histogram.cuh b/cpp/include/raft/stats/histogram.cuh index be88a3fbbd..98306abf98 100644 --- a/cpp/include/raft/stats/histogram.cuh +++ b/cpp/include/raft/stats/histogram.cuh @@ -69,11 +69,11 @@ void histogram(HistType type, * @tparam value_t input data type * @tparam idx_t data type used to compute indices * @tparam binner_op takes the input data and computes its bin index - * @param handle the raft handle - * @param type histogram implementation type to choose - * @param data input data col-major (length = nrows * ncols) - * @param bins the output bins col-major (length = nbins * ncols) - * @param binner the operation that computes the bin index of the input data + * @param[in] handle the raft handle + * @param[in] type histogram implementation type to choose + * @param[in] data input data col-major (length = nrows * ncols) + * @param[out] bins the output bins col-major (length = nbins * ncols) + * @param[in] binner the operation that computes the bin index of the input data * * @note signature of binner_op is `int func(value_t, IdxT);` */ diff --git a/cpp/include/raft/stats/homogeneity_score.cuh b/cpp/include/raft/stats/homogeneity_score.cuh index 333a57ee2a..91c479bc99 100644 --- a/cpp/include/raft/stats/homogeneity_score.cuh +++ b/cpp/include/raft/stats/homogeneity_score.cuh @@ -55,11 +55,12 @@ double homogeneity_score(const T* truthClusterArray, * * @tparam value_t data type * @tparam idx_t index type - * @param handle the raft handle - * @param truth_cluster_array: the array of truth classes of type value_t - * @param pred_cluster_array: the array of predicted classes of type value_t - * @param lower_label_range: the lower bound of the range of labels - * @param upper_label_range: the upper bound of the range of labels + * @param[in] handle the raft handle + * @param[in] truth_cluster_array: the array of truth classes of type value_t + * @param[in] pred_cluster_array: the array of predicted classes of type value_t + * @param[in] lower_label_range: the lower bound of the range of labels + * @param[in] upper_label_range: the upper bound of the range of labels + * @return the homogeneity score */ template double homogeneity_score(const raft::handle_t& handle, diff --git a/cpp/include/raft/stats/kl_divergence.cuh b/cpp/include/raft/stats/kl_divergence.cuh index 8cbe28c864..265e87dc68 100644 --- a/cpp/include/raft/stats/kl_divergence.cuh +++ b/cpp/include/raft/stats/kl_divergence.cuh @@ -49,9 +49,10 @@ DataT kl_divergence(const DataT* modelPDF, const DataT* candidatePDF, int size, * * @tparam value_t: Data type of the input array * @tparam idx_t index type - * @param handle the raft handle - * @param modelPDF: the model array of probability density functions of type value_t - * @param candidatePDF: the candidate array of probability density functions of type value_t + * @param[in] handle the raft handle + * @param[in] modelPDF: the model array of probability density functions of type value_t + * @param[in] candidatePDF: the candidate array of probability density functions of type value_t + * @return the KL Divergence value */ template value_t kl_divergence(const raft::handle_t& handle, diff --git a/cpp/include/raft/stats/mean.cuh b/cpp/include/raft/stats/mean.cuh index e4e3526179..1e2592d3c6 100644 --- a/cpp/include/raft/stats/mean.cuh +++ b/cpp/include/raft/stats/mean.cuh @@ -58,10 +58,10 @@ void mean( * @tparam value_t the data type * @tparam idx_t index type * @tparam layout_t Layout type of the input matrix. - * @param handle the raft handle - * @param data: the input matrix - * @param mu: the output mean vector - * @param sample: whether to evaluate sample mean or not. In other words, whether + * @param[in] handle the raft handle + * @param[in] data: the input matrix + * @param[out] mu: the output mean vector + * @param[in] sample: whether to evaluate sample mean or not. In other words, whether * to normalize the output using N-1 or N, for true or false, respectively */ template diff --git a/cpp/include/raft/stats/mean_center.cuh b/cpp/include/raft/stats/mean_center.cuh index e9c24595fc..0a10e406de 100644 --- a/cpp/include/raft/stats/mean_center.cuh +++ b/cpp/include/raft/stats/mean_center.cuh @@ -57,11 +57,11 @@ void meanCenter(Type* out, * @tparam value_t the data type * @tparam idx_t index type * @tparam layout_t Layout type of the input matrix. - * @param handle the raft handle - * @param data input matrix - * @param mu the mean vector - * @param out the output mean-centered matrix - * @param bcast_along_rows whether to broadcast vector along rows or columns + * @param[in] handle the raft handle + * @param[in] data input matrix + * @param[in] mu the mean vector + * @param[out] out the output mean-centered matrix + * @param[in] bcast_along_rows whether to broadcast vector along rows or columns */ template void mean_center(const raft::handle_t& handle, @@ -121,11 +121,11 @@ void meanAdd(Type* out, * @tparam idx_t index type * @tparam layout_t Layout type of the input matrix. * @tparam TPB threads per block of the cuda kernel launched - * @param handle the raft handle - * @param data input matrix - * @param mu the mean vector - * @param out the output mean-centered matrix - * @param bcast_along_rows whether to broadcast vector along rows or columns + * @param[in] handle the raft handle + * @param[in] data input matrix + * @param[in] mu the mean vector + * @param[out] out the output mean-centered matrix + * @param[in] bcast_along_rows whether to broadcast vector along rows or columns */ template void mean_add(const raft::handle_t& handle, diff --git a/cpp/include/raft/stats/meanvar.cuh b/cpp/include/raft/stats/meanvar.cuh index 72c243f194..65f7e6cf22 100644 --- a/cpp/include/raft/stats/meanvar.cuh +++ b/cpp/include/raft/stats/meanvar.cuh @@ -67,11 +67,11 @@ void meanvar(Type* mean, * @tparam value_t the data type * @tparam idx_t Integer type used for addressing * @tparam layout_t Layout type of the input matrix. - * @param handle the raft handle - * @param [in] data the input matrix of size [N, D] - * @param [out] mean the output mean vector of size D - * @param [out] var the output variance vector of size D - * @param sample whether to evaluate sample variance or not. In other words, whether to + * @param[in] handle the raft handle + * @param[in] data the input matrix of size [N, D] + * @param[out] mean the output mean vector of size D + * @param[out] var the output variance vector of size D + * @param[in] sample whether to evaluate sample variance or not. In other words, whether to * normalize the variance using N-1 or N, for true or false respectively. */ template diff --git a/cpp/include/raft/stats/minmax.cuh b/cpp/include/raft/stats/minmax.cuh index e1b18185b3..305e63cc10 100644 --- a/cpp/include/raft/stats/minmax.cuh +++ b/cpp/include/raft/stats/minmax.cuh @@ -76,16 +76,16 @@ void minmax(const T* data, * * @tparam value_t Data type of input matrix element. * @tparam idx_t Index type of matrix extent. - * @param handle the raft handle - * @param data input data col-major of size [nrows, ncols], unless rowids or + * @param[in] handle the raft handle + * @param[in] data input data col-major of size [nrows, ncols], unless rowids or * colids length is smaller - * @param rowids actual row ID mappings. It is of length nrows. If you want to - * skip this index lookup entirely, pass nullptr - * @param colids actual col ID mappings. It is of length ncols. If you want to - * skip this index lookup entirely, pass nullptr - * @param globalmin final col-wise global minimum (size = ncols) - * @param globalmax final col-wise global maximum (size = ncols) - * @param sampledcols output sampled data. Pass nullptr if you don't need this + * @param[in] rowids optional row ID mappings of length nrows. If you want to + * skip this index lookup entirely, pass std::nullopt + * @param[in] colids optional col ID mappings of length ncols. If you want to + * skip this index lookup entirely, pass std::nullopt + * @param[out] globalmin final col-wise global minimum (size = ncols) + * @param[out] globalmax final col-wise global maximum (size = ncols) + * @param[out] sampledcols output sampled data. Pass std::nullopt if you don't need this * @note This method makes the following assumptions: * 1. input and output matrices are assumed to be col-major * 2. ncols is small enough to fit the whole of min/max values across all cols @@ -108,13 +108,17 @@ void minmax(const raft::handle_t& handle, auto row_stride = data.stride(1); if (rowids.has_value()) { rowids_ptr = rowids.value().data_handle(); - nrows = rowids.value().extent(0); + RAFT_EXPECTS(rowids.value().extent(0) <= nrows, "Rowids size is greater than nrows"); + nrows = rowids.value().extent(0); } if (colids.has_value()) { colids_ptr = colids.value().data_handle(); - ncols = colids.value().extent(0); + RAFT_EXPECTS(colids.value().extent(0) <= ncols, "Colids size is greater than ncols"); + ncols = colids.value().extent(0); } if (sampledcols.has_value()) { sampledcols_ptr = sampledcols.value().data_handle(); } + RAFT_EXPECTS(globalmin.extent(0) == ncols, "Size mismatch betwen globalmin and ncols"); + RAFT_EXPECTS(globalmax.extent(0) == ncols, "Size mismatch betwen globalmax and ncols"); detail::minmax(data.data_handle(), rowids_ptr, colids_ptr, diff --git a/cpp/include/raft/stats/mutual_info_score.cuh b/cpp/include/raft/stats/mutual_info_score.cuh index 2af8a939fc..e953f12461 100644 --- a/cpp/include/raft/stats/mutual_info_score.cuh +++ b/cpp/include/raft/stats/mutual_info_score.cuh @@ -52,11 +52,12 @@ double mutual_info_score(const T* firstClusterArray, * more info on mutual information * @tparam value_t the data type * @tparam idx_t index type - * @param handle the raft handle - * @param first_cluster_array: the array of classes of type value_t - * @param second_cluster_array: the array of classes of type value_t - * @param lower_label_range: the lower bound of the range of labels - * @param upper_label_range: the upper bound of the range of labels + * @param[in] handle the raft handle + * @param[in] first_cluster_array: the array of classes of type value_t + * @param[in] second_cluster_array: the array of classes of type value_t + * @param[in] lower_label_range: the lower bound of the range of labels + * @param[in] upper_label_range: the upper bound of the range of labels + * @return the mutual information score */ template double mutual_info_score(const raft::handle_t& handle, diff --git a/cpp/include/raft/stats/r2_score.cuh b/cpp/include/raft/stats/r2_score.cuh index 44774d710c..6e00590071 100644 --- a/cpp/include/raft/stats/r2_score.cuh +++ b/cpp/include/raft/stats/r2_score.cuh @@ -57,9 +57,9 @@ math_t r2_score(math_t* y, math_t* y_hat, int n, cudaStream_t stream) * * @tparam value_t the data type * @tparam idx_t index type - * @param handle the raft handle - * @param y: Array of ground-truth response variables - * @param y_hat: Array of predicted response variables + * @param[in] handle the raft handle + * @param[in] y: Array of ground-truth response variables + * @param[in] y_hat: Array of predicted response variables * @return: The R-squared value. */ template diff --git a/cpp/include/raft/stats/rand_index.cuh b/cpp/include/raft/stats/rand_index.cuh index d689dace87..3612d93521 100644 --- a/cpp/include/raft/stats/rand_index.cuh +++ b/cpp/include/raft/stats/rand_index.cuh @@ -44,9 +44,10 @@ double rand_index(T* firstClusterArray, T* secondClusterArray, uint64_t size, cu * more info on rand index * @tparam value_t the data type * @tparam idx_t index type - * @param handle the raft handle - * @param first_cluster_array: the array of classes of type value_t - * @param second_cluster_array: the array of classes of type value_t + * @param[in] handle the raft handle + * @param[in] first_cluster_array: the array of classes of type value_t + * @param[in] second_cluster_array: the array of classes of type value_t + * @return: The RandIndex value. */ template double rand_index(const raft::handle_t& handle, diff --git a/cpp/include/raft/stats/regression_metrics.cuh b/cpp/include/raft/stats/regression_metrics.cuh index 3c1ef094d0..fd33f2af49 100644 --- a/cpp/include/raft/stats/regression_metrics.cuh +++ b/cpp/include/raft/stats/regression_metrics.cuh @@ -57,9 +57,9 @@ void regression_metrics(const T* predictions, * @brief Compute regression metrics mean absolute error, mean squared error, median absolute error * @tparam value_t the data type for predictions (e.g., float or double for regression). * @tparam idx_t index type - * @param[in] handle the raft handle - * @param[in] predictions: array of predictions. - * @param[in] ref_predictions: array of reference (ground-truth) predictions. + * @param[in] handle the raft handle + * @param[in] predictions: array of predictions. + * @param[in] ref_predictions: array of reference (ground-truth) predictions. * @param[out] mean_abs_error: Mean Absolute Error. Sum over n of (|predictions[i] - * ref_predictions[i]|) / n. * @param[out] mean_squared_error: Mean Squared Error. Sum over n of ((predictions[i] - diff --git a/cpp/include/raft/stats/silhouette_score.cuh b/cpp/include/raft/stats/silhouette_score.cuh index 86f6be5040..8952f17ca1 100644 --- a/cpp/include/raft/stats/silhouette_score.cuh +++ b/cpp/include/raft/stats/silhouette_score.cuh @@ -80,15 +80,16 @@ value_t silhouette_score_batched( * @tparam value_t: type of the data samples * @tparam label_t: type of the labels * @tparam idx_t index type - * @param[in] handle: raft handle for managing expensive resources - * @param[in] X_in: input matrix Data in row-major format (nRows x nCols) - * @param[in] labels: the pointer to the array containing labels for every data sample (length: + * @param[in] handle: raft handle for managing expensive resources + * @param[in] X_in: input matrix Data in row-major format (nRows x nCols) + * @param[in] labels: the pointer to the array containing labels for every data sample (length: * nRows) * @param[out] silhouette_score_per_sample: optional array populated with the silhouette score * for every sample (length: nRows) - * @param n_unique_labels: number of unique labels in the labels array - * @param metric: the numerical value that maps to the type of distance metric to be used in the - * calculations + * @param[in] n_unique_labels: number of unique labels in the labels array + * @param[in] metric: the numerical value that maps to the type of distance metric to be used in + * the calculations + * @return: The silhouette score. */ template value_t silhouette_score( diff --git a/cpp/include/raft/stats/stddev.cuh b/cpp/include/raft/stats/stddev.cuh index 5520599226..54dfd5554b 100644 --- a/cpp/include/raft/stats/stddev.cuh +++ b/cpp/include/raft/stats/stddev.cuh @@ -95,11 +95,11 @@ void vars(Type* var, * @tparam value_t the data type * @tparam idx_t Integer type used to for addressing * @tparam layout_t Layout type of the input matrix. - * @param handle the raft handle - * @param data the input matrix - * @param mu the mean vector - * @param std the output stddev vector - * @param sample whether to evaluate sample stddev or not. In other words, + * @param[in] handle the raft handle + * @param[in] data the input matrix + * @param[in] mu the mean vector + * @param[out] std the output stddev vector + * @param[in] sample whether to evaluate sample stddev or not. In other words, * whether * to normalize the output using N-1 or N, for true or false, respectively */ @@ -135,11 +135,11 @@ void stddev(const raft::handle_t& handle, * @tparam value_t the data type * @tparam idx_t Integer type used to for addressing * @tparam layout_t Layout type of the input matrix. - * @param handle the raft handle - * @param data the input matrix - * @param mu the mean vector - * @param var the output stddev vector - * @param sample whether to evaluate sample stddev or not. In other words, + * @param[in] handle the raft handle + * @param[in] data the input matrix + * @param[in] mu the mean vector + * @param[out] var the output stddev vector + * @param[in] sample whether to evaluate sample stddev or not. In other words, * whether * to normalize the output using N-1 or N, for true or false, respectively */ diff --git a/cpp/include/raft/stats/sum.cuh b/cpp/include/raft/stats/sum.cuh index 1156890095..18265c5e3a 100644 --- a/cpp/include/raft/stats/sum.cuh +++ b/cpp/include/raft/stats/sum.cuh @@ -54,9 +54,9 @@ void sum(Type* output, const Type* input, IdxType D, IdxType N, bool rowMajor, c * @tparam value_t the data type * @tparam idx_t Integer type used to for addressing * @tparam layout_t Layout type of the input matrix. - * @param handle the raft handle - * @param input the input matrix - * @param output the output mean vector + * @param[in] handle the raft handle + * @param[in] input the input matrix + * @param[out] output the output mean vector */ template void sum(const raft::handle_t& handle, diff --git a/cpp/include/raft/stats/trustworthiness_score.cuh b/cpp/include/raft/stats/trustworthiness_score.cuh index 03c9e38e99..e188bd9b38 100644 --- a/cpp/include/raft/stats/trustworthiness_score.cuh +++ b/cpp/include/raft/stats/trustworthiness_score.cuh @@ -60,7 +60,7 @@ double trustworthiness_score(const raft::handle_t& h, * @param[in] X_embedded: Data in target dimension (embedding) * @param[in] n_neighbors Number of neighbors considered by trustworthiness score * @param[in] batch_size Batch size - * @return[out] Trustworthiness score + * @return Trustworthiness score */ template double trustworthiness_score(const raft::handle_t& handle, diff --git a/cpp/include/raft/stats/v_measure.cuh b/cpp/include/raft/stats/v_measure.cuh index c0d97915a7..b8f16695bc 100644 --- a/cpp/include/raft/stats/v_measure.cuh +++ b/cpp/include/raft/stats/v_measure.cuh @@ -54,12 +54,13 @@ double v_measure(const T* truthClusterArray, * * @tparam value_t the data type * @tparam idx_t Integer type used to for addressing - * @param handle the raft handle - * @param truth_cluster_array: the array of truth classes of type T - * @param pred_cluster_array: the array of predicted classes of type T - * @param lower_label_range: the lower bound of the range of labels - * @param upper_label_range: the upper bound of the range of labels - * @param beta: v_measure parameter + * @param[in] handle the raft handle + * @param[in] truth_cluster_array: the array of truth classes of type T + * @param[in] pred_cluster_array: the array of predicted classes of type T + * @param[in] lower_label_range: the lower bound of the range of labels + * @param[in] upper_label_range: the upper bound of the range of labels + * @param[in] beta: v_measure parameter + * @return the v-measure between the two clusters */ template double v_measure(const raft::handle_t& handle, diff --git a/cpp/include/raft/stats/weighted_mean.cuh b/cpp/include/raft/stats/weighted_mean.cuh index 6b234f8bd4..0849dae29d 100644 --- a/cpp/include/raft/stats/weighted_mean.cuh +++ b/cpp/include/raft/stats/weighted_mean.cuh @@ -100,11 +100,11 @@ void colWeightedMean( * @tparam value_t the data type * @tparam idx_t Integer type used to for addressing * @tparam layout_t Layout type of the input matrix. - * @param handle the raft handle - * @param data the input matrix of size nrows * ncols - * @param weights weight of size ncols if along_row is true, else of size nrows - * @param mu the output mean vector of size ncols if along_row is true, else of size nrows - * @param along_rows whether to reduce along rows or columns + * @param[in] handle the raft handle + * @param[in] data the input matrix of size nrows * ncols + * @param[in] weights weight of size ncols if along_row is true, else of size nrows + * @param[out] mu the output mean vector of size ncols if along_row is true, else of size nrows + * @param[in] along_rows whether to reduce along rows or columns */ template void weighted_mean(const raft::handle_t& handle, @@ -142,10 +142,10 @@ void weighted_mean(const raft::handle_t& handle, * * @tparam value_t the data type * @tparam idx_t Integer type used to for addressing - * @param handle the raft handle - * @param data the input matrix of size nrows * ncols - * @param weights per-col weight - * @param mu the output mean vector of size ncols + * @param[in] handle the raft handle + * @param[in] data the input matrix of size nrows * ncols + * @param[in] weights per-col weight + * @param[out] mu the output mean vector of size ncols */ template void rowWeightedMean(const raft::handle_t& handle, @@ -162,10 +162,10 @@ void rowWeightedMean(const raft::handle_t& handle, * * @tparam value_t the data type * @tparam idx_t Integer type used to for addressing - * @param handle the raft handle - * @param data the input matrix of size nrows * ncols - * @param weights per-row weight - * @param mu the output mean vector of size nrows + * @param[in] handle the raft handle + * @param[in] data the input matrix of size nrows * ncols + * @param[in] weights per-row weight + * @param[out] mu the output mean vector of size nrows */ template void colWeightedMean(const raft::handle_t& handle, From 2a4b5b84958bb9f868b42ef3f11606c9b02b3225 Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Wed, 28 Sep 2022 22:13:27 +0200 Subject: [PATCH 22/40] Adding const specifier when possible --- .../stats/detail/batched/silhouette_score.cuh | 14 ++++----- cpp/include/raft/stats/detail/rand_index.cuh | 4 +-- .../raft/stats/detail/silhouette_score.cuh | 8 ++--- cpp/include/raft/stats/r2_score.cuh | 11 +++++-- cpp/include/raft/stats/rand_index.cuh | 4 +-- cpp/include/raft/stats/silhouette_score.cuh | 8 ++--- .../raft/stats/trustworthiness_score.cuh | 31 +++++++++++-------- cpp/test/stats/rand_index.cu | 8 ++--- cpp/test/stats/silhouette_score.cu | 8 ++--- cpp/test/stats/trustworthiness.cu | 3 +- 10 files changed, 55 insertions(+), 44 deletions(-) diff --git a/cpp/include/raft/stats/detail/batched/silhouette_score.cuh b/cpp/include/raft/stats/detail/batched/silhouette_score.cuh index e3b56d2183..25a3721af1 100644 --- a/cpp/include/raft/stats/detail/batched/silhouette_score.cuh +++ b/cpp/include/raft/stats/detail/batched/silhouette_score.cuh @@ -112,7 +112,7 @@ __global__ void compute_chunked_a_b_kernel(value_t* a, template rmm::device_uvector get_cluster_counts(const raft::handle_t& handle, - label_idx* y, + const label_idx* y, value_idx& n_rows, label_idx& n_labels) { @@ -129,8 +129,8 @@ rmm::device_uvector get_cluster_counts(const raft::handle_t& handle, template rmm::device_uvector get_pairwise_distance(const raft::handle_t& handle, - value_t* left_begin, - value_t* right_begin, + const value_t* left_begin, + const value_t* right_begin, value_idx& n_left_rows, value_idx& n_right_rows, value_idx& n_cols, @@ -170,10 +170,10 @@ void compute_chunked_a_b(const raft::handle_t& handle, template value_t silhouette_score( const raft::handle_t& handle, - value_t* X, + const value_t* X, value_idx n_rows, value_idx n_cols, - label_idx* y, + const label_idx* y, label_idx n_labels, value_t* scores, value_idx chunk, @@ -221,8 +221,8 @@ value_t silhouette_score( auto chunk_stream = handle.get_next_usable_stream(i + chunk * j); - auto* left_begin = X + (i * n_cols); - auto* right_begin = X + (j * n_cols); + const auto* left_begin = X + (i * n_cols); + const auto* right_begin = X + (j * n_cols); auto n_left_rows = (i + chunk) < n_rows ? chunk : (n_rows - i); auto n_right_rows = (j + chunk) < n_rows ? chunk : (n_rows - j); diff --git a/cpp/include/raft/stats/detail/rand_index.cuh b/cpp/include/raft/stats/detail/rand_index.cuh index a827427d8f..1e66216929 100644 --- a/cpp/include/raft/stats/detail/rand_index.cuh +++ b/cpp/include/raft/stats/detail/rand_index.cuh @@ -125,8 +125,8 @@ __global__ void computeTheNumerator( * @param stream: the cudaStream object */ template -double compute_rand_index(T* firstClusterArray, - T* secondClusterArray, +double compute_rand_index(const T* firstClusterArray, + const T* secondClusterArray, uint64_t size, cudaStream_t stream) { diff --git a/cpp/include/raft/stats/detail/silhouette_score.cuh b/cpp/include/raft/stats/detail/silhouette_score.cuh index f2e138ed6f..cfaff5fcce 100644 --- a/cpp/include/raft/stats/detail/silhouette_score.cuh +++ b/cpp/include/raft/stats/detail/silhouette_score.cuh @@ -56,7 +56,7 @@ template __global__ void populateAKernel(DataT* sampleToClusterSumOfDistances, DataT* binCountArray, DataT* d_aArray, - LabelT* labels, + const LabelT* labels, int nRows, int nLabels, const DataT MAX_VAL) @@ -102,7 +102,7 @@ __global__ void populateAKernel(DataT* sampleToClusterSumOfDistances, * @param stream: the cuda stream where to launch this kernel */ template -void countLabels(LabelT* labels, +void countLabels(const LabelT* labels, DataT* binCountArray, int nRows, int nUniqueLabels, @@ -205,10 +205,10 @@ struct MinOp { template DataT silhouette_score( const raft::handle_t& handle, - DataT* X_in, + const DataT* X_in, int nRows, int nCols, - LabelT* labels, + const LabelT* labels, int nLabels, DataT* silhouette_scorePerSample, cudaStream_t stream, diff --git a/cpp/include/raft/stats/r2_score.cuh b/cpp/include/raft/stats/r2_score.cuh index 6e00590071..c905a620fb 100644 --- a/cpp/include/raft/stats/r2_score.cuh +++ b/cpp/include/raft/stats/r2_score.cuh @@ -61,16 +61,21 @@ math_t r2_score(math_t* y, math_t* y_hat, int n, cudaStream_t stream) * @param[in] y: Array of ground-truth response variables * @param[in] y_hat: Array of predicted response variables * @return: The R-squared value. + * @note The constness of y and y_hat is currently casted away. + * TODO: Change the underlying implementation to remove the need to const_cast */ template value_t r2_score(const raft::handle_t& handle, - raft::device_vector_view y, - raft::device_vector_view y_hat) + raft::device_vector_view y, + raft::device_vector_view y_hat) { RAFT_EXPECTS(y.extent(0) == y_hat.extent(0), "Size mismatch betwen y and y_hat"); RAFT_EXPECTS(y.is_exhaustive(), "y must be contiguous"); RAFT_EXPECTS(y_hat.is_exhaustive(), "y_hat must be contiguous"); - return detail::r2_score(y.data_handle(), y_hat.data_handle(), y.extent(0), handle.get_stream()); + return detail::r2_score(const_cast(y.data_handle()), + const_cast(y_hat.data_handle()), + y.extent(0), + handle.get_stream()); } } // namespace stats diff --git a/cpp/include/raft/stats/rand_index.cuh b/cpp/include/raft/stats/rand_index.cuh index 3612d93521..72ad53f5d9 100644 --- a/cpp/include/raft/stats/rand_index.cuh +++ b/cpp/include/raft/stats/rand_index.cuh @@ -51,8 +51,8 @@ double rand_index(T* firstClusterArray, T* secondClusterArray, uint64_t size, cu */ template double rand_index(const raft::handle_t& handle, - raft::device_vector_view first_cluster_array, - raft::device_vector_view second_cluster_array) + raft::device_vector_view first_cluster_array, + raft::device_vector_view second_cluster_array) { RAFT_EXPECTS(first_cluster_array.extent(0) == second_cluster_array.extent(0), "Size mismatch betwen first_cluster_array and second_cluster_array"); diff --git a/cpp/include/raft/stats/silhouette_score.cuh b/cpp/include/raft/stats/silhouette_score.cuh index 8952f17ca1..bb7f6fc562 100644 --- a/cpp/include/raft/stats/silhouette_score.cuh +++ b/cpp/include/raft/stats/silhouette_score.cuh @@ -94,8 +94,8 @@ value_t silhouette_score_batched( template value_t silhouette_score( const raft::handle_t& handle, - raft::device_matrix_view X_in, - raft::device_vector_view labels, + raft::device_matrix_view X_in, + raft::device_vector_view labels, std::optional> silhouette_score_per_sample, idx_t n_unique_labels, raft::distance::DistanceType metric = raft::distance::DistanceType::L2Unexpanded) @@ -122,8 +122,8 @@ value_t silhouette_score( template value_t silhouette_score_batched( const raft::handle_t& handle, - raft::device_matrix_view X, - raft::device_vector_view y, + raft::device_matrix_view X, + raft::device_vector_view y, std::optional> scores, idx_t n_unique_labels, raft::distance::DistanceType metric = raft::distance::DistanceType::L2Unexpanded) diff --git a/cpp/include/raft/stats/trustworthiness_score.cuh b/cpp/include/raft/stats/trustworthiness_score.cuh index e188bd9b38..ccd3b5c73f 100644 --- a/cpp/include/raft/stats/trustworthiness_score.cuh +++ b/cpp/include/raft/stats/trustworthiness_score.cuh @@ -61,25 +61,30 @@ double trustworthiness_score(const raft::handle_t& h, * @param[in] n_neighbors Number of neighbors considered by trustworthiness score * @param[in] batch_size Batch size * @return Trustworthiness score + * @note The constness of the data in X_embedded is currently casted away and the data is slightly + * modified. + * TODO: Change the underlying implementation to remove the need to const_cast X_embedded. */ template -double trustworthiness_score(const raft::handle_t& handle, - raft::device_matrix_view X, - raft::device_matrix_view X_embedded, - int n_neighbors, - int batch_size = 512) +double trustworthiness_score( + const raft::handle_t& handle, + raft::device_matrix_view X, + raft::device_matrix_view X_embedded, + int n_neighbors, + int batch_size = 512) { RAFT_EXPECTS(X.extent(0) == X_embedded.extent(0), "Size mismatch between X and X_embedded"); RAFT_EXPECTS(std::is_integral_v && X.extent(0) <= std::numeric_limits::max(), "Index type not supported"); - return detail::trustworthiness_score(handle, - X.data_handle(), - X_embedded.data_handle(), - X.extent(0), - X.extent(1), - X_embedded.extent(1), - n_neighbors, - batch_size); + return detail::trustworthiness_score( + handle, + X.data_handle(), + const_cast(X_embedded.data_handle()), + X.extent(0), + X.extent(1), + X_embedded.extent(1), + n_neighbors, + batch_size); } } // namespace stats } // namespace raft diff --git a/cpp/test/stats/rand_index.cu b/cpp/test/stats/rand_index.cu index 01196bf852..67e4ab5517 100644 --- a/cpp/test/stats/rand_index.cu +++ b/cpp/test/stats/rand_index.cu @@ -91,10 +91,10 @@ class randIndexTest : public ::testing::TestWithParam { raft::update_device(secondClusterArray.data(), &arr2[0], (int)size, stream); // calling the rand_index CUDA implementation - computedRandIndex = - raft::stats::rand_index(handle, - raft::make_device_vector_view(firstClusterArray.data(), size), - raft::make_device_vector_view(secondClusterArray.data(), size)); + computedRandIndex = raft::stats::rand_index( + handle, + raft::make_device_vector_view(firstClusterArray.data(), size), + raft::make_device_vector_view(secondClusterArray.data(), size)); } // declaring the data values diff --git a/cpp/test/stats/silhouette_score.cu b/cpp/test/stats/silhouette_score.cu index 03f3874b40..e4a682b4bc 100644 --- a/cpp/test/stats/silhouette_score.cu +++ b/cpp/test/stats/silhouette_score.cu @@ -175,16 +175,16 @@ class silhouetteScoreTest : public ::testing::TestWithParam(d_X.data(), nRows, nCols), + raft::make_device_vector_view(d_labels.data(), nRows), std::make_optional(raft::make_device_vector_view(sampleSilScore.data(), nRows)), nLabels, params.metric); batchedSilhouetteScore = raft::stats::silhouette_score_batched( handle, - raft::make_device_matrix_view(d_X.data(), nRows, nCols), - raft::make_device_vector_view(d_labels.data(), nRows), + raft::make_device_matrix_view(d_X.data(), nRows, nCols), + raft::make_device_vector_view(d_labels.data(), nRows), std::make_optional(raft::make_device_vector_view(sampleSilScore.data(), chunk)), nLabels, params.metric); diff --git a/cpp/test/stats/trustworthiness.cu b/cpp/test/stats/trustworthiness.cu index 97c77c00e1..cbb8228f8f 100644 --- a/cpp/test/stats/trustworthiness.cu +++ b/cpp/test/stats/trustworthiness.cu @@ -328,7 +328,8 @@ class TrustworthinessScoreTest : public ::testing::Test { score = trustworthiness_score( handle, raft::make_device_matrix_view(d_X.data(), n_sample, n_features_origin), - raft::make_device_matrix_view(d_X_embedded.data(), n_sample, n_features_embedded), + raft::make_device_matrix_view( + d_X_embedded.data(), n_sample, n_features_embedded), 5); } From 00448a17d4e5acbac394e4846a56817795ad5203 Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Mon, 3 Oct 2022 17:07:33 +0200 Subject: [PATCH 23/40] Remove default template, rename dispersion, fix silhouette_score --- .../raft/stats/adjusted_rand_index.cuh | 12 +++--- cpp/include/raft/stats/dispersion.cuh | 11 +++--- cpp/include/raft/stats/histogram.cuh | 4 +- cpp/include/raft/stats/mean.cuh | 2 +- cpp/include/raft/stats/meanvar.cuh | 2 +- cpp/include/raft/stats/r2_score.cuh | 3 +- cpp/include/raft/stats/silhouette_score.cuh | 38 ++++++++++++++----- cpp/include/raft/stats/stddev.cuh | 4 +- .../raft/stats/trustworthiness_score.cuh | 3 +- cpp/include/raft/stats/weighted_mean.cuh | 24 ++++++------ cpp/test/stats/dispersion.cu | 2 +- 11 files changed, 63 insertions(+), 42 deletions(-) diff --git a/cpp/include/raft/stats/adjusted_rand_index.cuh b/cpp/include/raft/stats/adjusted_rand_index.cuh index 33058e3a76..e1b6a241c4 100644 --- a/cpp/include/raft/stats/adjusted_rand_index.cuh +++ b/cpp/include/raft/stats/adjusted_rand_index.cuh @@ -53,14 +53,14 @@ double adjusted_rand_index(const T* firstClusterArray, * @brief Function to calculate Adjusted RandIndex as described * here * @tparam value_t data-type for input label arrays - * @tparam MathT integral data-type used for computing n-choose-r + * @tparam math_t integral data-type used for computing n-choose-r * @tparam idx_t Index type of matrix extent. * @param[in] handle: the raft handle. * @param[in] first_cluster_array: the array of classes * @param[in] second_cluster_array: the array of classes * @return the Adjusted RandIndex */ -template +template double adjusted_rand_index(const raft::handle_t& handle, raft::device_vector_view first_cluster_array, raft::device_vector_view second_cluster_array) @@ -69,10 +69,10 @@ double adjusted_rand_index(const raft::handle_t& handle, RAFT_EXPECTS(first_cluster_array.is_exhaustive(), "first_cluster_array must be contiguous"); RAFT_EXPECTS(second_cluster_array.is_exhaustive(), "second_cluster_array must be contiguous"); - return detail::compute_adjusted_rand_index(first_cluster_array.data_handle(), - second_cluster_array.data_handle(), - first_cluster_array.extent(0), - handle.get_stream()); + return detail::compute_adjusted_rand_index(first_cluster_array.data_handle(), + second_cluster_array.data_handle(), + first_cluster_array.extent(0), + handle.get_stream()); } }; // end namespace stats diff --git a/cpp/include/raft/stats/dispersion.cuh b/cpp/include/raft/stats/dispersion.cuh index 80974cae2a..c20ebd4d59 100644 --- a/cpp/include/raft/stats/dispersion.cuh +++ b/cpp/include/raft/stats/dispersion.cuh @@ -73,11 +73,12 @@ DataT dispersion(const DataT* centroids, * @return the cluster dispersion value */ template -value_t dispersion(const raft::handle_t& handle, - raft::device_matrix_view centroids, - raft::device_vector_view cluster_sizes, - std::optional> global_centroid, - const idx_t n_points) +value_t cluster_dispersion( + const raft::handle_t& handle, + raft::device_matrix_view centroids, + raft::device_vector_view cluster_sizes, + std::optional> global_centroid, + const idx_t n_points) { RAFT_EXPECTS(cluster_sizes.extent(0) == centroids.extent(0), "Size mismatch"); RAFT_EXPECTS(cluster_sizes.is_exhaustive(), "cluster_sizes must be contiguous"); diff --git a/cpp/include/raft/stats/histogram.cuh b/cpp/include/raft/stats/histogram.cuh index 98306abf98..df1c2772f1 100644 --- a/cpp/include/raft/stats/histogram.cuh +++ b/cpp/include/raft/stats/histogram.cuh @@ -77,9 +77,7 @@ void histogram(HistType type, * * @note signature of binner_op is `int func(value_t, IdxT);` */ -template > +template > void histogram(const raft::handle_t& handle, HistType type, raft::device_matrix_view data, diff --git a/cpp/include/raft/stats/mean.cuh b/cpp/include/raft/stats/mean.cuh index 1e2592d3c6..d5913e6176 100644 --- a/cpp/include/raft/stats/mean.cuh +++ b/cpp/include/raft/stats/mean.cuh @@ -64,7 +64,7 @@ void mean( * @param[in] sample: whether to evaluate sample mean or not. In other words, whether * to normalize the output using N-1 or N, for true or false, respectively */ -template +template void mean(const raft::handle_t& handle, raft::device_matrix_view data, raft::device_vector_view mu, diff --git a/cpp/include/raft/stats/meanvar.cuh b/cpp/include/raft/stats/meanvar.cuh index 65f7e6cf22..544aed092d 100644 --- a/cpp/include/raft/stats/meanvar.cuh +++ b/cpp/include/raft/stats/meanvar.cuh @@ -74,7 +74,7 @@ void meanvar(Type* mean, * @param[in] sample whether to evaluate sample variance or not. In other words, whether to * normalize the variance using N-1 or N, for true or false respectively. */ -template +template void meanvar(const raft::handle_t& handle, raft::device_matrix_view data, raft::device_vector_view mean, diff --git a/cpp/include/raft/stats/r2_score.cuh b/cpp/include/raft/stats/r2_score.cuh index c905a620fb..e7fcdb6a4e 100644 --- a/cpp/include/raft/stats/r2_score.cuh +++ b/cpp/include/raft/stats/r2_score.cuh @@ -62,7 +62,6 @@ math_t r2_score(math_t* y, math_t* y_hat, int n, cudaStream_t stream) * @param[in] y_hat: Array of predicted response variables * @return: The R-squared value. * @note The constness of y and y_hat is currently casted away. - * TODO: Change the underlying implementation to remove the need to const_cast */ template value_t r2_score(const raft::handle_t& handle, @@ -72,6 +71,8 @@ value_t r2_score(const raft::handle_t& handle, RAFT_EXPECTS(y.extent(0) == y_hat.extent(0), "Size mismatch betwen y and y_hat"); RAFT_EXPECTS(y.is_exhaustive(), "y must be contiguous"); RAFT_EXPECTS(y_hat.is_exhaustive(), "y_hat must be contiguous"); + + // TODO: Change the underlying implementation to remove the need to const_cast return detail::r2_score(const_cast(y.data_handle()), const_cast(y_hat.data_handle()), y.extent(0), diff --git a/cpp/include/raft/stats/silhouette_score.cuh b/cpp/include/raft/stats/silhouette_score.cuh index bb7f6fc562..745f623d2b 100644 --- a/cpp/include/raft/stats/silhouette_score.cuh +++ b/cpp/include/raft/stats/silhouette_score.cuh @@ -119,13 +119,33 @@ value_t silhouette_score( metric); } + +/** + * @brief function that returns the average silhouette score for a given set of data and its + * clusterings + * @tparam value_t: type of the data samples + * @tparam label_t: type of the labels + * @tparam idx_t index type + * @param[in] handle: raft handle for managing expensive resources + * @param[in] X: input matrix Data in row-major format (nRows x nCols) + * @param[in] labels: the pointer to the array containing labels for every data sample (length: + * nRows) + * @param[out] silhouette_score_per_sample: optional array populated with the silhouette score + * for every sample (length: nRows) + * @param[in] n_unique_labels: number of unique labels in the labels array + * @param[in] batch_size: number of samples per batch + * @param[in] metric: the numerical value that maps to the type of distance metric to be used in + * the calculations + * @return: The silhouette score. + */ template value_t silhouette_score_batched( const raft::handle_t& handle, raft::device_matrix_view X, - raft::device_vector_view y, - std::optional> scores, + raft::device_vector_view labels, + std::optional> silhouette_score_per_sample, idx_t n_unique_labels, + idx_t batch_size, raft::distance::DistanceType metric = raft::distance::DistanceType::L2Unexpanded) { static_assert(std::is_integral_v, @@ -133,22 +153,22 @@ value_t silhouette_score_batched( "of each mdspan argument must be an integral type."); static_assert(std::is_integral_v, "silhouette_score_batched: The label type must be an integral type."); - RAFT_EXPECTS(y.extent(0) == X.extent(0), "Size mismatch betwen y and X"); + RAFT_EXPECTS(labels.extent(0) == X.extent(0), "Size mismatch betwen labels and data"); value_t* scores_ptr = nullptr; - idx_t nscores = 0; - if (scores.has_value()) { - scores_ptr = scores.value().data_handle(); - nscores = scores.value().extent(0); + if (silhouette_score_per_sample.has_value()) { + scores_ptr = silhouette_score_per_sample.value().data_handle(); + RAFT_EXPECTS(silhouette_score_per_sample.value().extent(0) == X.extent(0), + "Size mismatch betwen silhouette_score_per_sample and data"); } return batched::detail::silhouette_score(handle, X.data_handle(), X.extent(0), X.extent(1), - y.data_handle(), + labels.data_handle(), n_unique_labels, scores_ptr, - nscores, + batch_size, metric); } diff --git a/cpp/include/raft/stats/stddev.cuh b/cpp/include/raft/stats/stddev.cuh index 54dfd5554b..2747029955 100644 --- a/cpp/include/raft/stats/stddev.cuh +++ b/cpp/include/raft/stats/stddev.cuh @@ -103,7 +103,7 @@ void vars(Type* var, * whether * to normalize the output using N-1 or N, for true or false, respectively */ -template +template void stddev(const raft::handle_t& handle, raft::device_matrix_view data, raft::device_vector_view mu, @@ -143,7 +143,7 @@ void stddev(const raft::handle_t& handle, * whether * to normalize the output using N-1 or N, for true or false, respectively */ -template +template void vars(const raft::handle_t& handle, raft::device_matrix_view data, raft::device_vector_view mu, diff --git a/cpp/include/raft/stats/trustworthiness_score.cuh b/cpp/include/raft/stats/trustworthiness_score.cuh index ccd3b5c73f..b7b3999f77 100644 --- a/cpp/include/raft/stats/trustworthiness_score.cuh +++ b/cpp/include/raft/stats/trustworthiness_score.cuh @@ -63,7 +63,6 @@ double trustworthiness_score(const raft::handle_t& h, * @return Trustworthiness score * @note The constness of the data in X_embedded is currently casted away and the data is slightly * modified. - * TODO: Change the underlying implementation to remove the need to const_cast X_embedded. */ template double trustworthiness_score( @@ -76,6 +75,8 @@ double trustworthiness_score( RAFT_EXPECTS(X.extent(0) == X_embedded.extent(0), "Size mismatch between X and X_embedded"); RAFT_EXPECTS(std::is_integral_v && X.extent(0) <= std::numeric_limits::max(), "Index type not supported"); + + // TODO: Change the underlying implementation to remove the need to const_cast X_embedded. return detail::trustworthiness_score( handle, X.data_handle(), diff --git a/cpp/include/raft/stats/weighted_mean.cuh b/cpp/include/raft/stats/weighted_mean.cuh index 0849dae29d..dba56f1a40 100644 --- a/cpp/include/raft/stats/weighted_mean.cuh +++ b/cpp/include/raft/stats/weighted_mean.cuh @@ -106,7 +106,7 @@ void colWeightedMean( * @param[out] mu the output mean vector of size ncols if along_row is true, else of size nrows * @param[in] along_rows whether to reduce along rows or columns */ -template +template void weighted_mean(const raft::handle_t& handle, raft::device_matrix_view data, raft::device_vector_view weights, @@ -147,13 +147,13 @@ void weighted_mean(const raft::handle_t& handle, * @param[in] weights per-col weight * @param[out] mu the output mean vector of size ncols */ -template -void rowWeightedMean(const raft::handle_t& handle, - raft::device_matrix_view data, - raft::device_vector_view weights, - raft::device_vector_view mu) +template +void row_weighted_mean(const raft::handle_t& handle, + raft::device_matrix_view data, + raft::device_vector_view weights, + raft::device_vector_view mu) { - weightedMean(handle, data, weights, mu, true); + weighted_mean(handle, data, weights, mu, true); } /** @@ -168,12 +168,12 @@ void rowWeightedMean(const raft::handle_t& handle, * @param[out] mu the output mean vector of size nrows */ template -void colWeightedMean(const raft::handle_t& handle, - raft::device_matrix_view data, - raft::device_vector_view weights, - raft::device_vector_view mu) +void col_weighted_mean(const raft::handle_t& handle, + raft::device_matrix_view data, + raft::device_vector_view weights, + raft::device_vector_view mu) { - weightedMean(handle, data, weights, mu, false); + weighted_mean(handle, data, weights, mu, false); } }; // end namespace stats }; // end namespace raft diff --git a/cpp/test/stats/dispersion.cu b/cpp/test/stats/dispersion.cu index 485b80364e..4f18c9fb54 100644 --- a/cpp/test/stats/dispersion.cu +++ b/cpp/test/stats/dispersion.cu @@ -64,7 +64,7 @@ class DispersionTest : public ::testing::TestWithParam> { for (const auto& val : h_counts) { npoints += val; } - actualVal = dispersion( + actualVal = cluster_dispersion( handle, raft::make_device_matrix_view(data.data(), params.clusters, params.dim), raft::make_device_vector_view(counts.data(), params.clusters), From 36f066f62cd14d253bdd620e0d7d1cb5006c0de1 Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Mon, 3 Oct 2022 17:08:38 +0200 Subject: [PATCH 24/40] Fix silhouette test file --- cpp/test/stats/silhouette_score.cu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/test/stats/silhouette_score.cu b/cpp/test/stats/silhouette_score.cu index e4a682b4bc..37a6fff786 100644 --- a/cpp/test/stats/silhouette_score.cu +++ b/cpp/test/stats/silhouette_score.cu @@ -185,8 +185,9 @@ class silhouetteScoreTest : public ::testing::TestWithParam(d_X.data(), nRows, nCols), raft::make_device_vector_view(d_labels.data(), nRows), - std::make_optional(raft::make_device_vector_view(sampleSilScore.data(), chunk)), + std::make_optional(raft::make_device_vector_view(sampleSilScore.data(), nRows)), nLabels, + chunk, params.metric); } From 39cc6431e7a3b34934f3cf2e90e731e3ad0c7236 Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Mon, 3 Oct 2022 17:12:03 +0200 Subject: [PATCH 25/40] Add overload for std::nullopt --- cpp/include/raft/stats/contingency_matrix.cuh | 24 ++++++++++++ cpp/include/raft/stats/dispersion.cuh | 18 +++++++++ cpp/include/raft/stats/silhouette_score.cuh | 39 +++++++++++++++++++ 3 files changed, 81 insertions(+) diff --git a/cpp/include/raft/stats/contingency_matrix.cuh b/cpp/include/raft/stats/contingency_matrix.cuh index 8de27cc51a..10dedc44eb 100644 --- a/cpp/include/raft/stats/contingency_matrix.cuh +++ b/cpp/include/raft/stats/contingency_matrix.cuh @@ -181,6 +181,30 @@ void contingency_matrix(const raft::handle_t& handle, max_label_value); } +/** + * @brief Overload of `contingency_matrix` to help the + * compiler find the above overload, in case users pass in + * `std::nullopt` for the optional arguments. + * + * Please see above for documentation of `contingency_matrix`. + */ +template +void contingency_matrix(const raft::handle_t& handle, + raft::device_vector_view ground_truth, + raft::device_vector_view predicted_label, + raft::device_matrix_view out_mat, + opt_min_label_t&& min_label = std::nullopt, + opt_max_label_t&& max_label = std::nullopt) +{ + std::optional opt_min_label = std::forward(min_label); + std::optional opt_max_label = std::forward(max_label); + contingency_matrix(handle, ground_truth, predicted_label, out_mat, opt_min_label, opt_max_label); +} }; // namespace stats }; // namespace raft diff --git a/cpp/include/raft/stats/dispersion.cuh b/cpp/include/raft/stats/dispersion.cuh index c20ebd4d59..8a7b0b1689 100644 --- a/cpp/include/raft/stats/dispersion.cuh +++ b/cpp/include/raft/stats/dispersion.cuh @@ -99,6 +99,24 @@ value_t cluster_dispersion( handle.get_stream()); } +/** + * @brief Overload of `cluster_dispersion` to help the + * compiler find the above overload, in case users pass in + * `std::nullopt` for the optional arguments. + * + * Please see above for documentation of `cluster_dispersion`. + */ +template +value_t cluster_dispersion( + const raft::handle_t& handle, + raft::device_matrix_view centroids, + raft::device_vector_view cluster_sizes, + std::nullopt_t global_centroid, + const idx_t n_points) +{ + std::optional> opt_centroid = global_centroid; + return cluster_dispersion(handle, centroids, cluster_sizes, opt_centroid, n_points); +} } // end namespace stats } // end namespace raft diff --git a/cpp/include/raft/stats/silhouette_score.cuh b/cpp/include/raft/stats/silhouette_score.cuh index 745f623d2b..c366c8f030 100644 --- a/cpp/include/raft/stats/silhouette_score.cuh +++ b/cpp/include/raft/stats/silhouette_score.cuh @@ -119,6 +119,25 @@ value_t silhouette_score( metric); } +/** + * @brief Overload of `silhouette_score` to help the + * compiler find the above overload, in case users pass in + * `std::nullopt` for the optional arguments. + * + * Please see above for documentation of `silhouette_score`. + */ +template +value_t silhouette_score( + const raft::handle_t& handle, + raft::device_matrix_view X_in, + raft::device_vector_view labels, + std::nullopt_t silhouette_score_per_sample, + idx_t n_unique_labels, + raft::distance::DistanceType metric = raft::distance::DistanceType::L2Unexpanded) +{ + std::optional> opt_scores = silhouette_score_per_sample; + return silhouette_score(handle, X_in, labels, opt_scores, n_unique_labels, metric); +} /** * @brief function that returns the average silhouette score for a given set of data and its @@ -172,6 +191,26 @@ value_t silhouette_score_batched( metric); } +/** + * @brief Overload of `silhouette_score_batched` to help the + * compiler find the above overload, in case users pass in + * `std::nullopt` for the optional arguments. + * + * Please see above for documentation of `silhouette_score_batched`. + */ +template +value_t silhouette_score_batched( + const raft::handle_t& handle, + raft::device_matrix_view X, + raft::device_vector_view labels, + std::nullopt_t silhouette_score_per_sample, + idx_t n_unique_labels, + idx_t batch_size, + raft::distance::DistanceType metric = raft::distance::DistanceType::L2Unexpanded) +{ + std::optional> opt_scores = silhouette_score_per_sample; + return silhouette_score_batched(handle, X, labels, opt_scores, n_unique_labels, batch_size, metric); +} }; // namespace stats }; // namespace raft From 2a97ef2b84f2c05cc78e36bc59a7dc45844b048e Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Mon, 3 Oct 2022 17:37:31 +0200 Subject: [PATCH 26/40] Add cluster dispersion definition --- cpp/include/raft/stats/dispersion.cuh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/include/raft/stats/dispersion.cuh b/cpp/include/raft/stats/dispersion.cuh index 8a7b0b1689..9f995e4d5a 100644 --- a/cpp/include/raft/stats/dispersion.cuh +++ b/cpp/include/raft/stats/dispersion.cuh @@ -60,6 +60,8 @@ DataT dispersion(const DataT* centroids, /** * @brief Compute cluster dispersion metric. This is very useful for * automatically finding the 'k' (in kmeans) that improves this metric. + * The cluster dispersion metric is defined as the square root of the sum of the + * squared distances between the cluster centroids and the global centroid * @tparam value_t data type * @tparam idx_t index type * @param[in] handle the raft handle From 461eee578f0924ea330e8f218516727e2a80b0c0 Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Tue, 4 Oct 2022 15:16:50 +0200 Subject: [PATCH 27/40] Fix bcast_along_rows --- cpp/include/raft/stats/mean_center.cuh | 8 ++++---- cpp/include/raft/stats/silhouette_score.cuh | 3 ++- cpp/include/raft/stats/weighted_mean.cuh | 9 +++++---- cpp/test/stats/mean_center.cu | 4 ++-- cpp/test/stats/weighted_mean.cu | 5 ++++- 5 files changed, 17 insertions(+), 12 deletions(-) diff --git a/cpp/include/raft/stats/mean_center.cuh b/cpp/include/raft/stats/mean_center.cuh index 0a10e406de..e9c0ac78f0 100644 --- a/cpp/include/raft/stats/mean_center.cuh +++ b/cpp/include/raft/stats/mean_center.cuh @@ -73,8 +73,8 @@ void mean_center(const raft::handle_t& handle, static_assert( std::is_same_v || std::is_same_v, "Data layout not supported"); - auto mean_vec_size = bcast_along_rows ? data.extent(1) : data.extent(0); - RAFT_EXPECTS(out.size() == data.size(), "Size mismatch"); + auto mean_vec_size = bcast_along_rows ? data.extent(0) : data.extent(1); + RAFT_EXPECTS(out.extents() == data.extents(), "Size mismatch"); RAFT_EXPECTS(mean_vec_size == mu.extent(0), "Size mismatch betwen data and mu"); RAFT_EXPECTS(out.is_exhaustive(), "out must be contiguous"); RAFT_EXPECTS(data.is_exhaustive(), "data must be contiguous"); @@ -137,8 +137,8 @@ void mean_add(const raft::handle_t& handle, static_assert( std::is_same_v || std::is_same_v, "Data layout not supported"); - auto mean_vec_size = bcast_along_rows ? data.extent(1) : data.extent(0); - RAFT_EXPECTS(out.size() == data.size(), "Size mismatch"); + auto mean_vec_size = bcast_along_rows ? data.extent(0) : data.extent(1); + RAFT_EXPECTS(out.extents() == data.extents(), "Size mismatch"); RAFT_EXPECTS(mean_vec_size == mu.extent(0), "Size mismatch betwen data and mu"); RAFT_EXPECTS(out.is_exhaustive(), "out must be contiguous"); RAFT_EXPECTS(data.is_exhaustive(), "data must be contiguous"); diff --git a/cpp/include/raft/stats/silhouette_score.cuh b/cpp/include/raft/stats/silhouette_score.cuh index c366c8f030..0b7d6436dd 100644 --- a/cpp/include/raft/stats/silhouette_score.cuh +++ b/cpp/include/raft/stats/silhouette_score.cuh @@ -209,7 +209,8 @@ value_t silhouette_score_batched( raft::distance::DistanceType metric = raft::distance::DistanceType::L2Unexpanded) { std::optional> opt_scores = silhouette_score_per_sample; - return silhouette_score_batched(handle, X, labels, opt_scores, n_unique_labels, batch_size, metric); + return silhouette_score_batched( + handle, X, labels, opt_scores, n_unique_labels, batch_size, metric); } }; // namespace stats }; // namespace raft diff --git a/cpp/include/raft/stats/weighted_mean.cuh b/cpp/include/raft/stats/weighted_mean.cuh index dba56f1a40..a2d62d6f8c 100644 --- a/cpp/include/raft/stats/weighted_mean.cuh +++ b/cpp/include/raft/stats/weighted_mean.cuh @@ -118,11 +118,12 @@ void weighted_mean(const raft::handle_t& handle, static_assert(is_row_major || is_col_major, "weighted_mean: Layout must be either " "raft::row_major or raft::col_major (or one of their aliases)"); - auto mean_vec_size = along_rows ? data.extent(1) : data.extent(0); + auto mean_vec_size = along_rows ? data.extent(0) : data.extent(1); + auto weight_size = along_rows ? data.extent(1) : data.extent(0); - RAFT_EXPECTS(weights.extent(0) == mean_vec_size, - "Size mismatch betwen weights and mean_vec_size"); - RAFT_EXPECTS(mu.extent(0) == mean_vec_size, "Size mismatch betwen mu and mean_vec_size"); + RAFT_EXPECTS(weights.extent(0) == weight_size, + "Size mismatch betwen weights and expected weight_size"); + RAFT_EXPECTS(mu.extent(0) == mean_vec_size, "Size mismatch betwen mu and expected mean_vec_size"); RAFT_EXPECTS(weights.is_exhaustive(), "weights must be contiguous"); RAFT_EXPECTS(mu.is_exhaustive(), "mu must be contiguous"); diff --git a/cpp/test/stats/mean_center.cu b/cpp/test/stats/mean_center.cu index 30dcdd475b..83752ad8f9 100644 --- a/cpp/test/stats/mean_center.cu +++ b/cpp/test/stats/mean_center.cu @@ -50,7 +50,7 @@ class MeanCenterTest : public ::testing::TestWithParam(din.data().get(), rows, cols), + raft::make_device_vector_view(dweights.data().get(), cols), + raft::make_device_vector_view(dact.data().get(), rows)); // adjust tolerance to account for round-off accumulation params.tolerance *= params.N; From 9c4adf4ff9d4474bcf4dda0749f242e893c9b855 Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Wed, 5 Oct 2022 17:51:22 +0200 Subject: [PATCH 28/40] mean_center and weighted_mean correction for along_rows parameter --- .../raft/stats/detail/weighted_mean.cuh | 2 +- cpp/include/raft/stats/mean_center.cuh | 12 +- cpp/include/raft/stats/weighted_mean.cuh | 14 +-- cpp/test/stats/mean_center.cu | 4 +- cpp/test/stats/weighted_mean.cu | 114 ++++++++++-------- 5 files changed, 77 insertions(+), 69 deletions(-) diff --git a/cpp/include/raft/stats/detail/weighted_mean.cuh b/cpp/include/raft/stats/detail/weighted_mean.cuh index e8f85b4af3..04b1a32522 100644 --- a/cpp/include/raft/stats/detail/weighted_mean.cuh +++ b/cpp/include/raft/stats/detail/weighted_mean.cuh @@ -50,7 +50,7 @@ void weightedMean(Type* mu, cudaStream_t stream) { // sum the weights & copy back to CPU - auto weight_size = along_rows ? D : N; + auto weight_size = along_rows ? N : D; Type WS = 0; raft::stats::sum(mu, weights, (IdxType)1, weight_size, false, stream); raft::update_host(&WS, mu, 1, stream); diff --git a/cpp/include/raft/stats/mean_center.cuh b/cpp/include/raft/stats/mean_center.cuh index e9c0ac78f0..fba2aa5b5a 100644 --- a/cpp/include/raft/stats/mean_center.cuh +++ b/cpp/include/raft/stats/mean_center.cuh @@ -58,8 +58,8 @@ void meanCenter(Type* out, * @tparam idx_t index type * @tparam layout_t Layout type of the input matrix. * @param[in] handle the raft handle - * @param[in] data input matrix - * @param[in] mu the mean vector + * @param[in] data input matrix of size nrows * ncols + * @param[in] mu the mean vector of size ncols if bcast_along_rows else nrows * @param[out] out the output mean-centered matrix * @param[in] bcast_along_rows whether to broadcast vector along rows or columns */ @@ -73,7 +73,7 @@ void mean_center(const raft::handle_t& handle, static_assert( std::is_same_v || std::is_same_v, "Data layout not supported"); - auto mean_vec_size = bcast_along_rows ? data.extent(0) : data.extent(1); + auto mean_vec_size = bcast_along_rows ? data.extent(1) : data.extent(0); RAFT_EXPECTS(out.extents() == data.extents(), "Size mismatch"); RAFT_EXPECTS(mean_vec_size == mu.extent(0), "Size mismatch betwen data and mu"); RAFT_EXPECTS(out.is_exhaustive(), "out must be contiguous"); @@ -122,8 +122,8 @@ void meanAdd(Type* out, * @tparam layout_t Layout type of the input matrix. * @tparam TPB threads per block of the cuda kernel launched * @param[in] handle the raft handle - * @param[in] data input matrix - * @param[in] mu the mean vector + * @param[in] data input matrix of size nrows * ncols + * @param[in] mu the mean vector of size ncols if bcast_along_rows else nrows * @param[out] out the output mean-centered matrix * @param[in] bcast_along_rows whether to broadcast vector along rows or columns */ @@ -137,7 +137,7 @@ void mean_add(const raft::handle_t& handle, static_assert( std::is_same_v || std::is_same_v, "Data layout not supported"); - auto mean_vec_size = bcast_along_rows ? data.extent(0) : data.extent(1); + auto mean_vec_size = bcast_along_rows ? data.extent(1) : data.extent(0); RAFT_EXPECTS(out.extents() == data.extents(), "Size mismatch"); RAFT_EXPECTS(mean_vec_size == mu.extent(0), "Size mismatch betwen data and mu"); RAFT_EXPECTS(out.is_exhaustive(), "out must be contiguous"); diff --git a/cpp/include/raft/stats/weighted_mean.cuh b/cpp/include/raft/stats/weighted_mean.cuh index a2d62d6f8c..8ef8ca5a51 100644 --- a/cpp/include/raft/stats/weighted_mean.cuh +++ b/cpp/include/raft/stats/weighted_mean.cuh @@ -102,7 +102,7 @@ void colWeightedMean( * @tparam layout_t Layout type of the input matrix. * @param[in] handle the raft handle * @param[in] data the input matrix of size nrows * ncols - * @param[in] weights weight of size ncols if along_row is true, else of size nrows + * @param[in] weights weight of size nrows if along_row is true, else of size ncols * @param[out] mu the output mean vector of size ncols if along_row is true, else of size nrows * @param[in] along_rows whether to reduce along rows or columns */ @@ -118,8 +118,8 @@ void weighted_mean(const raft::handle_t& handle, static_assert(is_row_major || is_col_major, "weighted_mean: Layout must be either " "raft::row_major or raft::col_major (or one of their aliases)"); - auto mean_vec_size = along_rows ? data.extent(0) : data.extent(1); - auto weight_size = along_rows ? data.extent(1) : data.extent(0); + auto mean_vec_size = along_rows ? data.extent(1) : data.extent(0); + auto weight_size = along_rows ? data.extent(0) : data.extent(1); RAFT_EXPECTS(weights.extent(0) == weight_size, "Size mismatch betwen weights and expected weight_size"); @@ -139,13 +139,13 @@ void weighted_mean(const raft::handle_t& handle, /** * @brief Compute the row-wise weighted mean of the input matrix with a - * vector of column weights + * vector of row weights * * @tparam value_t the data type * @tparam idx_t Integer type used to for addressing * @param[in] handle the raft handle * @param[in] data the input matrix of size nrows * ncols - * @param[in] weights per-col weight + * @param[in] weights weight of size nrows * @param[out] mu the output mean vector of size ncols */ template @@ -159,13 +159,13 @@ void row_weighted_mean(const raft::handle_t& handle, /** * @brief Compute the column-wise weighted mean of the input matrix with a - * vector of row weights + * vector of col weights * * @tparam value_t the data type * @tparam idx_t Integer type used to for addressing * @param[in] handle the raft handle * @param[in] data the input matrix of size nrows * ncols - * @param[in] weights per-row weight + * @param[in] weights weight of size ncols * @param[out] mu the output mean vector of size nrows */ template diff --git a/cpp/test/stats/mean_center.cu b/cpp/test/stats/mean_center.cu index 83752ad8f9..30dcdd475b 100644 --- a/cpp/test/stats/mean_center.cu +++ b/cpp/test/stats/mean_center.cu @@ -50,7 +50,7 @@ class MeanCenterTest : public ::testing::TestWithParam << I.along_rows << "}" << std::endl; } -///// weighted row-wise mean test and support functions +///// col-weighted mean test and support functions template -void naiveRowWeightedMean(T* R, T* D, T* W, int M, int N, bool rowMajor) +void naiveColWeightedMean(T* R, T* D, T* W, int M, int N, bool rowMajor) { int istr = rowMajor ? 1 : M; int jstr = rowMajor ? N : 1; @@ -62,6 +62,27 @@ void naiveRowWeightedMean(T* R, T* D, T* W, int M, int N, bool rowMajor) } } +///// row-weighted mean test and support functions +template +void naiveRowWeightedMean(T* R, T* D, T* W, int M, int N, bool rowMajor) +{ + int istr = rowMajor ? 1 : M; + int jstr = rowMajor ? N : 1; + + // sum the weights + T WS = 0; + for (int j = 0; j < M; j++) + WS += W[j]; + + for (int i = 0; i < N; i++) { + R[i] = (T)0; + for (int j = 0; j < M; j++) { + // R[i] += (W[j]*D[i*istr + j*jstr] - R[i])/(T)(j+1); + R[i] += (W[j] * D[i * istr + j * jstr]) / WS; + } + } +} + template class RowWeightedMeanTest : public ::testing::TestWithParam> { protected: @@ -73,18 +94,18 @@ class RowWeightedMeanTest : public ::testing::TestWithParam hin = din; thrust::host_vector hweights = dweights; - thrust::host_vector hexp(rows); + thrust::host_vector hexp(cols); // compute naive result & copy to GPU naiveRowWeightedMean(hexp.data(), hin.data(), hweights.data(), rows, cols, true); @@ -93,8 +114,8 @@ class RowWeightedMeanTest : public ::testing::TestWithParam(din.data().get(), rows, cols), - raft::make_device_vector_view(dweights.data().get(), cols), - raft::make_device_vector_view(dact.data().get(), rows)); + raft::make_device_vector_view(dweights.data().get(), rows), + raft::make_device_vector_view(dact.data().get(), cols)); // adjust tolerance to account for round-off accumulation params.tolerance *= params.N; @@ -107,27 +128,6 @@ class RowWeightedMeanTest : public ::testing::TestWithParam din, dweights, dexp, dact; }; -///// weighted column-wise mean test and support functions -template -void naiveColWeightedMean(T* R, T* D, T* W, int M, int N, bool rowMajor) -{ - int istr = rowMajor ? 1 : M; - int jstr = rowMajor ? N : 1; - - // sum the weights - T WS = 0; - for (int j = 0; j < M; j++) - WS += W[j]; - - for (int i = 0; i < N; i++) { - R[i] = (T)0; - for (int j = 0; j < M; j++) { - // R[i] += (W[j]*D[i*istr + j*jstr] - R[i])/(T)(j+1); - R[i] += (W[j] * D[i * istr + j * jstr]) / WS; - } - } -} - template class ColWeightedMeanTest : public ::testing::TestWithParam> { void SetUp() override @@ -139,25 +139,28 @@ class ColWeightedMeanTest : public ::testing::TestWithParam hin = din; thrust::host_vector hweights = dweights; - thrust::host_vector hexp(cols); + thrust::host_vector hexp(rows); // compute naive result & copy to GPU naiveColWeightedMean(hexp.data(), hin.data(), hweights.data(), rows, cols, true); dexp = hexp; // compute result - colWeightedMean(dact.data().get(), din.data().get(), dweights.data().get(), cols, rows, stream); + col_weighted_mean(handle, + raft::make_device_matrix_view(din.data().get(), rows, cols), + raft::make_device_vector_view(dweights.data().get(), cols), + raft::make_device_vector_view(dact.data().get(), rows)); // adjust tolerance to account for round-off accumulation params.tolerance *= params.M; @@ -179,8 +182,8 @@ class WeightedMeanTest : public ::testing::TestWithParam> raft::random::RngState r(params.seed); auto stream = handle.get_stream(); int rows = params.M, cols = params.N, len = rows * cols; - auto weight_size = params.along_rows ? cols : rows; - auto mean_size = params.along_rows ? rows : cols; + auto weight_size = params.along_rows ? rows : cols; + auto mean_size = params.along_rows ? cols : rows; // device-side data din.resize(len); dweights.resize(weight_size); @@ -204,14 +207,11 @@ class WeightedMeanTest : public ::testing::TestWithParam> dexp = hexp; // compute result - weightedMean(dact.data().get(), - din.data().get(), - dweights.data().get(), - cols, - rows, - params.row_major, - params.along_rows, - stream); + weighted_mean(handle, + raft::make_device_matrix_view(din.data().get(), rows, cols), + raft::make_device_vector_view(dweights.data().get(), weight_size), + raft::make_device_vector_view(dact.data().get(), mean_size), + params.along_rows); // adjust tolerance to account for round-off accumulation params.tolerance *= params.N; @@ -229,6 +229,8 @@ static const float tolF = 128 * std::numeric_limits::epsilon(); static const double tolD = 256 * std::numeric_limits::epsilon(); const std::vector> inputsf = {{tolF, 4, 4, 1234, true, true}, + {tolF, 32, 256, 1234, true, true}, + {tolF, 32, 256, 1234, false, true}, {tolF, 1024, 32, 1234, true, false}, {tolF, 1024, 64, 1234, true, true}, {tolF, 1024, 128, 1234, true, false}, @@ -239,6 +241,8 @@ const std::vector> inputsf = {{tolF, 4, 4, 1234, true, {tolF, 1024, 256, 1234, false, true}}; const std::vector> inputsd = {{tolD, 4, 4, 1234, true, true}, + {tolD, 32, 256, 1234, true, true}, + {tolD, 32, 256, 1234, false, true}, {tolD, 1024, 32, 1234, true, false}, {tolD, 1024, 64, 1234, true, true}, {tolD, 1024, 128, 1234, true, false}, @@ -252,7 +256,7 @@ using RowWeightedMeanTestF = RowWeightedMeanTest; TEST_P(RowWeightedMeanTestF, Result) { ASSERT_TRUE(devArrMatch( - dexp.data().get(), dact.data().get(), params.M, raft::CompareApprox(params.tolerance))); + dexp.data().get(), dact.data().get(), params.N, raft::CompareApprox(params.tolerance))); } INSTANTIATE_TEST_CASE_P(RowWeightedMeanTest, RowWeightedMeanTestF, ::testing::ValuesIn(inputsf)); @@ -260,7 +264,7 @@ using RowWeightedMeanTestD = RowWeightedMeanTest; TEST_P(RowWeightedMeanTestD, Result) { ASSERT_TRUE(devArrMatch( - dexp.data().get(), dact.data().get(), params.M, raft::CompareApprox(params.tolerance))); + dexp.data().get(), dact.data().get(), params.N, raft::CompareApprox(params.tolerance))); } INSTANTIATE_TEST_CASE_P(RowWeightedMeanTest, RowWeightedMeanTestD, ::testing::ValuesIn(inputsd)); @@ -268,7 +272,7 @@ using ColWeightedMeanTestF = ColWeightedMeanTest; TEST_P(ColWeightedMeanTestF, Result) { ASSERT_TRUE(devArrMatch( - dexp.data().get(), dact.data().get(), params.N, raft::CompareApprox(params.tolerance))); + dexp.data().get(), dact.data().get(), params.M, raft::CompareApprox(params.tolerance))); } INSTANTIATE_TEST_CASE_P(ColWeightedMeanTest, ColWeightedMeanTestF, ::testing::ValuesIn(inputsf)); @@ -276,23 +280,27 @@ using ColWeightedMeanTestD = ColWeightedMeanTest; TEST_P(ColWeightedMeanTestD, Result) { ASSERT_TRUE(devArrMatch( - dexp.data().get(), dact.data().get(), params.N, raft::CompareApprox(params.tolerance))); + dexp.data().get(), dact.data().get(), params.M, raft::CompareApprox(params.tolerance))); } INSTANTIATE_TEST_CASE_P(ColWeightedMeanTest, ColWeightedMeanTestD, ::testing::ValuesIn(inputsd)); using WeightedMeanTestF = WeightedMeanTest; TEST_P(WeightedMeanTestF, Result) { + auto mean_size = params.along_rows ? params.N : params.M; ASSERT_TRUE(devArrMatch( - dexp.data().get(), dact.data().get(), params.N, raft::CompareApprox(params.tolerance))); + dexp.data().get(), dact.data().get(), mean_size, raft::CompareApprox(params.tolerance))); } INSTANTIATE_TEST_CASE_P(WeightedMeanTest, WeightedMeanTestF, ::testing::ValuesIn(inputsf)); using WeightedMeanTestD = WeightedMeanTest; TEST_P(WeightedMeanTestD, Result) { - ASSERT_TRUE(devArrMatch( - dexp.data().get(), dact.data().get(), params.N, raft::CompareApprox(params.tolerance))); + auto mean_size = params.along_rows ? params.N : params.M; + ASSERT_TRUE(devArrMatch(dexp.data().get(), + dact.data().get(), + mean_size, + raft::CompareApprox(params.tolerance))); } INSTANTIATE_TEST_CASE_P(WeightedMeanTest, WeightedMeanTestD, ::testing::ValuesIn(inputsd)); From 29d360ef18a6839f0355b83ceb04dbce5084edfc Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 5 Oct 2022 15:00:00 -0400 Subject: [PATCH 29/40] Updating row weighted mean --- cpp/include/raft/stats/weighted_mean.cuh | 22 +-- cpp/test/stats/weighted_mean.cu | 171 ++++++++++++++--------- 2 files changed, 113 insertions(+), 80 deletions(-) diff --git a/cpp/include/raft/stats/weighted_mean.cuh b/cpp/include/raft/stats/weighted_mean.cuh index 8ef8ca5a51..4f55895589 100644 --- a/cpp/include/raft/stats/weighted_mean.cuh +++ b/cpp/include/raft/stats/weighted_mean.cuh @@ -102,8 +102,8 @@ void colWeightedMean( * @tparam layout_t Layout type of the input matrix. * @param[in] handle the raft handle * @param[in] data the input matrix of size nrows * ncols - * @param[in] weights weight of size nrows if along_row is true, else of size ncols - * @param[out] mu the output mean vector of size ncols if along_row is true, else of size nrows + * @param[in] weights weights of size ncols if along_row is true, else of size nrows + * @param[out] mu the output mean vector of size nrows if along_row is true, else of size ncols * @param[in] along_rows whether to reduce along rows or columns */ template @@ -122,7 +122,7 @@ void weighted_mean(const raft::handle_t& handle, auto weight_size = along_rows ? data.extent(0) : data.extent(1); RAFT_EXPECTS(weights.extent(0) == weight_size, - "Size mismatch betwen weights and expected weight_size"); + "Size mismatch between weights and expected weight_size"); RAFT_EXPECTS(mu.extent(0) == mean_vec_size, "Size mismatch betwen mu and expected mean_vec_size"); RAFT_EXPECTS(weights.is_exhaustive(), "weights must be contiguous"); RAFT_EXPECTS(mu.is_exhaustive(), "mu must be contiguous"); @@ -145,12 +145,12 @@ void weighted_mean(const raft::handle_t& handle, * @tparam idx_t Integer type used to for addressing * @param[in] handle the raft handle * @param[in] data the input matrix of size nrows * ncols - * @param[in] weights weight of size nrows - * @param[out] mu the output mean vector of size ncols + * @param[in] weights weights of size ncols + * @param[out] mu the output mean vector of size nrows */ -template +template void row_weighted_mean(const raft::handle_t& handle, - raft::device_matrix_view data, + raft::device_matrix_view data, raft::device_vector_view weights, raft::device_vector_view mu) { @@ -165,12 +165,12 @@ void row_weighted_mean(const raft::handle_t& handle, * @tparam idx_t Integer type used to for addressing * @param[in] handle the raft handle * @param[in] data the input matrix of size nrows * ncols - * @param[in] weights weight of size ncols - * @param[out] mu the output mean vector of size nrows + * @param[in] weights weight of size nrows + * @param[out] mu the output mean vector of size ncols */ -template +template void col_weighted_mean(const raft::handle_t& handle, - raft::device_matrix_view data, + raft::device_matrix_view data, raft::device_vector_view weights, raft::device_vector_view mu) { diff --git a/cpp/test/stats/weighted_mean.cu b/cpp/test/stats/weighted_mean.cu index 2c4e0fe2d3..1ef4e8143e 100644 --- a/cpp/test/stats/weighted_mean.cu +++ b/cpp/test/stats/weighted_mean.cu @@ -15,7 +15,9 @@ */ #include "../test_utils.h" +#include #include +#include #include #include #include @@ -41,9 +43,9 @@ template << I.along_rows << "}" << std::endl; } -///// col-weighted mean test and support functions +///// weighted row-wise mean test and support functions template -void naiveColWeightedMean(T* R, T* D, T* W, int M, int N, bool rowMajor) +void naiveRowWeightedMean(T* R, T* D, T* W, int M, int N, bool rowMajor) { int istr = rowMajor ? 1 : M; int jstr = rowMajor ? N : 1; @@ -62,27 +64,6 @@ void naiveColWeightedMean(T* R, T* D, T* W, int M, int N, bool rowMajor) } } -///// row-weighted mean test and support functions -template -void naiveRowWeightedMean(T* R, T* D, T* W, int M, int N, bool rowMajor) -{ - int istr = rowMajor ? 1 : M; - int jstr = rowMajor ? N : 1; - - // sum the weights - T WS = 0; - for (int j = 0; j < M; j++) - WS += W[j]; - - for (int i = 0; i < N; i++) { - R[i] = (T)0; - for (int j = 0; j < M; j++) { - // R[i] += (W[j]*D[i*istr + j*jstr] - R[i])/(T)(j+1); - R[i] += (W[j] * D[i * istr + j * jstr]) / WS; - } - } -} - template class RowWeightedMeanTest : public ::testing::TestWithParam> { protected: @@ -94,28 +75,42 @@ class RowWeightedMeanTest : public ::testing::TestWithParam hin = din; thrust::host_vector hweights = dweights; - thrust::host_vector hexp(cols); + thrust::host_vector hexp(rows); // compute naive result & copy to GPU naiveRowWeightedMean(hexp.data(), hin.data(), hweights.data(), rows, cols, true); dexp = hexp; - // compute result - row_weighted_mean(handle, - raft::make_device_matrix_view(din.data().get(), rows, cols), - raft::make_device_vector_view(dweights.data().get(), rows), - raft::make_device_vector_view(dact.data().get(), cols)); + if (params.row_major) { + auto input = raft::make_device_matrix_view( + din.data().get(), rows, cols); + auto output = raft::make_device_vector_view(dact.data().get(), rows); + auto weights = + raft::make_device_vector_view(dweights.data().get(), cols); + + // compute result + row_weighted_mean(handle, input, weights, output); + } else { + auto input = raft::make_device_matrix_view( + din.data().get(), rows, cols); + auto output = raft::make_device_vector_view(dact.data().get(), rows); + auto weights = + raft::make_device_vector_view(dweights.data().get(), cols); + + // compute result + row_weighted_mean(handle, input, weights, output); + } // adjust tolerance to account for round-off accumulation params.tolerance *= params.N; @@ -128,6 +123,27 @@ class RowWeightedMeanTest : public ::testing::TestWithParam din, dweights, dexp, dact; }; +///// weighted column-wise mean test and support functions +template +void naiveColWeightedMean(T* R, T* D, T* W, int M, int N, bool rowMajor) +{ + int istr = rowMajor ? 1 : M; + int jstr = rowMajor ? N : 1; + + // sum the weights + T WS = 0; + for (int j = 0; j < M; j++) + WS += W[j]; + + for (int i = 0; i < N; i++) { + R[i] = (T)0; + for (int j = 0; j < M; j++) { + // R[i] += (W[j]*D[i*istr + j*jstr] - R[i])/(T)(j+1); + R[i] += (W[j] * D[i * istr + j * jstr]) / WS; + } + } +} + template class ColWeightedMeanTest : public ::testing::TestWithParam> { void SetUp() override @@ -139,29 +155,42 @@ class ColWeightedMeanTest : public ::testing::TestWithParam hin = din; thrust::host_vector hweights = dweights; - thrust::host_vector hexp(rows); + thrust::host_vector hexp(cols); // compute naive result & copy to GPU naiveColWeightedMean(hexp.data(), hin.data(), hweights.data(), rows, cols, true); dexp = hexp; - // compute result - col_weighted_mean(handle, - raft::make_device_matrix_view(din.data().get(), rows, cols), - raft::make_device_vector_view(dweights.data().get(), cols), - raft::make_device_vector_view(dact.data().get(), rows)); - + if (params.row_major) { + auto input = raft::make_device_matrix_view( + din.data().get(), rows, cols); + auto output = raft::make_device_vector_view(dact.data().get(), rows); + auto weights = + raft::make_device_vector_view(dweights.data().get(), cols); + + // compute result + col_weighted_mean(handle, input, weights, output); + } else { + auto input = raft::make_device_matrix_view( + din.data().get(), rows, cols); + auto output = raft::make_device_vector_view(dact.data().get(), rows); + auto weights = + raft::make_device_vector_view(dweights.data().get(), cols); + + // compute result + col_weighted_mean(handle, input, weights, output); + } // adjust tolerance to account for round-off accumulation params.tolerance *= params.M; } @@ -182,8 +211,8 @@ class WeightedMeanTest : public ::testing::TestWithParam> raft::random::RngState r(params.seed); auto stream = handle.get_stream(); int rows = params.M, cols = params.N, len = rows * cols; - auto weight_size = params.along_rows ? rows : cols; - auto mean_size = params.along_rows ? cols : rows; + auto weight_size = params.along_rows ? cols : rows; + auto mean_size = params.along_rows ? rows : cols; // device-side data din.resize(len); dweights.resize(weight_size); @@ -206,13 +235,25 @@ class WeightedMeanTest : public ::testing::TestWithParam> naiveColWeightedMean(hexp.data(), hin.data(), hweights.data(), rows, cols, params.row_major); dexp = hexp; - // compute result - weighted_mean(handle, - raft::make_device_matrix_view(din.data().get(), rows, cols), - raft::make_device_vector_view(dweights.data().get(), weight_size), - raft::make_device_vector_view(dact.data().get(), mean_size), - params.along_rows); - + if (params.row_major) { + auto input = raft::make_device_matrix_view( + din.data().get(), rows, cols); + auto output = raft::make_device_vector_view(dact.data().get(), rows); + auto weights = + raft::make_device_vector_view(dweights.data().get(), cols); + + // compute result + weighted_mean(handle, input, weights, output, params.along_rows); + } else { + auto input = raft::make_device_matrix_view( + din.data().get(), rows, cols); + auto output = raft::make_device_vector_view(dact.data().get(), rows); + auto weights = + raft::make_device_vector_view(dweights.data().get(), cols); + + // compute result + weighted_mean(handle, input, weights, output, params.along_rows); + } // adjust tolerance to account for round-off accumulation params.tolerance *= params.N; } @@ -229,8 +270,6 @@ static const float tolF = 128 * std::numeric_limits::epsilon(); static const double tolD = 256 * std::numeric_limits::epsilon(); const std::vector> inputsf = {{tolF, 4, 4, 1234, true, true}, - {tolF, 32, 256, 1234, true, true}, - {tolF, 32, 256, 1234, false, true}, {tolF, 1024, 32, 1234, true, false}, {tolF, 1024, 64, 1234, true, true}, {tolF, 1024, 128, 1234, true, false}, @@ -241,8 +280,6 @@ const std::vector> inputsf = {{tolF, 4, 4, 1234, true, {tolF, 1024, 256, 1234, false, true}}; const std::vector> inputsd = {{tolD, 4, 4, 1234, true, true}, - {tolD, 32, 256, 1234, true, true}, - {tolD, 32, 256, 1234, false, true}, {tolD, 1024, 32, 1234, true, false}, {tolD, 1024, 64, 1234, true, true}, {tolD, 1024, 128, 1234, true, false}, @@ -256,7 +293,7 @@ using RowWeightedMeanTestF = RowWeightedMeanTest; TEST_P(RowWeightedMeanTestF, Result) { ASSERT_TRUE(devArrMatch( - dexp.data().get(), dact.data().get(), params.N, raft::CompareApprox(params.tolerance))); + dexp.data().get(), dact.data().get(), params.M, raft::CompareApprox(params.tolerance))); } INSTANTIATE_TEST_CASE_P(RowWeightedMeanTest, RowWeightedMeanTestF, ::testing::ValuesIn(inputsf)); @@ -264,7 +301,7 @@ using RowWeightedMeanTestD = RowWeightedMeanTest; TEST_P(RowWeightedMeanTestD, Result) { ASSERT_TRUE(devArrMatch( - dexp.data().get(), dact.data().get(), params.N, raft::CompareApprox(params.tolerance))); + dexp.data().get(), dact.data().get(), params.M, raft::CompareApprox(params.tolerance))); } INSTANTIATE_TEST_CASE_P(RowWeightedMeanTest, RowWeightedMeanTestD, ::testing::ValuesIn(inputsd)); @@ -272,7 +309,7 @@ using ColWeightedMeanTestF = ColWeightedMeanTest; TEST_P(ColWeightedMeanTestF, Result) { ASSERT_TRUE(devArrMatch( - dexp.data().get(), dact.data().get(), params.M, raft::CompareApprox(params.tolerance))); + dexp.data().get(), dact.data().get(), params.N, raft::CompareApprox(params.tolerance))); } INSTANTIATE_TEST_CASE_P(ColWeightedMeanTest, ColWeightedMeanTestF, ::testing::ValuesIn(inputsf)); @@ -280,29 +317,25 @@ using ColWeightedMeanTestD = ColWeightedMeanTest; TEST_P(ColWeightedMeanTestD, Result) { ASSERT_TRUE(devArrMatch( - dexp.data().get(), dact.data().get(), params.M, raft::CompareApprox(params.tolerance))); + dexp.data().get(), dact.data().get(), params.N, raft::CompareApprox(params.tolerance))); } INSTANTIATE_TEST_CASE_P(ColWeightedMeanTest, ColWeightedMeanTestD, ::testing::ValuesIn(inputsd)); using WeightedMeanTestF = WeightedMeanTest; TEST_P(WeightedMeanTestF, Result) { - auto mean_size = params.along_rows ? params.N : params.M; ASSERT_TRUE(devArrMatch( - dexp.data().get(), dact.data().get(), mean_size, raft::CompareApprox(params.tolerance))); + dexp.data().get(), dact.data().get(), params.N, raft::CompareApprox(params.tolerance))); } INSTANTIATE_TEST_CASE_P(WeightedMeanTest, WeightedMeanTestF, ::testing::ValuesIn(inputsf)); using WeightedMeanTestD = WeightedMeanTest; TEST_P(WeightedMeanTestD, Result) { - auto mean_size = params.along_rows ? params.N : params.M; - ASSERT_TRUE(devArrMatch(dexp.data().get(), - dact.data().get(), - mean_size, - raft::CompareApprox(params.tolerance))); + ASSERT_TRUE(devArrMatch( + dexp.data().get(), dact.data().get(), params.N, raft::CompareApprox(params.tolerance))); } INSTANTIATE_TEST_CASE_P(WeightedMeanTest, WeightedMeanTestD, ::testing::ValuesIn(inputsd)); }; // end namespace stats -}; // end namespace raft +}; // end namespace raft \ No newline at end of file From 333e596b1db5bcc0bd01ca4b252d45bb08ecfde1 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 5 Oct 2022 16:18:49 -0400 Subject: [PATCH 30/40] iRemoving weighted mean mdspanification for now. --- cpp/include/raft/stats/weighted_mean.cuh | 85 ------------------------ cpp/test/stats/weighted_mean.cu | 74 +++++---------------- 2 files changed, 15 insertions(+), 144 deletions(-) diff --git a/cpp/include/raft/stats/weighted_mean.cuh b/cpp/include/raft/stats/weighted_mean.cuh index 4f55895589..0e8338fe84 100644 --- a/cpp/include/raft/stats/weighted_mean.cuh +++ b/cpp/include/raft/stats/weighted_mean.cuh @@ -19,7 +19,6 @@ #pragma once -#include #include namespace raft { @@ -92,90 +91,6 @@ void colWeightedMean( { weightedMean(mu, data, weights, D, N, true, false, stream); } - -/** - * @brief Compute the weighted mean of the input matrix with a - * vector of weights, along rows or along columns - * - * @tparam value_t the data type - * @tparam idx_t Integer type used to for addressing - * @tparam layout_t Layout type of the input matrix. - * @param[in] handle the raft handle - * @param[in] data the input matrix of size nrows * ncols - * @param[in] weights weights of size ncols if along_row is true, else of size nrows - * @param[out] mu the output mean vector of size nrows if along_row is true, else of size ncols - * @param[in] along_rows whether to reduce along rows or columns - */ -template -void weighted_mean(const raft::handle_t& handle, - raft::device_matrix_view data, - raft::device_vector_view weights, - raft::device_vector_view mu, - bool along_rows) -{ - constexpr bool is_row_major = std::is_same_v; - constexpr bool is_col_major = std::is_same_v; - static_assert(is_row_major || is_col_major, - "weighted_mean: Layout must be either " - "raft::row_major or raft::col_major (or one of their aliases)"); - auto mean_vec_size = along_rows ? data.extent(1) : data.extent(0); - auto weight_size = along_rows ? data.extent(0) : data.extent(1); - - RAFT_EXPECTS(weights.extent(0) == weight_size, - "Size mismatch between weights and expected weight_size"); - RAFT_EXPECTS(mu.extent(0) == mean_vec_size, "Size mismatch betwen mu and expected mean_vec_size"); - RAFT_EXPECTS(weights.is_exhaustive(), "weights must be contiguous"); - RAFT_EXPECTS(mu.is_exhaustive(), "mu must be contiguous"); - - detail::weightedMean(mu.data_handle(), - data.data_handle(), - weights.data_handle(), - data.extent(1), - data.extent(0), - is_row_major, - along_rows, - handle.get_stream()); -} - -/** - * @brief Compute the row-wise weighted mean of the input matrix with a - * vector of row weights - * - * @tparam value_t the data type - * @tparam idx_t Integer type used to for addressing - * @param[in] handle the raft handle - * @param[in] data the input matrix of size nrows * ncols - * @param[in] weights weights of size ncols - * @param[out] mu the output mean vector of size nrows - */ -template -void row_weighted_mean(const raft::handle_t& handle, - raft::device_matrix_view data, - raft::device_vector_view weights, - raft::device_vector_view mu) -{ - weighted_mean(handle, data, weights, mu, true); -} - -/** - * @brief Compute the column-wise weighted mean of the input matrix with a - * vector of col weights - * - * @tparam value_t the data type - * @tparam idx_t Integer type used to for addressing - * @param[in] handle the raft handle - * @param[in] data the input matrix of size nrows * ncols - * @param[in] weights weight of size nrows - * @param[out] mu the output mean vector of size ncols - */ -template -void col_weighted_mean(const raft::handle_t& handle, - raft::device_matrix_view data, - raft::device_vector_view weights, - raft::device_vector_view mu) -{ - weighted_mean(handle, data, weights, mu, false); -} }; // end namespace stats }; // end namespace raft diff --git a/cpp/test/stats/weighted_mean.cu b/cpp/test/stats/weighted_mean.cu index 1ef4e8143e..ec99d5a627 100644 --- a/cpp/test/stats/weighted_mean.cu +++ b/cpp/test/stats/weighted_mean.cu @@ -15,9 +15,7 @@ */ #include "../test_utils.h" -#include #include -#include #include #include #include @@ -92,25 +90,8 @@ class RowWeightedMeanTest : public ::testing::TestWithParam( - din.data().get(), rows, cols); - auto output = raft::make_device_vector_view(dact.data().get(), rows); - auto weights = - raft::make_device_vector_view(dweights.data().get(), cols); - - // compute result - row_weighted_mean(handle, input, weights, output); - } else { - auto input = raft::make_device_matrix_view( - din.data().get(), rows, cols); - auto output = raft::make_device_vector_view(dact.data().get(), rows); - auto weights = - raft::make_device_vector_view(dweights.data().get(), cols); - - // compute result - row_weighted_mean(handle, input, weights, output); - } + // compute result + rowWeightedMean(dact.data().get(), din.data().get(), dweights.data().get(), cols, rows, stream); // adjust tolerance to account for round-off accumulation params.tolerance *= params.N; @@ -172,25 +153,9 @@ class ColWeightedMeanTest : public ::testing::TestWithParam( - din.data().get(), rows, cols); - auto output = raft::make_device_vector_view(dact.data().get(), rows); - auto weights = - raft::make_device_vector_view(dweights.data().get(), cols); - - // compute result - col_weighted_mean(handle, input, weights, output); - } else { - auto input = raft::make_device_matrix_view( - din.data().get(), rows, cols); - auto output = raft::make_device_vector_view(dact.data().get(), rows); - auto weights = - raft::make_device_vector_view(dweights.data().get(), cols); - - // compute result - col_weighted_mean(handle, input, weights, output); - } + // compute result + colWeightedMean(dact.data().get(), din.data().get(), dweights.data().get(), cols, rows, stream); + // adjust tolerance to account for round-off accumulation params.tolerance *= params.M; } @@ -235,25 +200,16 @@ class WeightedMeanTest : public ::testing::TestWithParam> naiveColWeightedMean(hexp.data(), hin.data(), hweights.data(), rows, cols, params.row_major); dexp = hexp; - if (params.row_major) { - auto input = raft::make_device_matrix_view( - din.data().get(), rows, cols); - auto output = raft::make_device_vector_view(dact.data().get(), rows); - auto weights = - raft::make_device_vector_view(dweights.data().get(), cols); - - // compute result - weighted_mean(handle, input, weights, output, params.along_rows); - } else { - auto input = raft::make_device_matrix_view( - din.data().get(), rows, cols); - auto output = raft::make_device_vector_view(dact.data().get(), rows); - auto weights = - raft::make_device_vector_view(dweights.data().get(), cols); - - // compute result - weighted_mean(handle, input, weights, output, params.along_rows); - } + // compute result + weightedMean(dact.data().get(), + din.data().get(), + dweights.data().get(), + cols, + rows, + params.row_major, + params.along_rows, + stream); + // adjust tolerance to account for round-off accumulation params.tolerance *= params.N; } From b489e3b69f1517a899b4cadb22be7f97d28f69c7 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 5 Oct 2022 20:19:14 -0400 Subject: [PATCH 31/40] Updating weighted mean test. --- cpp/test/stats/weighted_mean.cu | 470 ++++++++++++++++---------------- 1 file changed, 235 insertions(+), 235 deletions(-) diff --git a/cpp/test/stats/weighted_mean.cu b/cpp/test/stats/weighted_mean.cu index ec99d5a627..3ce17ac974 100644 --- a/cpp/test/stats/weighted_mean.cu +++ b/cpp/test/stats/weighted_mean.cu @@ -23,273 +23,273 @@ #include namespace raft { -namespace stats { - -template -struct WeightedMeanInputs { - T tolerance; - int M, N; - unsigned long long int seed; - bool along_rows; // Used only for the weightedMean test function - bool row_major; -}; - -template -::std::ostream& operator<<(::std::ostream& os, const WeightedMeanInputs& I) -{ - return os << "{ " << I.tolerance << ", " << I.M << ", " << I.N << ", " << I.seed << ", " - << I.along_rows << "}" << std::endl; -} + namespace stats { + + template + struct WeightedMeanInputs { + T tolerance; + int M, N; + unsigned long long int seed; + bool along_rows; // Used only for the weightedMean test function + bool row_major; + }; + + template + ::std::ostream& operator<<(::std::ostream& os, const WeightedMeanInputs& I) + { + return os << "{ " << I.tolerance << ", " << I.M << ", " << I.N << ", " << I.seed << ", " + << I.along_rows << "}" << std::endl; + } ///// weighted row-wise mean test and support functions -template -void naiveRowWeightedMean(T* R, T* D, T* W, int M, int N, bool rowMajor) -{ - int istr = rowMajor ? 1 : M; - int jstr = rowMajor ? N : 1; - - // sum the weights - T WS = 0; - for (int i = 0; i < N; i++) - WS += W[i]; - - for (int j = 0; j < M; j++) { - R[j] = (T)0; - for (int i = 0; i < N; i++) { - // R[j] += (W[i]*D[i*istr + j*jstr] - R[j])/(T)(i+1); - R[j] += (W[i] * D[i * istr + j * jstr]) / WS; - } - } -} - -template -class RowWeightedMeanTest : public ::testing::TestWithParam> { - protected: - void SetUp() override - { - params = ::testing::TestWithParam>::GetParam(); - raft::random::RngState r(params.seed); - int rows = params.M, cols = params.N, len = rows * cols; - auto stream = handle.get_stream(); - // device-side data - din.resize(len); - dweights.resize(cols); - dexp.resize(rows); - dact.resize(rows); - - // create random matrix and weights - uniform(handle, r, din.data().get(), len, T(-1.0), T(1.0)); - uniform(handle, r, dweights.data().get(), cols, T(-1.0), T(1.0)); - - // host-side data - thrust::host_vector hin = din; - thrust::host_vector hweights = dweights; - thrust::host_vector hexp(rows); - - // compute naive result & copy to GPU - naiveRowWeightedMean(hexp.data(), hin.data(), hweights.data(), rows, cols, true); - dexp = hexp; - - // compute result - rowWeightedMean(dact.data().get(), din.data().get(), dweights.data().get(), cols, rows, stream); - - // adjust tolerance to account for round-off accumulation - params.tolerance *= params.N; - } - - protected: - raft::handle_t handle; - WeightedMeanInputs params; - thrust::host_vector hin, hweights; - thrust::device_vector din, dweights, dexp, dact; -}; + template + void naiveRowWeightedMean(T* R, T* D, T* W, int M, int N, bool rowMajor) + { + int istr = rowMajor ? 1 : M; + int jstr = rowMajor ? N : 1; + + // sum the weights + T WS = 0; + for (int i = 0; i < N; i++) + WS += W[i]; + + for (int j = 0; j < M; j++) { + R[j] = (T)0; + for (int i = 0; i < N; i++) { + // R[j] += (W[i]*D[i*istr + j*jstr] - R[j])/(T)(i+1); + R[j] += (W[i] * D[i * istr + j * jstr]) / WS; + } + } + } + + template + class RowWeightedMeanTest : public ::testing::TestWithParam> { + protected: + void SetUp() override + { + params = ::testing::TestWithParam>::GetParam(); + raft::random::RngState r(params.seed); + int rows = params.M, cols = params.N, len = rows * cols; + auto stream = handle.get_stream(); + // device-side data + din.resize(len); + dweights.resize(cols); + dexp.resize(rows); + dact.resize(rows); + + // create random matrix and weights + uniform(handle, r, din.data().get(), len, T(-1.0), T(1.0)); + uniform(handle, r, dweights.data().get(), cols, T(-1.0), T(1.0)); + + // host-side data + thrust::host_vector hin = din; + thrust::host_vector hweights = dweights; + thrust::host_vector hexp(rows); + + // compute naive result & copy to GPU + naiveRowWeightedMean(hexp.data(), hin.data(), hweights.data(), rows, cols, true); + dexp = hexp; + + // compute result + rowWeightedMean(dact.data().get(), din.data().get(), dweights.data().get(), cols, rows, stream); + + // adjust tolerance to account for round-off accumulation + params.tolerance *= params.N; + } + + protected: + raft::handle_t handle; + WeightedMeanInputs params; + thrust::host_vector hin, hweights; + thrust::device_vector din, dweights, dexp, dact; + }; ///// weighted column-wise mean test and support functions -template -void naiveColWeightedMean(T* R, T* D, T* W, int M, int N, bool rowMajor) -{ - int istr = rowMajor ? 1 : M; - int jstr = rowMajor ? N : 1; - - // sum the weights - T WS = 0; - for (int j = 0; j < M; j++) - WS += W[j]; - - for (int i = 0; i < N; i++) { - R[i] = (T)0; - for (int j = 0; j < M; j++) { - // R[i] += (W[j]*D[i*istr + j*jstr] - R[i])/(T)(j+1); - R[i] += (W[j] * D[i * istr + j * jstr]) / WS; - } - } -} - -template -class ColWeightedMeanTest : public ::testing::TestWithParam> { - void SetUp() override - { - params = ::testing::TestWithParam>::GetParam(); - raft::random::RngState r(params.seed); - int rows = params.M, cols = params.N, len = rows * cols; - - auto stream = handle.get_stream(); - // device-side data - din.resize(len); - dweights.resize(rows); - dexp.resize(cols); - dact.resize(cols); - - // create random matrix and weights - uniform(handle, r, din.data().get(), len, T(-1.0), T(1.0)); - uniform(handle, r, dweights.data().get(), rows, T(-1.0), T(1.0)); - - // host-side data - thrust::host_vector hin = din; - thrust::host_vector hweights = dweights; - thrust::host_vector hexp(cols); - - // compute naive result & copy to GPU - naiveColWeightedMean(hexp.data(), hin.data(), hweights.data(), rows, cols, true); - dexp = hexp; - - // compute result - colWeightedMean(dact.data().get(), din.data().get(), dweights.data().get(), cols, rows, stream); - - // adjust tolerance to account for round-off accumulation - params.tolerance *= params.M; - } - - protected: - raft::handle_t handle; - WeightedMeanInputs params; - thrust::host_vector hin, hweights; - thrust::device_vector din, dweights, dexp, dact; -}; - -template -class WeightedMeanTest : public ::testing::TestWithParam> { - protected: - void SetUp() override - { - params = ::testing::TestWithParam>::GetParam(); - raft::random::RngState r(params.seed); - auto stream = handle.get_stream(); - int rows = params.M, cols = params.N, len = rows * cols; - auto weight_size = params.along_rows ? cols : rows; - auto mean_size = params.along_rows ? rows : cols; - // device-side data - din.resize(len); - dweights.resize(weight_size); - dexp.resize(mean_size); - dact.resize(mean_size); - - // create random matrix and weights - uniform(handle, r, din.data().get(), len, T(-1.0), T(1.0)); - uniform(handle, r, dweights.data().get(), weight_size, T(-1.0), T(1.0)); - - // host-side data - thrust::host_vector hin = din; - thrust::host_vector hweights = dweights; - thrust::host_vector hexp(mean_size); - - // compute naive result & copy to GPU - if (params.along_rows) - naiveRowWeightedMean(hexp.data(), hin.data(), hweights.data(), rows, cols, params.row_major); - else - naiveColWeightedMean(hexp.data(), hin.data(), hweights.data(), rows, cols, params.row_major); - dexp = hexp; - - // compute result - weightedMean(dact.data().get(), - din.data().get(), - dweights.data().get(), - cols, - rows, - params.row_major, - params.along_rows, - stream); - - // adjust tolerance to account for round-off accumulation - params.tolerance *= params.N; - } - - protected: - raft::handle_t handle; - WeightedMeanInputs params; - thrust::host_vector hin, hweights; - thrust::device_vector din, dweights, dexp, dact; -}; + template + void naiveColWeightedMean(T* R, T* D, T* W, int M, int N, bool rowMajor) + { + int istr = rowMajor ? 1 : M; + int jstr = rowMajor ? N : 1; + + // sum the weights + T WS = 0; + for (int j = 0; j < M; j++) + WS += W[j]; + + for (int i = 0; i < N; i++) { + R[i] = (T)0; + for (int j = 0; j < M; j++) { + // R[i] += (W[j]*D[i*istr + j*jstr] - R[i])/(T)(j+1); + R[i] += (W[j] * D[i * istr + j * jstr]) / WS; + } + } + } + + template + class ColWeightedMeanTest : public ::testing::TestWithParam> { + void SetUp() override + { + params = ::testing::TestWithParam>::GetParam(); + raft::random::RngState r(params.seed); + int rows = params.M, cols = params.N, len = rows * cols; + + auto stream = handle.get_stream(); + // device-side data + din.resize(len); + dweights.resize(rows); + dexp.resize(cols); + dact.resize(cols); + + // create random matrix and weights + uniform(handle, r, din.data().get(), len, T(-1.0), T(1.0)); + uniform(handle, r, dweights.data().get(), rows, T(-1.0), T(1.0)); + + // host-side data + thrust::host_vector hin = din; + thrust::host_vector hweights = dweights; + thrust::host_vector hexp(cols); + + // compute naive result & copy to GPU + naiveColWeightedMean(hexp.data(), hin.data(), hweights.data(), rows, cols, true); + dexp = hexp; + + // compute result + colWeightedMean(dact.data().get(), din.data().get(), dweights.data().get(), cols, rows, stream); + + // adjust tolerance to account for round-off accumulation + params.tolerance *= params.M; + } + + protected: + raft::handle_t handle; + WeightedMeanInputs params; + thrust::host_vector hin, hweights; + thrust::device_vector din, dweights, dexp, dact; + }; + + template + class WeightedMeanTest : public ::testing::TestWithParam> { + protected: + void SetUp() override + { + params = ::testing::TestWithParam>::GetParam(); + raft::random::RngState r(params.seed); + auto stream = handle.get_stream(); + int rows = params.M, cols = params.N, len = rows * cols; + auto weight_size = params.along_rows ? cols : rows; + auto mean_size = params.along_rows ? rows : cols; + // device-side data + din.resize(len); + dweights.resize(weight_size); + dexp.resize(mean_size); + dact.resize(mean_size); + + // create random matrix and weights + uniform(handle, r, din.data().get(), len, T(-1.0), T(1.0)); + uniform(handle, r, dweights.data().get(), weight_size, T(-1.0), T(1.0)); + + // host-side data + thrust::host_vector hin = din; + thrust::host_vector hweights = dweights; + thrust::host_vector hexp(mean_size); + + // compute naive result & copy to GPU + if (params.along_rows) + naiveRowWeightedMean(hexp.data(), hin.data(), hweights.data(), rows, cols, params.row_major); + else + naiveColWeightedMean(hexp.data(), hin.data(), hweights.data(), rows, cols, params.row_major); + dexp = hexp; + + // compute result + weightedMean(dact.data().get(), + din.data().get(), + dweights.data().get(), + cols, + rows, + params.row_major, + params.along_rows, + stream); + + // adjust tolerance to account for round-off accumulation + params.tolerance *= params.N; + } + + protected: + raft::handle_t handle; + WeightedMeanInputs params; + thrust::host_vector hin, hweights; + thrust::device_vector din, dweights, dexp, dact; + }; ////// Parameter sets and test instantiation -static const float tolF = 128 * std::numeric_limits::epsilon(); -static const double tolD = 256 * std::numeric_limits::epsilon(); - -const std::vector> inputsf = {{tolF, 4, 4, 1234, true, true}, - {tolF, 1024, 32, 1234, true, false}, - {tolF, 1024, 64, 1234, true, true}, - {tolF, 1024, 128, 1234, true, false}, - {tolF, 1024, 256, 1234, true, true}, - {tolF, 1024, 32, 1234, false, false}, - {tolF, 1024, 64, 1234, false, true}, - {tolF, 1024, 128, 1234, false, false}, - {tolF, 1024, 256, 1234, false, true}}; - -const std::vector> inputsd = {{tolD, 4, 4, 1234, true, true}, - {tolD, 1024, 32, 1234, true, false}, - {tolD, 1024, 64, 1234, true, true}, - {tolD, 1024, 128, 1234, true, false}, - {tolD, 1024, 256, 1234, true, true}, - {tolD, 1024, 32, 1234, false, false}, - {tolD, 1024, 64, 1234, false, true}, - {tolD, 1024, 128, 1234, false, false}, - {tolD, 1024, 256, 1234, false, true}}; - -using RowWeightedMeanTestF = RowWeightedMeanTest; -TEST_P(RowWeightedMeanTestF, Result) -{ - ASSERT_TRUE(devArrMatch( - dexp.data().get(), dact.data().get(), params.M, raft::CompareApprox(params.tolerance))); -} -INSTANTIATE_TEST_CASE_P(RowWeightedMeanTest, RowWeightedMeanTestF, ::testing::ValuesIn(inputsf)); + static const float tolF = 128 * std::numeric_limits::epsilon(); + static const double tolD = 256 * std::numeric_limits::epsilon(); + + const std::vector> inputsf = {{tolF, 4, 4, 1234, true, true}, + {tolF, 1024, 32, 1234, true, false}, + {tolF, 1024, 64, 1234, true, true}, + {tolF, 1024, 128, 1234, true, false}, + {tolF, 1024, 256, 1234, true, true}, + {tolF, 1024, 32, 1234, false, false}, + {tolF, 1024, 64, 1234, false, true}, + {tolF, 1024, 128, 1234, false, false}, + {tolF, 1024, 256, 1234, false, true}}; + + const std::vector> inputsd = {{tolD, 4, 4, 1234, true, true}, + {tolD, 1024, 32, 1234, true, false}, + {tolD, 1024, 64, 1234, true, true}, + {tolD, 1024, 128, 1234, true, false}, + {tolD, 1024, 256, 1234, true, true}, + {tolD, 1024, 32, 1234, false, false}, + {tolD, 1024, 64, 1234, false, true}, + {tolD, 1024, 128, 1234, false, false}, + {tolD, 1024, 256, 1234, false, true}}; + + using RowWeightedMeanTestF = RowWeightedMeanTest; + TEST_P(RowWeightedMeanTestF, Result) + { + ASSERT_TRUE(devArrMatch( + dexp.data().get(), dact.data().get(), params.M, raft::CompareApprox(params.tolerance))); + } + INSTANTIATE_TEST_CASE_P(RowWeightedMeanTest, RowWeightedMeanTestF, ::testing::ValuesIn(inputsf)); -using RowWeightedMeanTestD = RowWeightedMeanTest; -TEST_P(RowWeightedMeanTestD, Result) + using RowWeightedMeanTestD = RowWeightedMeanTest; + TEST_P(RowWeightedMeanTestD, Result) { - ASSERT_TRUE(devArrMatch( - dexp.data().get(), dact.data().get(), params.M, raft::CompareApprox(params.tolerance))); + ASSERT_TRUE(devArrMatch( + dexp.data().get(), dact.data().get(), params.M, raft::CompareApprox(params.tolerance))); } INSTANTIATE_TEST_CASE_P(RowWeightedMeanTest, RowWeightedMeanTestD, ::testing::ValuesIn(inputsd)); using ColWeightedMeanTestF = ColWeightedMeanTest; TEST_P(ColWeightedMeanTestF, Result) { - ASSERT_TRUE(devArrMatch( - dexp.data().get(), dact.data().get(), params.N, raft::CompareApprox(params.tolerance))); +ASSERT_TRUE(devArrMatch( + dexp.data().get(), dact.data().get(), params.N, raft::CompareApprox(params.tolerance))); } INSTANTIATE_TEST_CASE_P(ColWeightedMeanTest, ColWeightedMeanTestF, ::testing::ValuesIn(inputsf)); using ColWeightedMeanTestD = ColWeightedMeanTest; TEST_P(ColWeightedMeanTestD, Result) { - ASSERT_TRUE(devArrMatch( - dexp.data().get(), dact.data().get(), params.N, raft::CompareApprox(params.tolerance))); +ASSERT_TRUE(devArrMatch( + dexp.data().get(), dact.data().get(), params.N, raft::CompareApprox(params.tolerance))); } INSTANTIATE_TEST_CASE_P(ColWeightedMeanTest, ColWeightedMeanTestD, ::testing::ValuesIn(inputsd)); using WeightedMeanTestF = WeightedMeanTest; TEST_P(WeightedMeanTestF, Result) { - ASSERT_TRUE(devArrMatch( - dexp.data().get(), dact.data().get(), params.N, raft::CompareApprox(params.tolerance))); +ASSERT_TRUE(devArrMatch( + dexp.data().get(), dact.data().get(), params.N, raft::CompareApprox(params.tolerance))); } INSTANTIATE_TEST_CASE_P(WeightedMeanTest, WeightedMeanTestF, ::testing::ValuesIn(inputsf)); using WeightedMeanTestD = WeightedMeanTest; TEST_P(WeightedMeanTestD, Result) { - ASSERT_TRUE(devArrMatch( - dexp.data().get(), dact.data().get(), params.N, raft::CompareApprox(params.tolerance))); +ASSERT_TRUE(devArrMatch( + dexp.data().get(), dact.data().get(), params.N, raft::CompareApprox(params.tolerance))); } INSTANTIATE_TEST_CASE_P(WeightedMeanTest, WeightedMeanTestD, ::testing::ValuesIn(inputsd)); From a78d38a68f967d6e677595a5094a84e738cc3685 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 5 Oct 2022 20:24:27 -0400 Subject: [PATCH 32/40] Weighted mean --- .../raft/stats/detail/weighted_mean.cuh | 4 +- cpp/test/stats/weighted_mean.cu | 470 +++++++++--------- 2 files changed, 237 insertions(+), 237 deletions(-) diff --git a/cpp/include/raft/stats/detail/weighted_mean.cuh b/cpp/include/raft/stats/detail/weighted_mean.cuh index 04b1a32522..43dbe4e7f1 100644 --- a/cpp/include/raft/stats/detail/weighted_mean.cuh +++ b/cpp/include/raft/stats/detail/weighted_mean.cuh @@ -50,7 +50,7 @@ void weightedMean(Type* mu, cudaStream_t stream) { // sum the weights & copy back to CPU - auto weight_size = along_rows ? N : D; + auto weight_size = along_rows ? D : N; Type WS = 0; raft::stats::sum(mu, weights, (IdxType)1, weight_size, false, stream); raft::update_host(&WS, mu, 1, stream); @@ -71,4 +71,4 @@ void weightedMean(Type* mu, } }; // end namespace detail }; // end namespace stats -}; // end namespace raft +}; // end namespace raft \ No newline at end of file diff --git a/cpp/test/stats/weighted_mean.cu b/cpp/test/stats/weighted_mean.cu index 3ce17ac974..ec99d5a627 100644 --- a/cpp/test/stats/weighted_mean.cu +++ b/cpp/test/stats/weighted_mean.cu @@ -23,273 +23,273 @@ #include namespace raft { - namespace stats { - - template - struct WeightedMeanInputs { - T tolerance; - int M, N; - unsigned long long int seed; - bool along_rows; // Used only for the weightedMean test function - bool row_major; - }; - - template - ::std::ostream& operator<<(::std::ostream& os, const WeightedMeanInputs& I) - { - return os << "{ " << I.tolerance << ", " << I.M << ", " << I.N << ", " << I.seed << ", " - << I.along_rows << "}" << std::endl; - } +namespace stats { + +template +struct WeightedMeanInputs { + T tolerance; + int M, N; + unsigned long long int seed; + bool along_rows; // Used only for the weightedMean test function + bool row_major; +}; + +template +::std::ostream& operator<<(::std::ostream& os, const WeightedMeanInputs& I) +{ + return os << "{ " << I.tolerance << ", " << I.M << ", " << I.N << ", " << I.seed << ", " + << I.along_rows << "}" << std::endl; +} ///// weighted row-wise mean test and support functions - template - void naiveRowWeightedMean(T* R, T* D, T* W, int M, int N, bool rowMajor) - { - int istr = rowMajor ? 1 : M; - int jstr = rowMajor ? N : 1; - - // sum the weights - T WS = 0; - for (int i = 0; i < N; i++) - WS += W[i]; - - for (int j = 0; j < M; j++) { - R[j] = (T)0; - for (int i = 0; i < N; i++) { - // R[j] += (W[i]*D[i*istr + j*jstr] - R[j])/(T)(i+1); - R[j] += (W[i] * D[i * istr + j * jstr]) / WS; - } - } - } - - template - class RowWeightedMeanTest : public ::testing::TestWithParam> { - protected: - void SetUp() override - { - params = ::testing::TestWithParam>::GetParam(); - raft::random::RngState r(params.seed); - int rows = params.M, cols = params.N, len = rows * cols; - auto stream = handle.get_stream(); - // device-side data - din.resize(len); - dweights.resize(cols); - dexp.resize(rows); - dact.resize(rows); - - // create random matrix and weights - uniform(handle, r, din.data().get(), len, T(-1.0), T(1.0)); - uniform(handle, r, dweights.data().get(), cols, T(-1.0), T(1.0)); - - // host-side data - thrust::host_vector hin = din; - thrust::host_vector hweights = dweights; - thrust::host_vector hexp(rows); - - // compute naive result & copy to GPU - naiveRowWeightedMean(hexp.data(), hin.data(), hweights.data(), rows, cols, true); - dexp = hexp; - - // compute result - rowWeightedMean(dact.data().get(), din.data().get(), dweights.data().get(), cols, rows, stream); - - // adjust tolerance to account for round-off accumulation - params.tolerance *= params.N; - } - - protected: - raft::handle_t handle; - WeightedMeanInputs params; - thrust::host_vector hin, hweights; - thrust::device_vector din, dweights, dexp, dact; - }; +template +void naiveRowWeightedMean(T* R, T* D, T* W, int M, int N, bool rowMajor) +{ + int istr = rowMajor ? 1 : M; + int jstr = rowMajor ? N : 1; + + // sum the weights + T WS = 0; + for (int i = 0; i < N; i++) + WS += W[i]; + + for (int j = 0; j < M; j++) { + R[j] = (T)0; + for (int i = 0; i < N; i++) { + // R[j] += (W[i]*D[i*istr + j*jstr] - R[j])/(T)(i+1); + R[j] += (W[i] * D[i * istr + j * jstr]) / WS; + } + } +} + +template +class RowWeightedMeanTest : public ::testing::TestWithParam> { + protected: + void SetUp() override + { + params = ::testing::TestWithParam>::GetParam(); + raft::random::RngState r(params.seed); + int rows = params.M, cols = params.N, len = rows * cols; + auto stream = handle.get_stream(); + // device-side data + din.resize(len); + dweights.resize(cols); + dexp.resize(rows); + dact.resize(rows); + + // create random matrix and weights + uniform(handle, r, din.data().get(), len, T(-1.0), T(1.0)); + uniform(handle, r, dweights.data().get(), cols, T(-1.0), T(1.0)); + + // host-side data + thrust::host_vector hin = din; + thrust::host_vector hweights = dweights; + thrust::host_vector hexp(rows); + + // compute naive result & copy to GPU + naiveRowWeightedMean(hexp.data(), hin.data(), hweights.data(), rows, cols, true); + dexp = hexp; + + // compute result + rowWeightedMean(dact.data().get(), din.data().get(), dweights.data().get(), cols, rows, stream); + + // adjust tolerance to account for round-off accumulation + params.tolerance *= params.N; + } + + protected: + raft::handle_t handle; + WeightedMeanInputs params; + thrust::host_vector hin, hweights; + thrust::device_vector din, dweights, dexp, dact; +}; ///// weighted column-wise mean test and support functions - template - void naiveColWeightedMean(T* R, T* D, T* W, int M, int N, bool rowMajor) - { - int istr = rowMajor ? 1 : M; - int jstr = rowMajor ? N : 1; - - // sum the weights - T WS = 0; - for (int j = 0; j < M; j++) - WS += W[j]; - - for (int i = 0; i < N; i++) { - R[i] = (T)0; - for (int j = 0; j < M; j++) { - // R[i] += (W[j]*D[i*istr + j*jstr] - R[i])/(T)(j+1); - R[i] += (W[j] * D[i * istr + j * jstr]) / WS; - } - } - } - - template - class ColWeightedMeanTest : public ::testing::TestWithParam> { - void SetUp() override - { - params = ::testing::TestWithParam>::GetParam(); - raft::random::RngState r(params.seed); - int rows = params.M, cols = params.N, len = rows * cols; - - auto stream = handle.get_stream(); - // device-side data - din.resize(len); - dweights.resize(rows); - dexp.resize(cols); - dact.resize(cols); - - // create random matrix and weights - uniform(handle, r, din.data().get(), len, T(-1.0), T(1.0)); - uniform(handle, r, dweights.data().get(), rows, T(-1.0), T(1.0)); - - // host-side data - thrust::host_vector hin = din; - thrust::host_vector hweights = dweights; - thrust::host_vector hexp(cols); - - // compute naive result & copy to GPU - naiveColWeightedMean(hexp.data(), hin.data(), hweights.data(), rows, cols, true); - dexp = hexp; - - // compute result - colWeightedMean(dact.data().get(), din.data().get(), dweights.data().get(), cols, rows, stream); - - // adjust tolerance to account for round-off accumulation - params.tolerance *= params.M; - } - - protected: - raft::handle_t handle; - WeightedMeanInputs params; - thrust::host_vector hin, hweights; - thrust::device_vector din, dweights, dexp, dact; - }; - - template - class WeightedMeanTest : public ::testing::TestWithParam> { - protected: - void SetUp() override - { - params = ::testing::TestWithParam>::GetParam(); - raft::random::RngState r(params.seed); - auto stream = handle.get_stream(); - int rows = params.M, cols = params.N, len = rows * cols; - auto weight_size = params.along_rows ? cols : rows; - auto mean_size = params.along_rows ? rows : cols; - // device-side data - din.resize(len); - dweights.resize(weight_size); - dexp.resize(mean_size); - dact.resize(mean_size); - - // create random matrix and weights - uniform(handle, r, din.data().get(), len, T(-1.0), T(1.0)); - uniform(handle, r, dweights.data().get(), weight_size, T(-1.0), T(1.0)); - - // host-side data - thrust::host_vector hin = din; - thrust::host_vector hweights = dweights; - thrust::host_vector hexp(mean_size); - - // compute naive result & copy to GPU - if (params.along_rows) - naiveRowWeightedMean(hexp.data(), hin.data(), hweights.data(), rows, cols, params.row_major); - else - naiveColWeightedMean(hexp.data(), hin.data(), hweights.data(), rows, cols, params.row_major); - dexp = hexp; - - // compute result - weightedMean(dact.data().get(), - din.data().get(), - dweights.data().get(), - cols, - rows, - params.row_major, - params.along_rows, - stream); - - // adjust tolerance to account for round-off accumulation - params.tolerance *= params.N; - } - - protected: - raft::handle_t handle; - WeightedMeanInputs params; - thrust::host_vector hin, hweights; - thrust::device_vector din, dweights, dexp, dact; - }; +template +void naiveColWeightedMean(T* R, T* D, T* W, int M, int N, bool rowMajor) +{ + int istr = rowMajor ? 1 : M; + int jstr = rowMajor ? N : 1; + + // sum the weights + T WS = 0; + for (int j = 0; j < M; j++) + WS += W[j]; + + for (int i = 0; i < N; i++) { + R[i] = (T)0; + for (int j = 0; j < M; j++) { + // R[i] += (W[j]*D[i*istr + j*jstr] - R[i])/(T)(j+1); + R[i] += (W[j] * D[i * istr + j * jstr]) / WS; + } + } +} + +template +class ColWeightedMeanTest : public ::testing::TestWithParam> { + void SetUp() override + { + params = ::testing::TestWithParam>::GetParam(); + raft::random::RngState r(params.seed); + int rows = params.M, cols = params.N, len = rows * cols; + + auto stream = handle.get_stream(); + // device-side data + din.resize(len); + dweights.resize(rows); + dexp.resize(cols); + dact.resize(cols); + + // create random matrix and weights + uniform(handle, r, din.data().get(), len, T(-1.0), T(1.0)); + uniform(handle, r, dweights.data().get(), rows, T(-1.0), T(1.0)); + + // host-side data + thrust::host_vector hin = din; + thrust::host_vector hweights = dweights; + thrust::host_vector hexp(cols); + + // compute naive result & copy to GPU + naiveColWeightedMean(hexp.data(), hin.data(), hweights.data(), rows, cols, true); + dexp = hexp; + + // compute result + colWeightedMean(dact.data().get(), din.data().get(), dweights.data().get(), cols, rows, stream); + + // adjust tolerance to account for round-off accumulation + params.tolerance *= params.M; + } + + protected: + raft::handle_t handle; + WeightedMeanInputs params; + thrust::host_vector hin, hweights; + thrust::device_vector din, dweights, dexp, dact; +}; + +template +class WeightedMeanTest : public ::testing::TestWithParam> { + protected: + void SetUp() override + { + params = ::testing::TestWithParam>::GetParam(); + raft::random::RngState r(params.seed); + auto stream = handle.get_stream(); + int rows = params.M, cols = params.N, len = rows * cols; + auto weight_size = params.along_rows ? cols : rows; + auto mean_size = params.along_rows ? rows : cols; + // device-side data + din.resize(len); + dweights.resize(weight_size); + dexp.resize(mean_size); + dact.resize(mean_size); + + // create random matrix and weights + uniform(handle, r, din.data().get(), len, T(-1.0), T(1.0)); + uniform(handle, r, dweights.data().get(), weight_size, T(-1.0), T(1.0)); + + // host-side data + thrust::host_vector hin = din; + thrust::host_vector hweights = dweights; + thrust::host_vector hexp(mean_size); + + // compute naive result & copy to GPU + if (params.along_rows) + naiveRowWeightedMean(hexp.data(), hin.data(), hweights.data(), rows, cols, params.row_major); + else + naiveColWeightedMean(hexp.data(), hin.data(), hweights.data(), rows, cols, params.row_major); + dexp = hexp; + + // compute result + weightedMean(dact.data().get(), + din.data().get(), + dweights.data().get(), + cols, + rows, + params.row_major, + params.along_rows, + stream); + + // adjust tolerance to account for round-off accumulation + params.tolerance *= params.N; + } + + protected: + raft::handle_t handle; + WeightedMeanInputs params; + thrust::host_vector hin, hweights; + thrust::device_vector din, dweights, dexp, dact; +}; ////// Parameter sets and test instantiation - static const float tolF = 128 * std::numeric_limits::epsilon(); - static const double tolD = 256 * std::numeric_limits::epsilon(); - - const std::vector> inputsf = {{tolF, 4, 4, 1234, true, true}, - {tolF, 1024, 32, 1234, true, false}, - {tolF, 1024, 64, 1234, true, true}, - {tolF, 1024, 128, 1234, true, false}, - {tolF, 1024, 256, 1234, true, true}, - {tolF, 1024, 32, 1234, false, false}, - {tolF, 1024, 64, 1234, false, true}, - {tolF, 1024, 128, 1234, false, false}, - {tolF, 1024, 256, 1234, false, true}}; - - const std::vector> inputsd = {{tolD, 4, 4, 1234, true, true}, - {tolD, 1024, 32, 1234, true, false}, - {tolD, 1024, 64, 1234, true, true}, - {tolD, 1024, 128, 1234, true, false}, - {tolD, 1024, 256, 1234, true, true}, - {tolD, 1024, 32, 1234, false, false}, - {tolD, 1024, 64, 1234, false, true}, - {tolD, 1024, 128, 1234, false, false}, - {tolD, 1024, 256, 1234, false, true}}; - - using RowWeightedMeanTestF = RowWeightedMeanTest; - TEST_P(RowWeightedMeanTestF, Result) - { - ASSERT_TRUE(devArrMatch( - dexp.data().get(), dact.data().get(), params.M, raft::CompareApprox(params.tolerance))); - } - INSTANTIATE_TEST_CASE_P(RowWeightedMeanTest, RowWeightedMeanTestF, ::testing::ValuesIn(inputsf)); +static const float tolF = 128 * std::numeric_limits::epsilon(); +static const double tolD = 256 * std::numeric_limits::epsilon(); + +const std::vector> inputsf = {{tolF, 4, 4, 1234, true, true}, + {tolF, 1024, 32, 1234, true, false}, + {tolF, 1024, 64, 1234, true, true}, + {tolF, 1024, 128, 1234, true, false}, + {tolF, 1024, 256, 1234, true, true}, + {tolF, 1024, 32, 1234, false, false}, + {tolF, 1024, 64, 1234, false, true}, + {tolF, 1024, 128, 1234, false, false}, + {tolF, 1024, 256, 1234, false, true}}; + +const std::vector> inputsd = {{tolD, 4, 4, 1234, true, true}, + {tolD, 1024, 32, 1234, true, false}, + {tolD, 1024, 64, 1234, true, true}, + {tolD, 1024, 128, 1234, true, false}, + {tolD, 1024, 256, 1234, true, true}, + {tolD, 1024, 32, 1234, false, false}, + {tolD, 1024, 64, 1234, false, true}, + {tolD, 1024, 128, 1234, false, false}, + {tolD, 1024, 256, 1234, false, true}}; + +using RowWeightedMeanTestF = RowWeightedMeanTest; +TEST_P(RowWeightedMeanTestF, Result) +{ + ASSERT_TRUE(devArrMatch( + dexp.data().get(), dact.data().get(), params.M, raft::CompareApprox(params.tolerance))); +} +INSTANTIATE_TEST_CASE_P(RowWeightedMeanTest, RowWeightedMeanTestF, ::testing::ValuesIn(inputsf)); - using RowWeightedMeanTestD = RowWeightedMeanTest; - TEST_P(RowWeightedMeanTestD, Result) +using RowWeightedMeanTestD = RowWeightedMeanTest; +TEST_P(RowWeightedMeanTestD, Result) { - ASSERT_TRUE(devArrMatch( - dexp.data().get(), dact.data().get(), params.M, raft::CompareApprox(params.tolerance))); + ASSERT_TRUE(devArrMatch( + dexp.data().get(), dact.data().get(), params.M, raft::CompareApprox(params.tolerance))); } INSTANTIATE_TEST_CASE_P(RowWeightedMeanTest, RowWeightedMeanTestD, ::testing::ValuesIn(inputsd)); using ColWeightedMeanTestF = ColWeightedMeanTest; TEST_P(ColWeightedMeanTestF, Result) { -ASSERT_TRUE(devArrMatch( - dexp.data().get(), dact.data().get(), params.N, raft::CompareApprox(params.tolerance))); + ASSERT_TRUE(devArrMatch( + dexp.data().get(), dact.data().get(), params.N, raft::CompareApprox(params.tolerance))); } INSTANTIATE_TEST_CASE_P(ColWeightedMeanTest, ColWeightedMeanTestF, ::testing::ValuesIn(inputsf)); using ColWeightedMeanTestD = ColWeightedMeanTest; TEST_P(ColWeightedMeanTestD, Result) { -ASSERT_TRUE(devArrMatch( - dexp.data().get(), dact.data().get(), params.N, raft::CompareApprox(params.tolerance))); + ASSERT_TRUE(devArrMatch( + dexp.data().get(), dact.data().get(), params.N, raft::CompareApprox(params.tolerance))); } INSTANTIATE_TEST_CASE_P(ColWeightedMeanTest, ColWeightedMeanTestD, ::testing::ValuesIn(inputsd)); using WeightedMeanTestF = WeightedMeanTest; TEST_P(WeightedMeanTestF, Result) { -ASSERT_TRUE(devArrMatch( - dexp.data().get(), dact.data().get(), params.N, raft::CompareApprox(params.tolerance))); + ASSERT_TRUE(devArrMatch( + dexp.data().get(), dact.data().get(), params.N, raft::CompareApprox(params.tolerance))); } INSTANTIATE_TEST_CASE_P(WeightedMeanTest, WeightedMeanTestF, ::testing::ValuesIn(inputsf)); using WeightedMeanTestD = WeightedMeanTest; TEST_P(WeightedMeanTestD, Result) { -ASSERT_TRUE(devArrMatch( - dexp.data().get(), dact.data().get(), params.N, raft::CompareApprox(params.tolerance))); + ASSERT_TRUE(devArrMatch( + dexp.data().get(), dact.data().get(), params.N, raft::CompareApprox(params.tolerance))); } INSTANTIATE_TEST_CASE_P(WeightedMeanTest, WeightedMeanTestD, ::testing::ValuesIn(inputsd)); From c3d11a3f8f01a91f58c6e1968b3a7b6416569f5a Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 5 Oct 2022 22:04:18 -0400 Subject: [PATCH 33/40] Skipping re-install of raft-dask for docs build --- build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sh b/build.sh index 1f1169ade9..acced0cb08 100755 --- a/build.sh +++ b/build.sh @@ -312,7 +312,7 @@ if (( ${NUMARGS} == 0 )) || hasArg libraft || hasArg docs || hasArg tests || has fi # Build and (optionally) install the raft-dask Python package -if (( ${NUMARGS} == 0 )) || hasArg raft-dask || hasArg docs; then +if (( ${NUMARGS} == 0 )) || hasArg raft-dask then cd ${REPODIR}/python/raft-dask python setup.py build_ext --inplace -- -DCMAKE_PREFIX_PATH="${LIBRAFT_BUILD_DIR};${INSTALL_PREFIX}" -DCMAKE_LIBRARY_PATH=${LIBRAFT_BUILD_DIR} ${EXTRA_CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1} From 5a7307fe725a944f1d5e40a40b4fc1fcf536b18c Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 5 Oct 2022 22:05:18 -0400 Subject: [PATCH 34/40] Adding missing semicolon --- build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sh b/build.sh index acced0cb08..194ce6fe72 100755 --- a/build.sh +++ b/build.sh @@ -312,7 +312,7 @@ if (( ${NUMARGS} == 0 )) || hasArg libraft || hasArg docs || hasArg tests || has fi # Build and (optionally) install the raft-dask Python package -if (( ${NUMARGS} == 0 )) || hasArg raft-dask then +if (( ${NUMARGS} == 0 )) || hasArg raft-dask; then cd ${REPODIR}/python/raft-dask python setup.py build_ext --inplace -- -DCMAKE_PREFIX_PATH="${LIBRAFT_BUILD_DIR};${INSTALL_PREFIX}" -DCMAKE_LIBRARY_PATH=${LIBRAFT_BUILD_DIR} ${EXTRA_CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1} From a7790e381a9ff33eb076c508237335bf7a1444ea Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 5 Oct 2022 22:11:46 -0400 Subject: [PATCH 35/40] Adding pylibraft to docs build. --- build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build.sh b/build.sh index 194ce6fe72..52e28e4a29 100755 --- a/build.sh +++ b/build.sh @@ -312,7 +312,7 @@ if (( ${NUMARGS} == 0 )) || hasArg libraft || hasArg docs || hasArg tests || has fi # Build and (optionally) install the raft-dask Python package -if (( ${NUMARGS} == 0 )) || hasArg raft-dask; then +if (( ${NUMARGS} == 0 )) || hasArg raft-dask || hasArg docs; then cd ${REPODIR}/python/raft-dask python setup.py build_ext --inplace -- -DCMAKE_PREFIX_PATH="${LIBRAFT_BUILD_DIR};${INSTALL_PREFIX}" -DCMAKE_LIBRARY_PATH=${LIBRAFT_BUILD_DIR} ${EXTRA_CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1} @@ -322,7 +322,7 @@ if (( ${NUMARGS} == 0 )) || hasArg raft-dask; then fi # Build and (optionally) install the pylibraft Python package -if (( ${NUMARGS} == 0 )) || hasArg pylibraft; then +if (( ${NUMARGS} == 0 )) || hasArg pylibraft || hasArg docs; then cd ${REPODIR}/python/pylibraft python setup.py build_ext --inplace -- -DCMAKE_PREFIX_PATH="${LIBRAFT_BUILD_DIR};${INSTALL_PREFIX}" -DCMAKE_LIBRARY_PATH=${LIBRAFT_BUILD_DIR} ${EXTRA_CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1} From 374c91c28cf8a54654fd41b405f46ab265f36c76 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Thu, 6 Oct 2022 06:21:02 -0400 Subject: [PATCH 36/40] Reverting changes to build.sh --- build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sh b/build.sh index 52e28e4a29..1f1169ade9 100755 --- a/build.sh +++ b/build.sh @@ -322,7 +322,7 @@ if (( ${NUMARGS} == 0 )) || hasArg raft-dask || hasArg docs; then fi # Build and (optionally) install the pylibraft Python package -if (( ${NUMARGS} == 0 )) || hasArg pylibraft || hasArg docs; then +if (( ${NUMARGS} == 0 )) || hasArg pylibraft; then cd ${REPODIR}/python/pylibraft python setup.py build_ext --inplace -- -DCMAKE_PREFIX_PATH="${LIBRAFT_BUILD_DIR};${INSTALL_PREFIX}" -DCMAKE_LIBRARY_PATH=${LIBRAFT_BUILD_DIR} ${EXTRA_CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1} From 72d6c809e64d06d9ec8e35fb4334a030d995a3f6 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Fri, 7 Oct 2022 10:27:09 -0400 Subject: [PATCH 37/40] enabling verbose logging in build.sh for docs --- build.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/build.sh b/build.sh index a31d97c22c..135c422c35 100755 --- a/build.sh +++ b/build.sh @@ -357,7 +357,8 @@ if (( ${NUMARGS} == 0 )) || hasArg pylibraft; then fi if hasArg docs; then - cmake --build ${LIBRAFT_BUILD_DIR} --target docs_raft + set -x + cmake --build --verbose ${LIBRAFT_BUILD_DIR} --target docs_raft cd ${SPHINX_BUILD_DIR} make html fi From ff2b9b06d8aa6fd4a49487f37cbdcd0bc662e402 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Fri, 7 Oct 2022 12:10:23 -0400 Subject: [PATCH 38/40] Removing --build from cmake --build --- build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sh b/build.sh index 135c422c35..09c50fb932 100755 --- a/build.sh +++ b/build.sh @@ -358,7 +358,7 @@ fi if hasArg docs; then set -x - cmake --build --verbose ${LIBRAFT_BUILD_DIR} --target docs_raft + cmake --build ${LIBRAFT_BUILD_DIR} --target docs_raft cd ${SPHINX_BUILD_DIR} make html fi From dd89e816e1f94a4d9b286ea732fc5d4677685868 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Fri, 7 Oct 2022 16:06:19 -0400 Subject: [PATCH 39/40] Fixing doxygen build --- build.sh | 2 +- cpp/CMakeLists.txt | 1 - cpp/cmake/doxygen.cmake | 2 ++ cpp/doxygen/Doxyfile.in | 4 ++-- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/build.sh b/build.sh index 09c50fb932..d1dd8bdde1 100755 --- a/build.sh +++ b/build.sh @@ -358,7 +358,7 @@ fi if hasArg docs; then set -x - cmake --build ${LIBRAFT_BUILD_DIR} --target docs_raft + cmake --build ${LIBRAFT_BUILD_DIR} -v --target docs_raft cd ${SPHINX_BUILD_DIR} make html fi diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index ce6eb00bc1..32fe654965 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -569,7 +569,6 @@ endif() ############################################################################## # - doxygen targets ---------------------------------------------------------- - include(cmake/doxygen.cmake) add_doxygen_target(IN_DOXYFILE doxygen/Doxyfile.in OUT_DOXYFILE ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile diff --git a/cpp/cmake/doxygen.cmake b/cpp/cmake/doxygen.cmake index 5b2da57eb5..45b973e7e3 100644 --- a/cpp/cmake/doxygen.cmake +++ b/cpp/cmake/doxygen.cmake @@ -22,6 +22,8 @@ function(add_doxygen_target) set(multiValueArgs "") cmake_parse_arguments(dox "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) configure_file(${dox_IN_DOXYFILE} ${dox_OUT_DOXYFILE} @ONLY) + + message("Command: ${DOXYGEN_EXECUTABLE} ${dox_OUT_DOXYFILE}") add_custom_target(docs_raft ${DOXYGEN_EXECUTABLE} ${dox_OUT_DOXYFILE} WORKING_DIRECTORY ${dox_CWD} diff --git a/cpp/doxygen/Doxyfile.in b/cpp/doxygen/Doxyfile.in index 549862600a..5517562a9f 100644 --- a/cpp/doxygen/Doxyfile.in +++ b/cpp/doxygen/Doxyfile.in @@ -459,7 +459,7 @@ LOOKUP_CACHE_SIZE = 0 # DOT_NUM_THREADS setting. # Minimum value: 0, maximum value: 32, default value: 1. -NUM_PROC_THREADS = 1 +NUM_PROC_THREADS = 0 #--------------------------------------------------------------------------- # Build related configuration options @@ -2495,7 +2495,7 @@ PLANTUML_INCLUDE_PATH = # Minimum value: 0, maximum value: 10000, default value: 50. # This tag requires that the tag HAVE_DOT is set to YES. -DOT_GRAPH_MAX_NODES = 50 +DOT_GRAPH_MAX_NODES = 100 # The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the graphs # generated by dot. A depth value of 3 means that only nodes reachable from the From 4a3ad939fafb1c47c32a0dd43d11f3e5057695f7 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Fri, 7 Oct 2022 16:47:09 -0400 Subject: [PATCH 40/40] Fixing style --- cpp/cmake/doxygen.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/cmake/doxygen.cmake b/cpp/cmake/doxygen.cmake index 45b973e7e3..7d06ec194c 100644 --- a/cpp/cmake/doxygen.cmake +++ b/cpp/cmake/doxygen.cmake @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License.