From e314eea8d5c389db2a9f9281457bc5311c439c03 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 16 Feb 2022 12:49:51 -0500 Subject: [PATCH 01/12] Moving scores and metrics over to raft::stats --- cpp/include/raft/matrix/col_wise_sort.hpp | 51 +++ .../raft/matrix/detail/columnWiseSort.cuh | 346 ++++++++++++++++++ cpp/include/raft/stats/accuracy.hpp | 42 +++ .../raft/stats/adjusted_rand_index.hpp | 50 +++ cpp/include/raft/stats/common.hpp | 5 + cpp/include/raft/stats/completeness_score.hpp | 46 +++ cpp/include/raft/stats/contingency_matrix.hpp | 93 +++++ .../raft/stats/detail/adjusted_rand_index.cuh | 197 ++++++++++ .../detail/batched/information_criterion.cuh | 77 ++++ .../stats/detail/batched/silhouette_score.cuh | 276 ++++++++++++++ .../raft/stats/detail/completeness_score.cuh | 71 ++++ .../raft/stats/detail/contingencyMatrix.cuh | 313 ++++++++++++++++ cpp/include/raft/stats/detail/dispersion.cuh | 138 +++++++ cpp/include/raft/stats/detail/entropy.cuh | 155 ++++++++ .../raft/stats/detail/homogeneity_score.cuh | 72 ++++ .../raft/stats/detail/kl_divergence.cuh | 86 +++++ .../raft/stats/detail/mutual_info_score.cuh | 179 +++++++++ cpp/include/raft/stats/detail/rand_index.cuh | 167 +++++++++ cpp/include/raft/stats/detail/scores.cuh | 215 +++++++++++ .../raft/stats/detail/silhouette_score.cuh | 332 +++++++++++++++++ .../stats/detail/trustworthiness_score.cuh | 219 +++++++++++ cpp/include/raft/stats/detail/v_measure.cuh | 64 ++++ cpp/include/raft/stats/dispersion.hpp | 54 +++ cpp/include/raft/stats/entropy.hpp | 44 +++ cpp/include/raft/stats/homogeneity_score.hpp | 46 +++ .../raft/stats/information_criterion.hpp | 62 ++++ cpp/include/raft/stats/kl_divergence.hpp | 41 +++ cpp/include/raft/stats/meanvar.hpp | 3 +- cpp/include/raft/stats/mutual_info_score.hpp | 46 +++ cpp/include/raft/stats/r2_score.hpp | 45 +++ cpp/include/raft/stats/rand_index.hpp | 43 +++ cpp/include/raft/stats/regression_metrics.hpp | 49 +++ cpp/include/raft/stats/silhouette_score.hpp | 71 ++++ cpp/include/raft/stats/specializations.hpp | 20 + .../raft/stats/trustworthiness_score.hpp | 47 +++ cpp/include/raft/stats/v_measure.hpp | 45 +++ cpp/test/CMakeLists.txt | 12 + cpp/test/stats/adjusted_rand_index.cu | 201 ++++++++++ cpp/test/stats/completeness_score.cu | 138 +++++++ cpp/test/stats/contingencyMatrix.cu | 171 +++++++++ cpp/test/stats/dispersion.cu | 125 +++++++ cpp/test/stats/entropy.cu | 118 ++++++ cpp/test/stats/homogeneity_score.cu | 135 +++++++ cpp/test/stats/kl_divergence.cu | 105 ++++++ cpp/test/stats/mutual_info_score.cu | 163 +++++++++ cpp/test/stats/rand_index.cu | 127 +++++++ cpp/test/stats/silhouette_score.cu | 230 ++++++++++++ cpp/test/stats/trustworthiness.cu | 336 +++++++++++++++++ cpp/test/stats/v_measure.cu | 140 +++++++ 49 files changed, 5809 insertions(+), 2 deletions(-) create mode 100644 cpp/include/raft/matrix/col_wise_sort.hpp create mode 100644 cpp/include/raft/matrix/detail/columnWiseSort.cuh create mode 100644 cpp/include/raft/stats/accuracy.hpp create mode 100644 cpp/include/raft/stats/adjusted_rand_index.hpp create mode 100644 cpp/include/raft/stats/completeness_score.hpp create mode 100644 cpp/include/raft/stats/contingency_matrix.hpp create mode 100644 cpp/include/raft/stats/detail/adjusted_rand_index.cuh create mode 100644 cpp/include/raft/stats/detail/batched/information_criterion.cuh create mode 100644 cpp/include/raft/stats/detail/batched/silhouette_score.cuh create mode 100644 cpp/include/raft/stats/detail/completeness_score.cuh create mode 100644 cpp/include/raft/stats/detail/contingencyMatrix.cuh create mode 100644 cpp/include/raft/stats/detail/dispersion.cuh create mode 100644 cpp/include/raft/stats/detail/entropy.cuh create mode 100644 cpp/include/raft/stats/detail/homogeneity_score.cuh create mode 100644 cpp/include/raft/stats/detail/kl_divergence.cuh create mode 100644 cpp/include/raft/stats/detail/mutual_info_score.cuh create mode 100644 cpp/include/raft/stats/detail/rand_index.cuh create mode 100644 cpp/include/raft/stats/detail/scores.cuh create mode 100644 cpp/include/raft/stats/detail/silhouette_score.cuh create mode 100644 cpp/include/raft/stats/detail/trustworthiness_score.cuh create mode 100644 cpp/include/raft/stats/detail/v_measure.cuh create mode 100644 cpp/include/raft/stats/dispersion.hpp create mode 100644 cpp/include/raft/stats/entropy.hpp create mode 100644 cpp/include/raft/stats/homogeneity_score.hpp create mode 100644 cpp/include/raft/stats/information_criterion.hpp create mode 100644 cpp/include/raft/stats/kl_divergence.hpp create mode 100644 cpp/include/raft/stats/mutual_info_score.hpp create mode 100644 cpp/include/raft/stats/r2_score.hpp create mode 100644 cpp/include/raft/stats/rand_index.hpp create mode 100644 cpp/include/raft/stats/regression_metrics.hpp create mode 100644 cpp/include/raft/stats/silhouette_score.hpp create mode 100644 cpp/include/raft/stats/specializations.hpp create mode 100644 cpp/include/raft/stats/trustworthiness_score.hpp create mode 100644 cpp/include/raft/stats/v_measure.hpp create mode 100644 cpp/test/stats/adjusted_rand_index.cu create mode 100644 cpp/test/stats/completeness_score.cu create mode 100644 cpp/test/stats/contingencyMatrix.cu create mode 100644 cpp/test/stats/dispersion.cu create mode 100644 cpp/test/stats/entropy.cu create mode 100644 cpp/test/stats/homogeneity_score.cu create mode 100644 cpp/test/stats/kl_divergence.cu create mode 100644 cpp/test/stats/mutual_info_score.cu create mode 100644 cpp/test/stats/rand_index.cu create mode 100644 cpp/test/stats/silhouette_score.cu create mode 100644 cpp/test/stats/trustworthiness.cu create mode 100644 cpp/test/stats/v_measure.cu diff --git a/cpp/include/raft/matrix/col_wise_sort.hpp b/cpp/include/raft/matrix/col_wise_sort.hpp new file mode 100644 index 0000000000..c4f8435424 --- /dev/null +++ b/cpp/include/raft/matrix/col_wise_sort.hpp @@ -0,0 +1,51 @@ +/* +* Copyright (c) 2019-2022, NVIDIA CORPORATION. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +#pragma once + +#include + +namespace raft { +namespace matrix { + +/** +* @brief sort columns within each row of row-major input matrix and return sorted indexes +* modelled as key-value sort with key being input matrix and value being index of values +* @param in: input matrix +* @param out: output value(index) matrix +* @param n_rows: number rows of input matrix +* @param n_cols: number columns of input matrix +* @param bAllocWorkspace: check returned value, if true allocate workspace passed in workspaceSize +* @param workspacePtr: pointer to workspace memory +* @param workspaceSize: Size of workspace to be allocated +* @param stream: cuda stream to execute prim on +* @param sortedKeys: Optional, output matrix for sorted keys (input) +*/ +template +void sort_cols_per_row(const InType* in, + OutType* out, + int n_rows, + int n_columns, + bool& bAllocWorkspace, + void* workspacePtr, + size_t& workspaceSize, + cudaStream_t stream, + InType* sortedKeys = nullptr) { + detail::sortColumnsPerRow(in, out, n_rows, n_columns, bAllocWorkspace, workspacePtr, workspaceSize, + stream, sortedKeys); +} +}; // end namespace matrix +}; // end namespace raft diff --git a/cpp/include/raft/matrix/detail/columnWiseSort.cuh b/cpp/include/raft/matrix/detail/columnWiseSort.cuh new file mode 100644 index 0000000000..f5758e3459 --- /dev/null +++ b/cpp/include/raft/matrix/detail/columnWiseSort.cuh @@ -0,0 +1,346 @@ +/* +* Copyright (c) 2019-2022, NVIDIA CORPORATION. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +#pragma once + +#include +#include +#include +#include +#include + +#define INST_BLOCK_SORT(keyIn, keyOut, valueInOut, rows, columns, blockSize, elemPT, stream) \ +devKeyValSortColumnPerRow<<>>( \ +keyIn, keyOut, valueInOut, rows, columns, std::numeric_limits::max()) + +namespace raft { +namespace matrix { +namespace detail { + +template +struct TemplateChecker { +enum { + IsValid = (std::is_same::value && BLOCK_SIZE <= 1024) || + (std::is_same::value && BLOCK_SIZE <= 1024) || + (std::is_same::value && BLOCK_SIZE <= 1024) || + (std::is_same::value && BLOCK_SIZE <= 512) +}; +}; + +template +struct SmemPerBlock { +typedef cub::BlockLoad + BlockLoadTypeKey; + +typedef cub::BlockRadixSort BlockRadixSortType; + +union TempStorage { + typename BlockLoadTypeKey::TempStorage keyLoad; + typename BlockRadixSortType::TempStorage sort; +} tempStorage; +}; + +template +__global__ void devLayoutIdx(InType* in, int n_cols, int totalElements) +{ +int idx = threadIdx.x + blockDim.x * blockIdx.x; +int n = n_cols; + +if (idx < totalElements) { in[idx] = idx % n; } +} + +template +__global__ void devOffsetKernel(T* in, T value, int n_times) +{ +int idx = threadIdx.x + blockIdx.x * blockDim.x; +if (idx < n_times) in[idx] = idx * value; +} + +// block level radix sort - can only sort as much data we can fit within shared memory +template < + typename InType, + typename OutType, + int BLOCK_SIZE, + int ITEMS_PER_THREAD, + typename std::enable_if::IsValid, InType>::type* = nullptr> +__global__ void __launch_bounds__(1024, 1) devKeyValSortColumnPerRow(const InType* inputKeys, + InType* outputKeys, + OutType* inputVals, + int n_rows, + int n_cols, + InType MAX_VALUE) +{ +typedef cub::BlockLoad + BlockLoadTypeKey; + +typedef cub::BlockRadixSort BlockRadixSortType; + +__shared__ SmemPerBlock tmpSmem; + +InType threadKeys[ITEMS_PER_THREAD]; +OutType threadValues[ITEMS_PER_THREAD]; + +int blockOffset = blockIdx.x * n_cols; +BlockLoadTypeKey(tmpSmem.tempStorage.keyLoad) + .Load(inputKeys + blockOffset, threadKeys, n_cols, MAX_VALUE); + +OutType idxBase = threadIdx.x * ITEMS_PER_THREAD; +for (int i = 0; i < ITEMS_PER_THREAD; i++) { + OutType eId = idxBase + (OutType)i; + if (eId < n_cols) + threadValues[i] = eId; + else + threadValues[i] = MAX_VALUE; +} + +__syncthreads(); + +BlockRadixSortType(tmpSmem.tempStorage.sort).SortBlockedToStriped(threadKeys, threadValues); + +// storing index values back (not keys) +cub::StoreDirectStriped(threadIdx.x, inputVals + blockOffset, threadValues, n_cols); + +if (outputKeys) { + cub::StoreDirectStriped(threadIdx.x, outputKeys + blockOffset, threadKeys, n_cols); +} +} + +template < + typename InType, + typename OutType, + int BLOCK_SIZE, + int ITEMS_PER_THREAD, + typename std::enable_if::IsValid), InType>::type* = nullptr> +__global__ void devKeyValSortColumnPerRow(const InType* inputKeys, + InType* outputKeys, + OutType* inputVals, + int n_rows, + int n_cols, + InType MAX_VALUE) +{ +// place holder function +// so that compiler unrolls for all template types successfully +} + +// helper function to layout values (index's) for key-value sort +template +cudaError_t layoutIdx(OutType* in, int n_rows, int n_columns, cudaStream_t stream) +{ +int totalElements = n_rows * n_columns; +dim3 block(256); +dim3 grid((totalElements + block.x - 1) / block.x); +devLayoutIdx<<>>(in, n_columns, totalElements); +return cudaGetLastError(); +} + +// helper function to layout offsets for rows for DeviceSegmentedRadixSort +template +cudaError_t layoutSortOffset(T* in, T value, int n_times, cudaStream_t stream) +{ +dim3 block(128); +dim3 grid((n_times + block.x - 1) / block.x); +devOffsetKernel<<>>(in, value, n_times); +return cudaGetLastError(); +} + +/** +* @brief sort columns within each row of row-major input matrix and return sorted indexes +* modelled as key-value sort with key being input matrix and value being index of values +* @param in: input matrix +* @param out: output value(index) matrix +* @param n_rows: number rows of input matrix +* @param n_cols: number columns of input matrix +* @param bAllocWorkspace: check returned value, if true allocate workspace passed in workspaceSize +* @param workspacePtr: pointer to workspace memory +* @param workspaceSize: Size of workspace to be allocated +* @param stream: cuda stream to execute prim on +* @param sortedKeys: Optional, output matrix for sorted keys (input) +*/ +template +void sortColumnsPerRow(const InType* in, + OutType* out, + int n_rows, + int n_columns, + bool& bAllocWorkspace, + void* workspacePtr, + size_t& workspaceSize, + cudaStream_t stream, + InType* sortedKeys = nullptr) +{ +// assume non-square row-major matrices +// current use-case: KNN, trustworthiness scores +// output : either sorted indices or sorted indices and input values +// future : this prim can be modified to be more generic and serve as a way to sort column entries +// per row +// i.e. another output format: sorted values only + +int totalElements = n_rows * n_columns; +size_t perElementSmemUsage = sizeof(InType) + sizeof(OutType); +size_t memAlignWidth = 256; + +// @ToDo: Figure out dynamic shared memory for block sort kernel - better for volta and beyond +// int currDevice = 0, smemLimit = 0; +// RAFT_CUDA_TRY(cudaGetDevice(&currDevice)); +// RAFT_CUDA_TRY(cudaDeviceGetAttribute(&smemLimit, cudaDevAttrMaxSharedMemoryPerBlock, +// currDevice)); size_t maxElementsForBlockSort = smemLimit / perElementSmemUsage; + +// for 48KB smem/block, can fit in 6144 4byte key-value pair +// assuming key-value sort for now - smem computation will change for value only sort +// dtype being size of key-value pair +std::map dtypeToColumnMap = {{4, 12288}, // short + short + {8, 12288}, // float/int + int/float + {12, 6144}, // double + int/float + {16, 6144}}; // double + double + +if (dtypeToColumnMap.count(perElementSmemUsage) != 0 && + n_columns <= dtypeToColumnMap[perElementSmemUsage]) { + // more elements per thread --> more register pressure + // 512(blockSize) * 8 elements per thread = 71 register / thread + + // instantiate some kernel combinations + if (n_columns <= 512) + INST_BLOCK_SORT(in, sortedKeys, out, n_rows, n_columns, 128, 4, stream); + else if (n_columns > 512 && n_columns <= 1024) + INST_BLOCK_SORT(in, sortedKeys, out, n_rows, n_columns, 128, 8, stream); + else if (n_columns > 1024 && n_columns <= 3072) + INST_BLOCK_SORT(in, sortedKeys, out, n_rows, n_columns, 512, 6, stream); + else if (n_columns > 3072 && n_columns <= 4096) + INST_BLOCK_SORT(in, sortedKeys, out, n_rows, n_columns, 512, 8, stream); + else if (n_columns > 4096 && n_columns <= 6144) + INST_BLOCK_SORT(in, sortedKeys, out, n_rows, n_columns, 512, 12, stream); + else + INST_BLOCK_SORT(in, sortedKeys, out, n_rows, n_columns, 1024, 12, stream); +} else if (n_columns <= (1 << 18) && n_rows > 1) { + // device Segmented radix sort + // 2^18 column cap to restrict size of workspace ~512 MB + // will give better perf than below deviceWide Sort for even larger dims + int numSegments = n_rows + 1; + + // need auxillary storage: cub sorting + keys (if user not passing) + + // staging for values out + segment partition + if (workspaceSize == 0 || !workspacePtr) { + OutType* tmpValIn = nullptr; + int* tmpOffsetBuffer = nullptr; + + // first call is to get size of workspace + RAFT_CUDA_TRY(cub::DeviceSegmentedRadixSort::SortPairs(workspacePtr, + workspaceSize, + in, + sortedKeys, + tmpValIn, + out, + totalElements, + numSegments, + tmpOffsetBuffer, + tmpOffsetBuffer + 1)); + bAllocWorkspace = true; + // more staging space for temp output of keys + if (!sortedKeys) + workspaceSize += raft::alignTo(sizeof(InType) * (size_t)totalElements, memAlignWidth); + + // value in KV pair need to be passed in, out buffer is separate + workspaceSize += raft::alignTo(sizeof(OutType) * (size_t)totalElements, memAlignWidth); + + // for segment offsets + workspaceSize += raft::alignTo(sizeof(int) * (size_t)numSegments, memAlignWidth); + } else { + size_t workspaceOffset = 0; + + if (!sortedKeys) { + sortedKeys = reinterpret_cast(workspacePtr); + workspaceOffset = raft::alignTo(sizeof(InType) * (size_t)totalElements, memAlignWidth); + workspacePtr = (void*)((size_t)workspacePtr + workspaceOffset); + } + + OutType* dValuesIn = reinterpret_cast(workspacePtr); + workspaceOffset = raft::alignTo(sizeof(OutType) * (size_t)totalElements, memAlignWidth); + workspacePtr = (void*)((size_t)workspacePtr + workspaceOffset); + + int* dSegmentOffsets = reinterpret_cast(workspacePtr); + workspaceOffset = raft::alignTo(sizeof(int) * (size_t)numSegments, memAlignWidth); + workspacePtr = (void*)((size_t)workspacePtr + workspaceOffset); + + // layout idx + RAFT_CUDA_TRY(layoutIdx(dValuesIn, n_rows, n_columns, stream)); + + // layout segment lengths - spread out column length + RAFT_CUDA_TRY(layoutSortOffset(dSegmentOffsets, n_columns, numSegments, stream)); + + RAFT_CUDA_TRY(cub::DeviceSegmentedRadixSort::SortPairs(workspacePtr, + workspaceSize, + in, + sortedKeys, + dValuesIn, + out, + totalElements, + numSegments, + dSegmentOffsets, + dSegmentOffsets + 1, + 0, + sizeof(InType) * 8, + stream)); + } +} else { + // batched per row device wide sort + if (workspaceSize == 0 || !workspacePtr) { + OutType* tmpValIn = nullptr; + + // first call is to get size of workspace + RAFT_CUDA_TRY(cub::DeviceRadixSort::SortPairs( + workspacePtr, workspaceSize, in, sortedKeys, tmpValIn, out, n_columns)); + bAllocWorkspace = true; + + if (!sortedKeys) + workspaceSize += raft::alignTo(sizeof(InType) * (size_t)n_columns, memAlignWidth); + + workspaceSize += raft::alignTo(sizeof(OutType) * (size_t)n_columns, memAlignWidth); + } else { + size_t workspaceOffset = 0; + bool userKeyOutputBuffer = true; + + if (!sortedKeys) { + userKeyOutputBuffer = false; + sortedKeys = reinterpret_cast(workspacePtr); + workspaceOffset = raft::alignTo(sizeof(InType) * (size_t)n_columns, memAlignWidth); + workspacePtr = (void*)((size_t)workspacePtr + workspaceOffset); + } + + OutType* dValuesIn = reinterpret_cast(workspacePtr); + workspaceOffset = raft::alignTo(sizeof(OutType) * (size_t)n_columns, memAlignWidth); + workspacePtr = (void*)((size_t)workspacePtr + workspaceOffset); + + // layout idx + RAFT_CUDA_TRY(layoutIdx(dValuesIn, 1, n_columns, stream)); + + for (int i = 0; i < n_rows; i++) { + InType* rowIn = + reinterpret_cast((size_t)in + (i * sizeof(InType) * (size_t)n_columns)); + OutType* rowOut = + reinterpret_cast((size_t)out + (i * sizeof(OutType) * (size_t)n_columns)); + + RAFT_CUDA_TRY(cub::DeviceRadixSort::SortPairs( + workspacePtr, workspaceSize, rowIn, sortedKeys, dValuesIn, rowOut, n_columns)); + + if (userKeyOutputBuffer) + sortedKeys = + reinterpret_cast((size_t)sortedKeys + sizeof(InType) * (size_t)n_columns); + } + } +} +} +}; // end namespace detail +}; // end namespace matrix +}; // end namespace raft diff --git a/cpp/include/raft/stats/accuracy.hpp b/cpp/include/raft/stats/accuracy.hpp new file mode 100644 index 0000000000..2d4d92131d --- /dev/null +++ b/cpp/include/raft/stats/accuracy.hpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace raft { +namespace stats { + +/** + * @brief Compute accuracy of predictions. Useful for classification. + * @tparam math_t: data type for predictions (e.g., int for classification) + * @param[in] predictions: array of predictions (GPU pointer). + * @param[in] ref_predictions: array of reference (ground-truth) predictions (GPU pointer). + * @param[in] n: number of elements in each of predictions, ref_predictions. + * @param[in] stream: cuda stream. + * @return: Accuracy score in [0, 1]; higher is better. + */ +template +float accuracy(const math_t* predictions, + const math_t* ref_predictions, + int n, + cudaStream_t stream) { + return detail::accuracy_score(predictions, ref_predictions, n, stream); +} + +} // namespace stats +} // namespace raft diff --git a/cpp/include/raft/stats/adjusted_rand_index.hpp b/cpp/include/raft/stats/adjusted_rand_index.hpp new file mode 100644 index 0000000000..be1ee54ad3 --- /dev/null +++ b/cpp/include/raft/stats/adjusted_rand_index.hpp @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @file adjusted_rand_index.cuh + * @brief The adjusted Rand index is the corrected-for-chance version of the Rand index. + * Such a correction for chance establishes a baseline by using the expected similarity + * of all pair-wise comparisons between clusterings specified by a random model. + */ + +#pragma once + +#include + +namespace raft { +namespace stats { + + +/** + * @brief Function to calculate Adjusted RandIndex as described + * here + * @tparam T data-type for input label arrays + * @tparam MathT integral data-type used for computing n-choose-r + * @param firstClusterArray: the array of classes + * @param secondClusterArray: the array of classes + * @param size: the size of the data points of type int + * @param stream: the cudaStream object + */ +template +double adjusted_rand_index(const T* firstClusterArray, + const T* secondClusterArray, + int size, + cudaStream_t stream) { + return detail::compute_adjusted_rand_index(firstClusterArray, secondClusterArray, size, stream); +} + +}; // end namespace stats +}; // end namespace raft diff --git a/cpp/include/raft/stats/common.hpp b/cpp/include/raft/stats/common.hpp index 765f07a012..0545345698 100644 --- a/cpp/include/raft/stats/common.hpp +++ b/cpp/include/raft/stats/common.hpp @@ -63,5 +63,10 @@ enum HistType { /** decide at runtime the best algo for the given inputs */ HistTypeAuto }; + +/// Supported types of information criteria +enum IC_Type { AIC, AICc, BIC }; + + }; // end namespace stats }; // end namespace raft diff --git a/cpp/include/raft/stats/completeness_score.hpp b/cpp/include/raft/stats/completeness_score.hpp new file mode 100644 index 0000000000..96cd217259 --- /dev/null +++ b/cpp/include/raft/stats/completeness_score.hpp @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace raft { +namespace stats { + +/** + * @brief Function to calculate the completeness score between two clusters + * + * @param truthClusterArray: the array of truth classes of type T + * @param predClusterArray: the array of predicted classes of type T + * @param size: the size of the data points of type int + * @param lowerLabelRange: the lower bound of the range of labels + * @param upperLabelRange: the upper bound of the range of labels + * @param stream: the cudaStream object + */ +template +double completeness_score(const T* truthClusterArray, + const T* predClusterArray, + int size, + T lowerLabelRange, + T upperLabelRange, + cudaStream_t stream) { + return detail::completeness_score(truthClusterArray, predClusterArray, size, + lowerLabelRange, upperLabelRange, stream); +} + +}; // end namespace stats +}; // end namespace raft diff --git a/cpp/include/raft/stats/contingency_matrix.hpp b/cpp/include/raft/stats/contingency_matrix.hpp new file mode 100644 index 0000000000..c1e8d5ff2d --- /dev/null +++ b/cpp/include/raft/stats/contingency_matrix.hpp @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace raft { +namespace stats { + + +/** + * @brief use this to allocate output matrix size + * size of matrix = (maxLabel - minLabel + 1)^2 * sizeof(int) + * @param groundTruth: device 1-d array for ground truth (num of rows) + * @param nSamples: number of elements in input array + * @param stream: cuda stream for execution + * @param minLabel: [out] calculated min value in input array + * @param maxLabel: [out] calculated max value in input array + */ +template +void getInputClassCardinality( + const T* groundTruth, const int nSamples, cudaStream_t stream, T& minLabel, T& maxLabel) +{ + detail::getInputClassCardinality(groundTruth, nSamples, stream, minLabel, maxLabel); +} + +/** + * @brief Calculate workspace size for running contingency matrix calculations + * @tparam T label type + * @tparam OutT output matrix type + * @param nSamples: number of elements in input array + * @param groundTruth: device 1-d array for ground truth (num of rows) + * @param stream: cuda stream for execution + * @param minLabel: Optional, min value in input array + * @param maxLabel: Optional, max value in input array + */ +template +size_t getContingencyMatrixWorkspaceSize(int nSamples, + const T* groundTruth, + cudaStream_t stream, + T minLabel = std::numeric_limits::max(), + T maxLabel = std::numeric_limits::max()) +{ + return detail::getContingencyMatrixWorkspaceSize(nSamples, groundTruth, stream, minLabel, maxLabel); +} + +/** + * @brief contruct contingency matrix given input ground truth and prediction + * labels. Users should call function getInputClassCardinality to find + * and allocate memory for output. Similarly workspace requirements + * should be checked using function getContingencyMatrixWorkspaceSize + * @tparam T label type + * @tparam OutT output matrix type + * @param groundTruth: device 1-d array for ground truth (num of rows) + * @param predictedLabel: device 1-d array for prediction (num of columns) + * @param nSamples: number of elements in input array + * @param outMat: output buffer for contingecy matrix + * @param stream: cuda stream for execution + * @param workspace: Optional, workspace memory allocation + * @param workspaceSize: Optional, size of workspace memory + * @param minLabel: Optional, min value in input ground truth array + * @param maxLabel: Optional, max value in input ground truth array + */ +template +void contingencyMatrix(const T* groundTruth, + const T* predictedLabel, + int nSamples, + OutT* outMat, + cudaStream_t stream, + void* workspace = nullptr, + size_t workspaceSize = 0, + T minLabel = std::numeric_limits::max(), + T maxLabel = std::numeric_limits::max()) +{ + detail::contingencyMatrix(groundTruth, predictedLabel, nSamples, outMat, stream, workspace, workspaceSize, minLabel, maxLabel); +} + +}; // namespace stats +}; // namespace raft diff --git a/cpp/include/raft/stats/detail/adjusted_rand_index.cuh b/cpp/include/raft/stats/detail/adjusted_rand_index.cuh new file mode 100644 index 0000000000..03ffac6377 --- /dev/null +++ b/cpp/include/raft/stats/detail/adjusted_rand_index.cuh @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @file adjusted_rand_index.cuh + * @brief The adjusted Rand index is the corrected-for-chance version of the Rand index. + * Such a correction for chance establishes a baseline by using the expected similarity + * of all pair-wise comparisons between clusterings specified by a random model. + */ + +#pragma once + +#include "contingencyMatrix.cuh" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace raft { +namespace stats { +namespace detail { + +/** + * @brief Lambda to calculate the number of unordered pairs in a given input + * + * @tparam Type: Data type of the input + * @param in: the input to the functional mapping + * @param i: the indexing(not used in this case) + */ +template +struct nCTwo { + HDI Type operator()(Type in, int i = 0) + { + return in % 2 ? ((in - 1) >> 1) * in : (in >> 1) * (in - 1); + } +}; + +template +struct Binner { + Binner(DataT minL) : minLabel(minL) {} + + DI int operator()(DataT val, IdxT row, IdxT col) { return int(val - minLabel); } + + private: + DataT minLabel; +}; // struct Binner + +/** + * @brief Function to count the number of unique elements in the input array + * + * @tparam T data-type for input arrays + * + * @param[in] arr input array [on device] [len = size] + * @param[in] size the size of the input array + * @param[out] minLabel the lower bound of the range of labels + * @param[out] maxLabel the upper bound of the range of labels + * @param[in] stream cuda stream + * + * @return the number of unique elements in the array + */ +template +int countUnique(const T* arr, int size, T& minLabel, T& maxLabel, cudaStream_t stream) +{ + auto ptr = thrust::device_pointer_cast(arr); + auto minmax = thrust::minmax_element(thrust::cuda::par.on(stream), ptr, ptr + size); + minLabel = *minmax.first; + maxLabel = *minmax.second; + auto totalLabels = int(maxLabel - minLabel + 1); + rmm::device_uvector labelCounts(totalLabels, stream); + rmm::device_scalar nUniq(stream); + raft::stats::histogram( + raft::stats::HistTypeAuto, + labelCounts.data(), + totalLabels, + arr, + size, + 1, + stream, + [minLabel] __device__(T val, int row, int col) { return int(val - minLabel); }); + raft::linalg::mapThenSumReduce( + nUniq.data(), + totalLabels, + [] __device__(const T& val) { return val != 0; }, + stream, + labelCounts.data()); + auto numUniques = nUniq.value(stream); + return numUniques; +} + +/** + * @brief Function to calculate Adjusted RandIndex as described + * here + * @tparam T data-type for input label arrays + * @tparam MathT integral data-type used for computing n-choose-r + * @param firstClusterArray: the array of classes + * @param secondClusterArray: the array of classes + * @param size: the size of the data points of type int + * @param stream: the cudaStream object + */ +template +double compute_adjusted_rand_index(const T* firstClusterArray, + const T* secondClusterArray, + int size, + cudaStream_t stream) +{ + ASSERT(size >= 2, "Rand Index for size less than 2 not defined!"); + T minFirst, maxFirst, minSecond, maxSecond; + auto nUniqFirst = countUnique(firstClusterArray, size, minFirst, maxFirst, stream); + auto nUniqSecond = countUnique(secondClusterArray, size, minSecond, maxSecond, stream); + auto lowerLabelRange = std::min(minFirst, minSecond); + auto upperLabelRange = std::max(maxFirst, maxSecond); + auto nClasses = upperLabelRange - lowerLabelRange + 1; + // degenerate case of single cluster or clusters each with just one element + if (nUniqFirst == nUniqSecond) { + if (nUniqFirst == 1 || nUniqFirst == size) return 1.0; + } + auto nUniqClasses = MathT(nClasses); + rmm::device_uvector dContingencyMatrix(nUniqClasses * nUniqClasses, stream); + RAFT_CUDA_TRY(cudaMemsetAsync( + dContingencyMatrix.data(), 0, nUniqClasses * nUniqClasses * sizeof(MathT), stream)); + auto workspaceSz = getContingencyMatrixWorkspaceSize( + size, firstClusterArray, stream, lowerLabelRange, upperLabelRange); + rmm::device_uvector workspaceBuff(workspaceSz, stream); + contingencyMatrix(firstClusterArray, + secondClusterArray, + size, + dContingencyMatrix.data(), + stream, + workspaceBuff.data(), + workspaceSz, + lowerLabelRange, + upperLabelRange); + rmm::device_uvector a(nUniqClasses, stream); + rmm::device_uvector b(nUniqClasses, stream); + rmm::device_scalar d_aCTwoSum(stream); + rmm::device_scalar d_bCTwoSum(stream); + rmm::device_scalar d_nChooseTwoSum(stream); + MathT h_aCTwoSum, h_bCTwoSum, h_nChooseTwoSum; + RAFT_CUDA_TRY(cudaMemsetAsync(a.data(), 0, nUniqClasses * sizeof(MathT), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(b.data(), 0, nUniqClasses * sizeof(MathT), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(d_aCTwoSum.data(), 0, sizeof(MathT), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(d_bCTwoSum.data(), 0, sizeof(MathT), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(d_nChooseTwoSum.data(), 0, sizeof(MathT), stream)); + // calculating the sum of NijC2 + raft::linalg::mapThenSumReduce>(d_nChooseTwoSum.data(), + nUniqClasses * nUniqClasses, + nCTwo(), + stream, + dContingencyMatrix.data(), + dContingencyMatrix.data()); + // calculating the row-wise sums + raft::linalg::reduce( + a.data(), dContingencyMatrix.data(), nUniqClasses, nUniqClasses, 0, true, true, stream); + // calculating the column-wise sums + raft::linalg::reduce( + b.data(), dContingencyMatrix.data(), nUniqClasses, nUniqClasses, 0, true, false, stream); + // calculating the sum of number of unordered pairs for every element in a + raft::linalg::mapThenSumReduce>( + d_aCTwoSum.data(), nUniqClasses, nCTwo(), stream, a.data(), a.data()); + // calculating the sum of number of unordered pairs for every element of b + raft::linalg::mapThenSumReduce>( + d_bCTwoSum.data(), nUniqClasses, nCTwo(), stream, b.data(), b.data()); + // updating in the host memory + raft::update_host(&h_nChooseTwoSum, d_nChooseTwoSum.data(), 1, stream); + raft::update_host(&h_aCTwoSum, d_aCTwoSum.data(), 1, stream); + raft::update_host(&h_bCTwoSum, d_bCTwoSum.data(), 1, stream); + // calculating the ARI + auto nChooseTwo = double(size) * double(size - 1) / 2.0; + auto expectedIndex = double(h_aCTwoSum) * double(h_bCTwoSum) / double(nChooseTwo); + auto maxIndex = (double(h_bCTwoSum) + double(h_aCTwoSum)) / 2.0; + auto index = double(h_nChooseTwoSum); + if (maxIndex - expectedIndex) + return (index - expectedIndex) / (maxIndex - expectedIndex); + else + return 0; +} + +}; // end namespace detail +}; // end namespace stats +}; // end namespace raft diff --git a/cpp/include/raft/stats/detail/batched/information_criterion.cuh b/cpp/include/raft/stats/detail/batched/information_criterion.cuh new file mode 100644 index 0000000000..3cc9b09403 --- /dev/null +++ b/cpp/include/raft/stats/detail/batched/information_criterion.cuh @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once +#include +#include + +#include + +namespace raft { +namespace stats { +namespace batched { +namespace detail { + +/** + * Compute the given type of information criterion + * + * @note: it is safe to do the computation in-place (i.e give same pointer + * as input and output) + * + * @param[out] d_ic Information criterion to be returned for each + * series (device) + * @param[in] d_loglikelihood Log-likelihood for each series (device) + * @param[in] ic_type Type of criterion to compute. See IC_Type + * @param[in] n_params Number of parameters in the model + * @param[in] batch_size Number of series in the batch + * @param[in] n_samples Number of samples in each series + * @param[in] stream CUDA stream + */ + template + void information_criterion(ScalarT *d_ic, + const ScalarT *d_loglikelihood, + IC_Type ic_type, + IdxT n_params, + IdxT batch_size, + IdxT n_samples, + cudaStream_t stream) { + ScalarT ic_base{}; + ScalarT N = static_cast(n_params); + ScalarT T = static_cast(n_samples); + switch (ic_type) { + case AIC: + ic_base = (ScalarT) 2.0 * N; + break; + case AICc: + ic_base = (ScalarT) 2.0 * (N + (N * (N + (ScalarT) 1.0)) / (T - N - (ScalarT) 1.0)); + break; + case BIC: + ic_base = std::log(T) * N; + break; + } + /* Compute information criterion from log-likelihood and base term */ + raft::linalg::unaryOp( + d_ic, + d_loglikelihood, + batch_size, + [=] __device__(ScalarT loglike) { return ic_base - (ScalarT) 2.0 * loglike; }, + stream); + } + +} // namespace detail +} // namespace batched +} // namespace stats +} // namespace raft diff --git a/cpp/include/raft/stats/detail/batched/silhouette_score.cuh b/cpp/include/raft/stats/detail/batched/silhouette_score.cuh new file mode 100644 index 0000000000..bc717f3235 --- /dev/null +++ b/cpp/include/raft/stats/detail/batched/silhouette_score.cuh @@ -0,0 +1,276 @@ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "../silhouette_score.cuh" +#include + +#include +#include +#include +#include +#include + +namespace raft { +namespace stats { +namespace batched { +namespace detail { + +/** + * This kernel initializes matrix b (n_rows * n_labels) + * For each label that the corresponding row is not a part of is initialized as 0 + * If the corresponding row is the only sample in its label, again 0 + * Only if the there are > 1 samples in the label, row is initialized to max + */ +template +__global__ void fill_b_kernel(value_t* b, + const label_idx* y, + value_idx n_rows, + label_idx n_labels, + const value_idx* cluster_counts) +{ + value_idx idx = threadIdx.x + blockIdx.x * blockDim.x; + label_idx idy = threadIdx.y + blockIdx.y * blockDim.y; + + if (idx >= n_rows || idy >= n_labels) { return; } + + auto row_cluster = y[idx]; + + auto col_cluster_count = cluster_counts[idy]; + + // b for own cluster should be max value + // so that it does not interfere with min operator + // b is also max if col cluster count is 0 + // however, b is 0 if self cluster count is 1 + if (row_cluster == idy || col_cluster_count == 0) { + if (cluster_counts[row_cluster] == 1) { + b[idx * n_labels + idy] = 0; + } else { + b[idx * n_labels + idy] = std::numeric_limits::max(); + } + } else { + b[idx * n_labels + idy] = 0; + } +} + +/** + * This kernel does an elementwise sweep of chunked pairwise distance matrix + * By knowing the offsets of the chunked pairwise distance matrix in the + * global pairwise distance matrix, we are able to calculate + * intermediate values of a and b for the rows and columns present in the + * current chunked pairwise distance matrix. + */ +template +__global__ void compute_chunked_a_b_kernel(value_t* a, + value_t* b, + value_idx row_offset, + value_idx col_offset, + const label_idx* y, + label_idx n_labels, + const value_idx* cluster_counts, + const value_t* distances, + value_idx dist_rows, + value_idx dist_cols) +{ + value_idx row_id = threadIdx.x + blockIdx.x * blockDim.x; + value_idx col_id = threadIdx.y + blockIdx.y * blockDim.y; + + // these are global offsets of current element + // in the full pairwise distance matrix + value_idx pw_row_id = row_id + row_offset; + value_idx pw_col_id = col_id + col_offset; + + if (row_id >= dist_rows || col_id >= dist_cols || pw_row_id == pw_col_id) { return; } + + auto row_cluster = y[pw_row_id]; + if (cluster_counts[row_cluster] == 1) { return; } + + auto col_cluster = y[pw_col_id]; + auto col_cluster_counts = cluster_counts[col_cluster]; + + if (col_cluster == row_cluster) { + atomicAdd(&a[pw_row_id], distances[row_id * dist_cols + col_id] / (col_cluster_counts - 1)); + } else { + atomicAdd(&b[pw_row_id * n_labels + col_cluster], + distances[row_id * dist_cols + col_id] / col_cluster_counts); + } +} + +template +rmm::device_uvector get_cluster_counts(const raft::handle_t& handle, + label_idx* y, + value_idx& n_rows, + label_idx& n_labels) +{ + auto stream = handle.get_stream(); + + rmm::device_uvector cluster_counts(n_labels, stream); + + rmm::device_uvector workspace(1, stream); + + raft::stats::detail::countLabels(y, cluster_counts.data(), n_rows, n_labels, workspace, stream); + + return cluster_counts; +} + +template +rmm::device_uvector get_pairwise_distance(const raft::handle_t& handle, + value_t* left_begin, + value_t* right_begin, + value_idx& n_left_rows, + value_idx& n_right_rows, + value_idx& n_cols, + raft::distance::DistanceType metric, + cudaStream_t stream) +{ + rmm::device_uvector distances(n_left_rows * n_right_rows, stream); + + raft::distance::pairwise_distance( + handle, left_begin, right_begin, distances.data(), n_left_rows, n_right_rows, n_cols, metric); + + return distances; +} + +template +void compute_chunked_a_b(const raft::handle_t& handle, + value_t* a, + value_t* b, + value_idx& row_offset, + value_idx& col_offset, + const label_idx* y, + label_idx& n_labels, + const value_idx* cluster_counts, + const value_t* distances, + value_idx& dist_rows, + value_idx& dist_cols, + cudaStream_t stream) +{ + dim3 block_size(std::min(dist_rows, 32), std::min(dist_cols, 32)); + dim3 grid_size(raft::ceildiv(dist_rows, (value_idx)block_size.x), + raft::ceildiv(dist_cols, (value_idx)block_size.y)); + + detail::compute_chunked_a_b_kernel<<>>( + a, b, row_offset, col_offset, y, n_labels, cluster_counts, distances, dist_rows, dist_cols); +} + +template +value_t silhouette_score( + const raft::handle_t& handle, + value_t* X, + value_idx n_rows, + value_idx n_cols, + label_idx* y, + label_idx n_labels, + value_t* scores, + value_idx chunk, + raft::distance::DistanceType metric = raft::distance::DistanceType::L2Unexpanded) +{ + ASSERT(n_labels >= 2 && n_labels <= (n_rows - 1), + "silhouette Score not defined for the given number of labels!"); + + rmm::device_uvector cluster_counts = get_cluster_counts(handle, y, n_rows, n_labels); + + auto stream = handle.get_stream(); + auto policy = handle.get_thrust_policy(); + + auto b_size = n_rows * n_labels; + + value_t *a_ptr, *b_ptr; + rmm::device_uvector a(0, stream); + rmm::device_uvector b(b_size, stream); + + b_ptr = b.data(); + + // since a and silhouette score per sample are same size, reusing + if (scores == nullptr || scores == NULL) { + a.resize(n_rows, stream); + a_ptr = a.data(); + } else { + a_ptr = scores; + } + + thrust::fill(policy, a_ptr, a_ptr + n_rows, 0); + + dim3 block_size(std::min(n_rows, 32), std::min(n_labels, 32)); + dim3 grid_size(raft::ceildiv(n_rows, (value_idx)block_size.x), + raft::ceildiv(n_labels, (label_idx)block_size.y)); + detail::fill_b_kernel<<>>( + b_ptr, y, n_rows, n_labels, cluster_counts.data()); + + handle.wait_stream_pool_on_stream(); + + auto n_iters = 0; + + for (value_idx i = 0; i < n_rows; i += chunk) { + for (value_idx j = 0; j < n_rows; j += chunk) { + ++n_iters; + + auto chunk_stream = handle.get_next_usable_stream(i + chunk * j); + + auto* left_begin = X + (i * n_cols); + auto* right_begin = X + (j * n_cols); + + auto n_left_rows = (i + chunk) < n_rows ? chunk : (n_rows - i); + auto n_right_rows = (j + chunk) < n_rows ? chunk : (n_rows - j); + + rmm::device_uvector distances = get_pairwise_distance( + handle, left_begin, right_begin, n_left_rows, n_right_rows, n_cols, metric, chunk_stream); + + compute_chunked_a_b(handle, + a_ptr, + b_ptr, + i, + j, + y, + n_labels, + cluster_counts.data(), + distances.data(), + n_left_rows, + n_right_rows, + chunk_stream); + } + } + + handle.sync_stream_pool(); + + // calculating row-wise minimum in b + // this prim only supports int indices for now + raft::linalg:: + reduce, raft::stats::detail::MinOp>( + b_ptr, + b_ptr, + n_labels, + n_rows, + std::numeric_limits::max(), + true, + true, + stream, + false, + raft::Nop(), + raft::stats::detail::MinOp()); + + // calculating the silhouette score per sample + raft::linalg::binaryOp, value_t, value_idx>( + a_ptr, a_ptr, b_ptr, n_rows, raft::stats::detail::SilOp(), stream); + + return thrust::reduce(policy, a_ptr, a_ptr + n_rows, value_t(0)) / n_rows; +} + +} // namespace detail +} // namespace batched +} // namespace stats +} // namespace raft diff --git a/cpp/include/raft/stats/detail/completeness_score.cuh b/cpp/include/raft/stats/detail/completeness_score.cuh new file mode 100644 index 0000000000..1ddd4ffc4c --- /dev/null +++ b/cpp/include/raft/stats/detail/completeness_score.cuh @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @file completeness_score.cuh + * + * @brief A clustering result satisfies completeness if all the data points + * that are members of a given class are elements of the same cluster. + */ + +#pragma once + +#include +#include + +namespace raft { +namespace stats { +namespace detail { + +/** + * @brief Function to calculate the completeness score between two clusters + * + * @param truthClusterArray: the array of truth classes of type T + * @param predClusterArray: the array of predicted classes of type T + * @param size: the size of the data points of type int + * @param lowerLabelRange: the lower bound of the range of labels + * @param upperLabelRange: the upper bound of the range of labels + * @param stream: the cudaStream object + */ +template +double completeness_score(const T* truthClusterArray, + const T* predClusterArray, + int size, + T lowerLabelRange, + T upperLabelRange, + cudaStream_t stream) +{ + if (size == 0) return 1.0; + + double computedMI, computedEntropy; + + computedMI = raft::stats::mutual_info_score( + truthClusterArray, predClusterArray, size, lowerLabelRange, upperLabelRange, stream); + computedEntropy = + raft::stats::entropy(predClusterArray, size, lowerLabelRange, upperLabelRange, stream); + + double completeness; + + if (computedEntropy) { + completeness = computedMI / computedEntropy; + } else + completeness = 1.0; + + return completeness; +} + +}; // end namespace detail +}; // end namespace stats +}; // end namespace raft diff --git a/cpp/include/raft/stats/detail/contingencyMatrix.cuh b/cpp/include/raft/stats/detail/contingencyMatrix.cuh new file mode 100644 index 0000000000..61a1cd0851 --- /dev/null +++ b/cpp/include/raft/stats/detail/contingencyMatrix.cuh @@ -0,0 +1,313 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#include +#include + +#include + +#include + +namespace raft { +namespace stats { +namespace detail { +typedef enum { + IMPL_NONE, + SMEM_ATOMICS, + GLOBAL_ATOMICS, + SORT_AND_GATOMICS +} ContingencyMatrixImplType; + +template +__global__ void devConstructContingencyMatrix(const T* groundTruth, + const T* predicted, + int nSamples, + OutT* outMat, + int outIdxOffset, + int outMatWidth) +{ + int elementId = threadIdx.x + blockDim.x * blockIdx.x; + if (elementId < nSamples) { + T gt = groundTruth[elementId]; + T pd = predicted[elementId]; + auto outputIdx = (gt - outIdxOffset) * outMatWidth + pd - outIdxOffset; + raft::myAtomicAdd(outMat + outputIdx, OutT(1)); + } +} + +template +void computeCMatWAtomics(const T* groundTruth, + const T* predictedLabel, + int nSamples, + OutT* outMat, + int outIdxOffset, + int outDimN, + cudaStream_t stream) +{ + RAFT_CUDA_TRY( + cudaFuncSetCacheConfig(devConstructContingencyMatrix, cudaFuncCachePreferL1)); + static const int block = 128; + auto grid = raft::ceildiv(nSamples, block); + devConstructContingencyMatrix<<>>( + groundTruth, predictedLabel, nSamples, outMat, outIdxOffset, outDimN); + RAFT_CUDA_TRY(cudaGetLastError()); +} + +template +__global__ void devConstructContingencyMatrixSmem(const T* groundTruth, + const T* predicted, + int nSamples, + OutT* outMat, + int outIdxOffset, + int outMatWidth) +{ + extern __shared__ char smem[]; + auto* sMemMatrix = reinterpret_cast(smem); + for (int smemIdx = threadIdx.x; smemIdx < outMatWidth * outMatWidth; smemIdx += blockDim.x) { + sMemMatrix[smemIdx] = 0; + } + __syncthreads(); + int elementId = threadIdx.x + blockDim.x * blockIdx.x; + if (elementId < nSamples) { + T gt = groundTruth[elementId]; + T pd = predicted[elementId]; + auto outputIdx = (gt - outIdxOffset) * outMatWidth + pd - outIdxOffset; + raft::myAtomicAdd(sMemMatrix + outputIdx, OutT(1)); + } + __syncthreads(); + for (int smemIdx = threadIdx.x; smemIdx < outMatWidth * outMatWidth; smemIdx += blockDim.x) { + raft::myAtomicAdd(outMat + smemIdx, sMemMatrix[smemIdx]); + } +} + +template +void computeCMatWSmemAtomics(const T* groundTruth, + const T* predictedLabel, + int nSamples, + OutT* outMat, + int outIdxOffset, + int outDimN, + cudaStream_t stream) +{ + static const int block = 128; + auto grid = raft::ceildiv(nSamples, block); + size_t smemSizePerBlock = outDimN * outDimN * sizeof(OutT); + devConstructContingencyMatrixSmem<<>>( + groundTruth, predictedLabel, nSamples, outMat, outIdxOffset, outDimN); + RAFT_CUDA_TRY(cudaGetLastError()); +} + +template +void contingencyMatrixWSort(const T* groundTruth, + const T* predictedLabel, + int nSamples, + OutT* outMat, + T minLabel, + T maxLabel, + void* workspace, + size_t workspaceSize, + cudaStream_t stream) +{ + T* outKeys = reinterpret_cast(workspace); + auto alignedBufferSz = raft::alignTo(nSamples * sizeof(T), 256); + T* outValue = reinterpret_cast((size_t)workspace + alignedBufferSz); + void* pWorkspaceCub = reinterpret_cast((size_t)workspace + 2 * alignedBufferSz); + auto bitsToSort = log2(maxLabel); + if (!raft::isPo2(maxLabel)) ++bitsToSort; + // we dont really need perfect sorting, should get by with some sort of + // binning-reordering operation + ///@todo: future work - explore "efficient" custom binning kernels vs cub sort + RAFT_CUDA_TRY(cub::DeviceRadixSort::SortPairs(pWorkspaceCub, + workspaceSize, + groundTruth, + outKeys, + predictedLabel, + outValue, + nSamples, + 0, + bitsToSort, + stream)); + auto outDimM_N = int(maxLabel - minLabel + 1); + computeCMatWAtomics(outKeys, outValue, nSamples, outMat, minLabel, outDimM_N, stream); +} + +template +ContingencyMatrixImplType getImplVersion(OutT outDimN) +{ + int currDevice = 0; + int l2CacheSize = 0; + // no way to query this from CUDA APIs, value for CC 7.0, 3.0 + int maxBlocksResidentPerSM = 16; + RAFT_CUDA_TRY(cudaGetDevice(&currDevice)); + RAFT_CUDA_TRY(cudaDeviceGetAttribute(&l2CacheSize, cudaDevAttrL2CacheSize, currDevice)); + auto maxSmemPerBlock = raft::getSharedMemPerBlock(); + ContingencyMatrixImplType implVersion = IMPL_NONE; + // keeping 8 block per SM to get good utilization + // can go higher but reduced L1 size degrades perf + OutT upperLimitSmemAtomics = + std::floor(std::sqrt(maxSmemPerBlock / (sizeof(OutT) * (maxBlocksResidentPerSM / 2)))); + OutT upperLimitL2Atomics = std::floor(std::sqrt(l2CacheSize / sizeof(OutT))); + if (outDimN <= upperLimitSmemAtomics) + implVersion = SMEM_ATOMICS; + else if (outDimN <= upperLimitL2Atomics) + implVersion = GLOBAL_ATOMICS; + else + implVersion = SORT_AND_GATOMICS; + return implVersion; +} + +/** + * @brief use this to allocate output matrix size + * size of matrix = (maxLabel - minLabel + 1)^2 * sizeof(int) + * @param groundTruth: device 1-d array for ground truth (num of rows) + * @param nSamples: number of elements in input array + * @param stream: cuda stream for execution + * @param minLabel: [out] calculated min value in input array + * @param maxLabel: [out] calculated max value in input array + */ +template +void getInputClassCardinality( + const T* groundTruth, const int nSamples, cudaStream_t stream, T& minLabel, T& maxLabel) +{ + thrust::device_ptr dTrueLabel = thrust::device_pointer_cast(groundTruth); + auto min_max = + thrust::minmax_element(thrust::cuda::par.on(stream), dTrueLabel, dTrueLabel + nSamples); + minLabel = *min_max.first; + maxLabel = *min_max.second; +} + +/** + * @brief Calculate workspace size for running contingency matrix calculations + * @tparam T label type + * @tparam OutT output matrix type + * @param nSamples: number of elements in input array + * @param groundTruth: device 1-d array for ground truth (num of rows) + * @param stream: cuda stream for execution + * @param minLabel: Optional, min value in input array + * @param maxLabel: Optional, max value in input array + */ +template +size_t getContingencyMatrixWorkspaceSize(int nSamples, + const T* groundTruth, + cudaStream_t stream, + T minLabel = std::numeric_limits::max(), + T maxLabel = std::numeric_limits::max()) +{ + size_t workspaceSize = 0; + // below is a redundant computation - can be avoided + if (minLabel == std::numeric_limits::max() || maxLabel == std::numeric_limits::max()) { + getInputClassCardinality(groundTruth, nSamples, stream, minLabel, maxLabel); + } + auto outDimN = OutT(maxLabel - minLabel + 1); + ContingencyMatrixImplType implVersion = getImplVersion(outDimN); + if (implVersion == SORT_AND_GATOMICS) { + void* pWorkspaceCub{}; + size_t tmpStorageBytes = 0; + // no-op pointers to get workspace size + T* pTmpUnused{}; + RAFT_CUDA_TRY(cub::DeviceRadixSort::SortPairs( + pWorkspaceCub, tmpStorageBytes, pTmpUnused, pTmpUnused, pTmpUnused, pTmpUnused, nSamples)); + auto tmpStagingMemorySize = raft::alignTo(nSamples * sizeof(T), 256); + tmpStagingMemorySize *= 2; + workspaceSize = tmpStagingMemorySize + tmpStorageBytes; + } + return workspaceSize; +} + +/** + * @brief contruct contingency matrix given input ground truth and prediction + * labels. Users should call function getInputClassCardinality to find + * and allocate memory for output. Similarly workspace requirements + * should be checked using function getContingencyMatrixWorkspaceSize + * @tparam T label type + * @tparam OutT output matrix type + * @param groundTruth: device 1-d array for ground truth (num of rows) + * @param predictedLabel: device 1-d array for prediction (num of columns) + * @param nSamples: number of elements in input array + * @param outMat: output buffer for contingecy matrix + * @param stream: cuda stream for execution + * @param workspace: Optional, workspace memory allocation + * @param workspaceSize: Optional, size of workspace memory + * @param minLabel: Optional, min value in input ground truth array + * @param maxLabel: Optional, max value in input ground truth array + */ +template +void contingencyMatrix(const T* groundTruth, + const T* predictedLabel, + int nSamples, + OutT* outMat, + cudaStream_t stream, + void* workspace = nullptr, + size_t workspaceSize = 0, + T minLabel = std::numeric_limits::max(), + T maxLabel = std::numeric_limits::max()) +{ + // assumptions: + // output is not at par with scikit learn - output will be square matrix + // always with numRows = numColumns = numOfClassesInTrueLabel + // it is also assumed that true labels are monotically increasing + // if for some reason groundTruth completely skips some labels + // eg: {0,1,2,5} instead of {0,1,2,3}. + // Output matrix will still have empty rows for label value {3,4} + // Users can use "make_monotonic" to convert their discontinuous input label + // range to a monotonically increasing one // + // this also serves as way to measure co-occurence/joint counts for NLP tasks which + // can be used to then compute pointwise mutual information and mutual information + if (minLabel == std::numeric_limits::max() || maxLabel == std::numeric_limits::max()) { + getInputClassCardinality(groundTruth, nSamples, stream, minLabel, maxLabel); + } + auto outDimM_N = OutT(maxLabel - minLabel + 1); + RAFT_CUDA_TRY(cudaMemsetAsync(outMat, 0, sizeof(OutT) * outDimM_N * outDimM_N, stream)); + ContingencyMatrixImplType implVersion = getImplVersion(outDimM_N); + switch (implVersion) { + case SMEM_ATOMICS: + // smem atomics and then single global mem atomics only works + // when all label count can fit in smem for a block + // helps when GLOBAL_ATOMICS performance blocked by atomic update + // serialization -when very less labels ~10 labels + computeCMatWSmemAtomics( + groundTruth, predictedLabel, nSamples, outMat, minLabel, outDimM_N, stream); + break; + case GLOBAL_ATOMICS: + // launch kernel - global atomic ops per (groundTruth,predictedValue) pair + computeCMatWAtomics( + groundTruth, predictedLabel, nSamples, outMat, minLabel, outDimM_N, stream); + break; + // more L2 thrashing if atomic OPs land in completely different mem + // segment - when more labels + case SORT_AND_GATOMICS: + contingencyMatrixWSort(groundTruth, + predictedLabel, + nSamples, + outMat, + minLabel, + maxLabel, + workspace, + workspaceSize, + stream); + break; + case IMPL_NONE: break; + } +} + +}; // namespace detail +}; // namespace stats +}; // namespace raft diff --git a/cpp/include/raft/stats/detail/dispersion.cuh b/cpp/include/raft/stats/detail/dispersion.cuh new file mode 100644 index 0000000000..c1d9376e05 --- /dev/null +++ b/cpp/include/raft/stats/detail/dispersion.cuh @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace raft { +namespace stats { +namespace detail { + +///@todo: ColsPerBlk has been tested only for 32! +template +__global__ void weightedMeanKernel(DataT* mu, const DataT* data, const IdxT* counts, IdxT D, IdxT N) +{ + constexpr int RowsPerBlkPerIter = TPB / ColsPerBlk; + IdxT thisColId = threadIdx.x % ColsPerBlk; + IdxT thisRowId = threadIdx.x / ColsPerBlk; + IdxT colId = thisColId + ((IdxT)blockIdx.y * ColsPerBlk); + IdxT rowId = thisRowId + ((IdxT)blockIdx.x * RowsPerBlkPerIter); + DataT thread_data = DataT(0); + const IdxT stride = RowsPerBlkPerIter * gridDim.x; + __shared__ DataT smu[ColsPerBlk]; + if (threadIdx.x < ColsPerBlk) smu[threadIdx.x] = DataT(0); + for (IdxT i = rowId; i < N; i += stride) { + thread_data += (colId < D) ? data[i * D + colId] * (DataT)counts[i] : DataT(0); + } + __syncthreads(); + raft::myAtomicAdd(smu + thisColId, thread_data); + __syncthreads(); + if (threadIdx.x < ColsPerBlk && colId < D) raft::myAtomicAdd(mu + colId, smu[thisColId]); +} + +template +__global__ void dispersionKernel(DataT* result, + const DataT* clusters, + const IdxT* clusterSizes, + const DataT* mu, + IdxT dim, + IdxT nClusters) +{ + IdxT tid = threadIdx.x + blockIdx.x * blockDim.x; + IdxT len = dim * nClusters; + IdxT stride = blockDim.x * gridDim.x; + DataT sum = DataT(0); + for (; tid < len; tid += stride) { + IdxT col = tid % dim; + IdxT row = tid / dim; + DataT diff = clusters[tid] - mu[col]; + sum += diff * diff * DataT(clusterSizes[row]); + } + typedef cub::BlockReduce BlockReduce; + __shared__ typename BlockReduce::TempStorage temp_storage; + __syncthreads(); + auto acc = BlockReduce(temp_storage).Sum(sum); + __syncthreads(); + if (threadIdx.x == 0) raft::myAtomicAdd(result, acc); +} + +/** + * @brief Compute cluster dispersion metric. This is very useful for + * automatically finding the 'k' (in kmeans) that improves this metric. + * @tparam DataT data type + * @tparam IdxT index type + * @tparam TPB threads block for kernels launched + * @param centroids the cluster centroids. This is assumed to be row-major + * and of dimension (nClusters x dim) + * @param clusterSizes number of points in the dataset which belong to each + * cluster. This is of length nClusters + * @param globalCentroid compute the global weighted centroid of all cluster + * centroids. This is of length dim. Pass a nullptr if this is not needed + * @param nClusters number of clusters + * @param nPoints number of points in the dataset + * @param dim dataset dimensionality + * @param stream cuda stream + * @return the cluster dispersion value + */ +template +DataT dispersion(const DataT* centroids, + const IdxT* clusterSizes, + DataT* globalCentroid, + IdxT nClusters, + IdxT nPoints, + IdxT dim, + cudaStream_t stream) +{ + static const int RowsPerThread = 4; + static const int ColsPerBlk = 32; + static const int RowsPerBlk = (TPB / ColsPerBlk) * RowsPerThread; + dim3 grid(raft::ceildiv(nPoints, (IdxT)RowsPerBlk), raft::ceildiv(dim, (IdxT)ColsPerBlk)); + rmm::device_uvector mean(0, stream); + rmm::device_uvector result(1, stream); + DataT* mu = globalCentroid; + if (globalCentroid == nullptr) { + mean.resize(dim, stream); + mu = mean.data(); + } + RAFT_CUDA_TRY(cudaMemsetAsync(mu, 0, sizeof(DataT) * dim, stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(result.data(), 0, sizeof(DataT), stream)); + weightedMeanKernel + <<>>(mu, centroids, clusterSizes, dim, nClusters); + RAFT_CUDA_TRY(cudaGetLastError()); + DataT ratio = DataT(1) / DataT(nPoints); + raft::linalg::scalarMultiply(mu, mu, ratio, dim, stream); + // finally, compute the dispersion + constexpr int ItemsPerThread = 4; + int nblks = raft::ceildiv(dim * nClusters, TPB * ItemsPerThread); + dispersionKernel + <<>>(result.data(), centroids, clusterSizes, mu, dim, nClusters); + RAFT_CUDA_TRY(cudaGetLastError()); + DataT h_result; + raft::update_host(&h_result, result.data(), 1, stream); + raft::interruptible::synchronize(stream); + return sqrt(h_result); +} + +} // end namespace detail +} // end namespace stats +} // end namespace raft diff --git a/cpp/include/raft/stats/detail/entropy.cuh b/cpp/include/raft/stats/detail/entropy.cuh new file mode 100644 index 0000000000..ce57a86693 --- /dev/null +++ b/cpp/include/raft/stats/detail/entropy.cuh @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @file entropy.cuh + * @brief Calculates the entropy for a labeling in nats.(ie, uses natural logarithm for the + * calculations) + */ + +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include + +namespace raft { +namespace stats { +namespace detail { + + +/** + * @brief Lambda to calculate the entropy of a sample given its probability value + * + * @param p: the input to the functional mapping + * @param q: dummy param + */ + struct entropyOp { + HDI double operator()(double p, double q) + { + if (p) + return -1 * (p) * (log(p)); + else + return 0.0; + } + }; + + /** + * @brief function to calculate the bincounts of number of samples in every label + * + * @tparam LabelT: type of the labels + * @param labels: the pointer to the array containing labels for every data sample + * @param binCountArray: pointer to the 1D array that contains the count of samples per cluster + * @param nRows: number of data samples + * @param lowerLabelRange + * @param upperLabelRange + * @param workspace: device buffer containing workspace memory + * @param stream: the cuda stream where to launch this kernel + */ +template +void countLabels(const LabelT* labels, + double* binCountArray, + int nRows, + LabelT lowerLabelRange, + LabelT upperLabelRange, + rmm::device_uvector& workspace, + cudaStream_t stream) +{ + int num_levels = upperLabelRange - lowerLabelRange + 2; + LabelT lower_level = lowerLabelRange; + LabelT upper_level = upperLabelRange + 1; + size_t temp_storage_bytes = 0; + + RAFT_CUDA_TRY(cub::DeviceHistogram::HistogramEven(nullptr, + temp_storage_bytes, + labels, + binCountArray, + num_levels, + lower_level, + upper_level, + nRows, + stream)); + + workspace.resize(temp_storage_bytes, stream); + + RAFT_CUDA_TRY(cub::DeviceHistogram::HistogramEven(workspace.data(), + temp_storage_bytes, + labels, + binCountArray, + num_levels, + lower_level, + upper_level, + nRows, + stream)); +} + +/** + * @brief Function to calculate entropy + * more info on entropy + * + * @param clusterArray: the array of classes of type T + * @param size: the size of the data points of type int + * @param lowerLabelRange: the lower bound of the range of labels + * @param upperLabelRange: the upper bound of the range of labels + * @param stream: the cudaStream object + * @return the entropy score + */ +template +double entropy(const T* clusterArray, + const int size, + const T lowerLabelRange, + const T upperLabelRange, + cudaStream_t stream) +{ + if (!size) return 1.0; + + T numUniqueClasses = upperLabelRange - lowerLabelRange + 1; + + // declaring, allocating and initializing memory for bincount array and entropy values + rmm::device_uvector prob(numUniqueClasses, stream); + RAFT_CUDA_TRY(cudaMemsetAsync(prob.data(), 0, numUniqueClasses * sizeof(double), stream)); + rmm::device_scalar d_entropy(stream); + RAFT_CUDA_TRY(cudaMemsetAsync(d_entropy.data(), 0, sizeof(double), stream)); + + // workspace allocation + rmm::device_uvector workspace(1, stream); + + // calculating the bincounts and populating the prob array + countLabels(clusterArray, prob.data(), size, lowerLabelRange, upperLabelRange, workspace, stream); + + // scalar dividing by size + raft::linalg::divideScalar( + prob.data(), prob.data(), (double)size, numUniqueClasses, stream); + + // calculating the aggregate entropy + raft::linalg::mapThenSumReduce( + d_entropy.data(), numUniqueClasses, entropyOp(), stream, prob.data(), prob.data()); + + // updating in the host memory + double h_entropy; + raft::update_host(&h_entropy, d_entropy.data(), 1, stream); + + raft::interruptible::synchronize(stream); + + return h_entropy; +} + +}; // end namespace detail +}; // end namespace stats +}; // end namespace raft diff --git a/cpp/include/raft/stats/detail/homogeneity_score.cuh b/cpp/include/raft/stats/detail/homogeneity_score.cuh new file mode 100644 index 0000000000..23198f910d --- /dev/null +++ b/cpp/include/raft/stats/detail/homogeneity_score.cuh @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @file homogeneity_score.cuh + * + * @brief A clustering result satisfies homogeneity if all of its clusters + * contain only data points which are members of a single class. + */ + +#pragma once + +#include +#include +#include + +namespace raft { +namespace stats { +namespace detail { +/** + * @brief Function to calculate the homogeneity score between two clusters + * more info on mutual + * information + * @param truthClusterArray: the array of truth classes of type T + * @param predClusterArray: the array of predicted classes of type T + * @param size: the size of the data points of type int + * @param lowerLabelRange: the lower bound of the range of labels + * @param upperLabelRange: the upper bound of the range of labels + * @param stream: the cudaStream object + */ +template +double homogeneity_score(const T* truthClusterArray, + const T* predClusterArray, + int size, + T lowerLabelRange, + T upperLabelRange, + cudaStream_t stream) +{ + if (size == 0) return 1.0; + + double computedMI, computedEntropy; + + computedMI = raft::stats::mutual_info_score( + truthClusterArray, predClusterArray, size, lowerLabelRange, upperLabelRange, stream); + computedEntropy = + raft::stats::entropy(truthClusterArray, size, lowerLabelRange, upperLabelRange, stream); + + double homogeneity; + + if (computedEntropy) { + homogeneity = computedMI / computedEntropy; + } else + homogeneity = 1.0; + + return homogeneity; +} + +}; // end namespace detail +}; // end namespace stats +}; // end namespace raft diff --git a/cpp/include/raft/stats/detail/kl_divergence.cuh b/cpp/include/raft/stats/detail/kl_divergence.cuh new file mode 100644 index 0000000000..0638565207 --- /dev/null +++ b/cpp/include/raft/stats/detail/kl_divergence.cuh @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @file kl_divergence.cuh + * @brief The KL divergence tells us how well the probability distribution Q AKA candidatePDF + * approximates the probability distribution P AKA modelPDF. + */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace raft { +namespace stats { +namespace detail { + +/** + * @brief the KL Diverence mapping function + * + * @tparam Type: Data type of the input + * @param modelPDF: the model probability density function of type DataT + * @param candidatePDF: the candidate probability density function of type DataT + */ +template +struct KLDOp { + HDI Type operator()(Type modelPDF, Type candidatePDF) + { + if (modelPDF == 0.0) + return 0; + + else + return modelPDF * (log(modelPDF) - log(candidatePDF)); + } +}; + + + +/** + * @brief Function to calculate KL Divergence + * more info on KL + * Divergence + * + * @tparam DataT: Data type of the input array + * @param modelPDF: the model array of probability density functions of type DataT + * @param candidatePDF: the candidate array of probability density functions of type DataT + * @param size: the size of the data points of type int + * @param stream: the cudaStream object + */ +template +DataT kl_divergence(const DataT* modelPDF, const DataT* candidatePDF, int size, cudaStream_t stream) +{ + rmm::device_scalar d_KLDVal(stream); + RAFT_CUDA_TRY(cudaMemsetAsync(d_KLDVal.data(), 0, sizeof(DataT), stream)); + + raft::linalg::mapThenSumReduce, 256, const DataT*>( + d_KLDVal.data(), (size_t)size, KLDOp(), stream, modelPDF, candidatePDF); + + DataT h_KLDVal; + + raft::update_host(&h_KLDVal, d_KLDVal.data(), 1, stream); + + raft::interruptible::synchronize(stream); + + return h_KLDVal; +} + +}; // end namespace detail +}; // end namespace stats +}; // end namespace raft diff --git a/cpp/include/raft/stats/detail/mutual_info_score.cuh b/cpp/include/raft/stats/detail/mutual_info_score.cuh new file mode 100644 index 0000000000..c39001dc38 --- /dev/null +++ b/cpp/include/raft/stats/detail/mutual_info_score.cuh @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @file mutual_info_score.cuh + * @brief The Mutual Information is a measure of the similarity between two labels of + * the same data.This metric is independent of the absolute values of the labels: + * a permutation of the class or cluster label values won't change the + * score value in any way. + * This metric is furthermore symmetric.This can be useful to + * measure the agreement of two independent label assignments strategies + * on the same dataset when the real ground truth is not known. + */ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace raft { +namespace stats { +namespace detail { + +/** + * @brief kernel to calculate the mutual info score + * @param dContingencyMatrix: the contingency matrix corresponding to the two clusters + * @param a: the row wise sum of the contingency matrix, which is also the bin counts of first + * cluster array + * @param b: the column wise sum of the contingency matrix, which is also the bin counts of second + * cluster array + * @param numUniqueClasses: number of unique classes + * @param size: the size of array a and b (size of the contingency matrix is (size x size)) + * @param d_MI: pointer to the device memory that stores the aggreggate mutual information + */ +template +__global__ void mutual_info_kernel(const int* dContingencyMatrix, + const int* a, + const int* b, + int numUniqueClasses, + int size, + double* d_MI) +{ + // calculating the indices of pairs of datapoints compared by the current thread + int j = threadIdx.x + blockIdx.x * blockDim.x; + int i = threadIdx.y + blockIdx.y * blockDim.y; + + // thread-local variable to count the mutual info + double localMI = 0.0; + + if (i < numUniqueClasses && j < numUniqueClasses && a[i] * b[j] != 0 && + dContingencyMatrix[i * numUniqueClasses + j] != 0) { + localMI += (double(dContingencyMatrix[i * numUniqueClasses + j])) * + (log(double(size) * double(dContingencyMatrix[i * numUniqueClasses + j])) - + log(double(a[i] * b[j]))); + } + + // specialize blockReduce for a 2D block of 1024 threads of type uint64_t + typedef cub::BlockReduce + BlockReduce; + + // Allocate shared memory for blockReduce + __shared__ typename BlockReduce::TempStorage temp_storage; + + // summing up thread-local counts specific to a block + localMI = BlockReduce(temp_storage).Sum(localMI); + __syncthreads(); + + // executed once per block + if (threadIdx.x == 0 && threadIdx.y == 0) { raft::myAtomicAdd(d_MI, localMI); } +} + +/** + * @brief Function to calculate the mutual information between two clusters + * more info on mutual information + * @param firstClusterArray: the array of classes of type T + * @param secondClusterArray: the array of classes of type T + * @param size: the size of the data points of type int + * @param lowerLabelRange: the lower bound of the range of labels + * @param upperLabelRange: the upper bound of the range of labels + * @param stream: the cudaStream object + */ +template +double mutual_info_score(const T* firstClusterArray, + const T* secondClusterArray, + int size, + T lowerLabelRange, + T upperLabelRange, + cudaStream_t stream) +{ + int numUniqueClasses = upperLabelRange - lowerLabelRange + 1; + + // declaring, allocating and initializing memory for the contingency marix + rmm::device_uvector dContingencyMatrix(numUniqueClasses * numUniqueClasses, stream); + RAFT_CUDA_TRY(cudaMemsetAsync( + dContingencyMatrix.data(), 0, numUniqueClasses * numUniqueClasses * sizeof(int), stream)); + + // workspace allocation + size_t workspaceSz = raft::stats::getContingencyMatrixWorkspaceSize( + size, firstClusterArray, stream, lowerLabelRange, upperLabelRange); + rmm::device_uvector pWorkspace(workspaceSz, stream); + + // calculating the contingency matrix + raft::stats::contingencyMatrix(firstClusterArray, + secondClusterArray, + (int)size, + (int*)dContingencyMatrix.data(), + stream, + (void*)pWorkspace.data(), + workspaceSz, + lowerLabelRange, + upperLabelRange); + + // creating device buffers for all the parameters involved in ARI calculation + // device variables + rmm::device_uvector a(numUniqueClasses, stream); + rmm::device_uvector b(numUniqueClasses, stream); + rmm::device_scalar d_MI(stream); + + // host variables + double h_MI; + + // initializing device memory + RAFT_CUDA_TRY(cudaMemsetAsync(a.data(), 0, numUniqueClasses * sizeof(int), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(b.data(), 0, numUniqueClasses * sizeof(int), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(d_MI.data(), 0, sizeof(double), stream)); + + // calculating the row-wise sums + raft::linalg::reduce( + a.data(), dContingencyMatrix.data(), numUniqueClasses, numUniqueClasses, 0, true, true, stream); + + // calculating the column-wise sums + raft::linalg::reduce(b.data(), + dContingencyMatrix.data(), + numUniqueClasses, + numUniqueClasses, + 0, + true, + false, + stream); + + // kernel configuration + static const int BLOCK_DIM_Y = 16, BLOCK_DIM_X = 16; + dim3 numThreadsPerBlock(BLOCK_DIM_X, BLOCK_DIM_Y); + dim3 numBlocks(raft::ceildiv(numUniqueClasses, numThreadsPerBlock.x), + raft::ceildiv(numUniqueClasses, numThreadsPerBlock.y)); + + // calling the kernel + mutual_info_kernel<<>>( + dContingencyMatrix.data(), a.data(), b.data(), numUniqueClasses, size, d_MI.data()); + + // updating in the host memory + h_MI = d_MI.value(stream); + + raft::interruptible::synchronize(stream); + + return h_MI / size; +} + +}; // end namespace detail +}; // end namespace stats +}; // end namespace raft diff --git a/cpp/include/raft/stats/detail/rand_index.cuh b/cpp/include/raft/stats/detail/rand_index.cuh new file mode 100644 index 0000000000..19f8e56121 --- /dev/null +++ b/cpp/include/raft/stats/detail/rand_index.cuh @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file rand_index.cuh + * @todo TODO(Ganesh Venkataramana): + *
+ * The below rand_index calculation implementation is a Brute force one that uses
+ (nElements*nElements) threads (2 dimensional grids and blocks)
+ * For small datasets, this will suffice; but for larger ones, work done by the threads increase
+ dramatically.
+ * A more mathematically intensive implementation that uses half the above threads can be done,
+ which will prove to be more efficient for larger datasets
+ * the idea is as follows:
+  * instead of 2D block and grid configuration with a total of (nElements*nElements) threads (where
+ each (i,j) through these threads represent an ordered pair selection of 2 data points), a 1D block
+ and grid configuration with a total of (nElements*(nElements))/2 threads (each thread index
+ represents an element part of the set of unordered pairwise selections from the dataset (nChoose2))
+  * In this setup, one has to generate a one-to-one mapping between this 1D thread index (for each
+ kernel) and the unordered pair of chosen datapoints.
+  * More specifically, thread0-> {dataPoint1, dataPoint0}, thread1-> {dataPoint2, dataPoint0},
+ thread2-> {dataPoint2, dataPoint1} ... thread((nElements*(nElements))/2 - 1)->
+ {dataPoint(nElements-1),dataPoint(nElements-2)}
+  * say ,
+     * threadNum: thread index | threadNum = threadIdx.x + BlockIdx.x*BlockDim.x,
+     * i : index of dataPoint i
+     * j : index of dataPoint j
+  * then the mapping is as follows:
+     * i = ceil((-1 + sqrt(1 + 8*(1 + threadNum)))/2) = floor((1 + sqrt(1 + 8*threadNum))/2)
+     * j = threadNum - i(i-1)/2
+  * after obtaining the the pair of datapoints, calculation of rand index is the same as done in
+ this implementation
+ * Caveat: since the kernel implementation involves use of emulated sqrt() operations:
+  * the number of instructions executed per kernel is ~40-50 times
+  * as the O(nElements*nElements) increase beyond the floating point limit, floating point
+ inaccuracies occur, and hence the above floor(...) !=  ceil(...)
+ * 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace raft { +namespace stats { +namespace detail { + +/** + * @brief kernel to calculate the values of a and b + * @param firstClusterArray: the array of classes of type T + * @param secondClusterArray: the array of classes of type T + * @param size: the size of the data points + * @param a: number of pairs of points that both the clusters have classified the same + * @param b: number of pairs of points that both the clusters have classified differently + */ +template +__global__ void computeTheNumerator( + const T* firstClusterArray, const T* secondClusterArray, uint64_t size, uint64_t* a, uint64_t* b) +{ + // calculating the indices of pairs of datapoints compared by the current thread + uint64_t j = threadIdx.x + blockIdx.x * blockDim.x; + uint64_t i = threadIdx.y + blockIdx.y * blockDim.y; + + // thread-local variables to count a and b + uint64_t myA = 0, myB = 0; + + if (i < size && j < size && j < i) { + // checking if the pair have been classified the same by both the clusters + if (firstClusterArray[i] == firstClusterArray[j] && + secondClusterArray[i] == secondClusterArray[j]) { + ++myA; + } + + // checking if the pair have been classified differently by both the clusters + else if (firstClusterArray[i] != firstClusterArray[j] && + secondClusterArray[i] != secondClusterArray[j]) { + ++myB; + } + } + + // specialize blockReduce for a 2D block of 1024 threads of type uint64_t + typedef cub::BlockReduce + BlockReduce; + + // Allocate shared memory for blockReduce + __shared__ typename BlockReduce::TempStorage temp_storage; + + // summing up thread-local counts specific to a block + myA = BlockReduce(temp_storage).Sum(myA); + __syncthreads(); + myB = BlockReduce(temp_storage).Sum(myB); + __syncthreads(); + + // executed once per block + if (threadIdx.x == 0 && threadIdx.y == 0) { + raft::myAtomicAdd((unsigned long long int*)a, myA); + raft::myAtomicAdd((unsigned long long int*)b, myB); + } +} + +/** + * @brief Function to calculate RandIndex + * more info on rand index + * @param firstClusterArray: the array of classes of type T + * @param secondClusterArray: the array of classes of type T + * @param size: the size of the data points of type uint64_t + * @param stream: the cudaStream object + */ +template +double compute_rand_index(T* firstClusterArray, + T* secondClusterArray, + uint64_t size, + cudaStream_t stream) +{ + // rand index for size less than 2 is not defined + ASSERT(size >= 2, "Rand Index for size less than 2 not defined!"); + + // allocating and initializing memory for a and b in the GPU + rmm::device_uvector arr_buf(2, stream); + RAFT_CUDA_TRY(cudaMemsetAsync(arr_buf.data(), 0, 2 * sizeof(uint64_t), stream)); + + // kernel configuration + static const int BLOCK_DIM_Y = 16, BLOCK_DIM_X = 16; + dim3 numThreadsPerBlock(BLOCK_DIM_X, BLOCK_DIM_Y); + dim3 numBlocks(raft::ceildiv(size, numThreadsPerBlock.x), + raft::ceildiv(size, numThreadsPerBlock.y)); + + // calling the kernel + computeTheNumerator<<>>( + firstClusterArray, secondClusterArray, size, arr_buf.data(), arr_buf.data() + 1); + + // synchronizing and updating the calculated values of a and b from device to host + uint64_t ab_host[2] = {0}; + raft::update_host(ab_host, arr_buf.data(), 2, stream); + raft::interruptible::synchronize(stream); + + // error handling + RAFT_CUDA_TRY(cudaGetLastError()); + + // denominator + uint64_t nChooseTwo = size * (size - 1) / 2; + + // calculating the rand_index + return (double)(((double)(ab_host[0] + ab_host[1])) / (double)nChooseTwo); +} + +}; // end namespace detail +}; // end namespace stats +}; // end namespace raft diff --git a/cpp/include/raft/stats/detail/scores.cuh b/cpp/include/raft/stats/detail/scores.cuh new file mode 100644 index 0000000000..130bdb4a85 --- /dev/null +++ b/cpp/include/raft/stats/detail/scores.cuh @@ -0,0 +1,215 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define N_THREADS 512 + +namespace raft { +namespace stats { +namespace detail { +/** + * Calculates the "Coefficient of Determination" (R-Squared) score + * normalizing the sum of squared errors by the total sum of squares. + * + * This score indicates the proportionate amount of variation in an + * expected response variable is explained by the independent variables + * in a linear regression model. The larger the R-squared value, the + * more variability is explained by the linear regression model. + * + * @param y: Array of ground-truth response variables + * @param y_hat: Array of predicted response variables + * @param n: Number of elements in y and y_hat + * @param stream: cuda stream + * @return: The R-squared value. + */ +template +math_t r2_score(math_t* y, math_t* y_hat, int n, cudaStream_t stream) +{ + rmm::device_scalar y_bar(stream); + + raft::stats::mean(y_bar.data(), y, 1, n, false, false, stream); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + + rmm::device_uvector sse_arr(n, stream); + + raft::linalg::eltwiseSub(sse_arr.data(), y, y_hat, n, stream); + raft::linalg::powerScalar(sse_arr.data(), sse_arr.data(), math_t(2.0), n, stream); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + + rmm::device_uvector ssto_arr(n, stream); + + raft::linalg::subtractDevScalar(ssto_arr.data(), y, y_bar.data(), n, stream); + raft::linalg::powerScalar(ssto_arr.data(), ssto_arr.data(), math_t(2.0), n, stream); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + + thrust::device_ptr d_sse = thrust::device_pointer_cast(sse_arr.data()); + thrust::device_ptr d_ssto = thrust::device_pointer_cast(ssto_arr.data()); + + math_t sse = thrust::reduce(thrust::cuda::par.on(stream), d_sse, d_sse + n); + math_t ssto = thrust::reduce(thrust::cuda::par.on(stream), d_ssto, d_ssto + n); + + return 1.0 - sse / ssto; +} + +/** + * @brief Compute accuracy of predictions. Useful for classification. + * @tparam math_t: data type for predictions (e.g., int for classification) + * @param[in] predictions: array of predictions (GPU pointer). + * @param[in] ref_predictions: array of reference (ground-truth) predictions (GPU pointer). + * @param[in] n: number of elements in each of predictions, ref_predictions. + * @param[in] stream: cuda stream. + * @return: Accuracy score in [0, 1]; higher is better. + */ +template +float accuracy_score(const math_t* predictions, + const math_t* ref_predictions, + int n, + cudaStream_t stream) +{ + unsigned long long correctly_predicted = 0ULL; + rmm::device_uvector diffs_array(n, stream); + + // TODO could write a kernel instead + raft::linalg::eltwiseSub(diffs_array.data(), predictions, ref_predictions, n, stream); + RAFT_CUDA_TRY(cudaGetLastError()); + correctly_predicted = + thrust::count(thrust::cuda::par.on(stream), diffs_array.data(), diffs_array.data() + n, 0); + + float accuracy = correctly_predicted * 1.0f / n; + return accuracy; +} + +template +__global__ void reg_metrics_kernel( + const T* predictions, const T* ref_predictions, int n, double* abs_diffs, double* tmp_sums) +{ + int tid = threadIdx.x + blockIdx.x * blockDim.x; + __shared__ double shmem[2]; // {abs_difference_sum, squared difference sum} + + for (int i = threadIdx.x; i < 2; i += blockDim.x) { + shmem[i] = 0; + } + __syncthreads(); + + for (int i = tid; i < n; i += blockDim.x * gridDim.x) { + double diff = predictions[i] - ref_predictions[i]; + double abs_diff = abs(diff); + raft::myAtomicAdd(&shmem[0], abs_diff); + raft::myAtomicAdd(&shmem[1], diff * diff); + + // update absolute difference in global memory for subsequent abs. median computation + abs_diffs[i] = abs_diff; + } + __syncthreads(); + + // Update tmp_sum w/ total abs_difference_sum and squared difference sum. + for (int i = threadIdx.x; i < 2; i += blockDim.x) { + raft::myAtomicAdd(&tmp_sums[i], shmem[i]); + } +} + +/** + * @brief Compute regression metrics mean absolute error, mean squared error, median absolute error + * @tparam T: data type for predictions (e.g., float or double for regression). + * @param[in] predictions: array of predictions (GPU pointer). + * @param[in] ref_predictions: array of reference (ground-truth) predictions (GPU pointer). + * @param[in] n: number of elements in each of predictions, ref_predictions. Should be > 0. + * @param[in] stream: cuda stream. + * @param[out] mean_abs_error: Mean Absolute Error. Sum over n of (|predictions[i] - + * ref_predictions[i]|) / n. + * @param[out] mean_squared_error: Mean Squared Error. Sum over n of ((predictions[i] - + * ref_predictions[i])^2) / n. + * @param[out] median_abs_error: Median Absolute Error. Median of |predictions[i] - + * ref_predictions[i]| for i in [0, n). + */ +template +void regression_metrics(const T* predictions, + const T* ref_predictions, + int n, + cudaStream_t stream, + double& mean_abs_error, + double& mean_squared_error, + double& median_abs_error) +{ + std::vector mean_errors(2); + std::vector h_sorted_abs_diffs(n); + int thread_cnt = 256; + int block_cnt = raft::ceildiv(n, thread_cnt); + + int array_size = n * sizeof(double); + rmm::device_uvector abs_diffs_array(array_size, stream); + rmm::device_uvector sorted_abs_diffs(array_size, stream); + rmm::device_uvector tmp_sums(2 * sizeof(double), stream); + RAFT_CUDA_TRY(cudaMemsetAsync(tmp_sums.data(), 0, 2 * sizeof(double), stream)); + + reg_metrics_kernel<<>>( + predictions, ref_predictions, n, abs_diffs_array.data(), tmp_sums.data()); + RAFT_CUDA_TRY(cudaGetLastError()); + raft::update_host(&mean_errors[0], tmp_sums.data(), 2, stream); + raft::interruptible::synchronize(stream); + + mean_abs_error = mean_errors[0] / n; + mean_squared_error = mean_errors[1] / n; + + // Compute median error. Sort diffs_array and pick median value + char* temp_storage = nullptr; + size_t temp_storage_bytes; + RAFT_CUDA_TRY(cub::DeviceRadixSort::SortKeys((void*)temp_storage, + temp_storage_bytes, + abs_diffs_array.data(), + sorted_abs_diffs.data(), + n, + 0, + 8 * sizeof(double), + stream)); + rmm::device_uvector temp_storage_v(temp_storage_bytes, stream); + temp_storage = temp_storage_v.data(); + RAFT_CUDA_TRY(cub::DeviceRadixSort::SortKeys((void*)temp_storage, + temp_storage_bytes, + abs_diffs_array.data(), + sorted_abs_diffs.data(), + n, + 0, + 8 * sizeof(double), + stream)); + + raft::update_host(h_sorted_abs_diffs.data(), sorted_abs_diffs.data(), n, stream); + raft::interruptible::synchronize(stream); + + int middle = n / 2; + if (n % 2 == 1) { + median_abs_error = h_sorted_abs_diffs[middle]; + } else { + median_abs_error = (h_sorted_abs_diffs[middle] + h_sorted_abs_diffs[middle - 1]) / 2; + } +} +} // namespace detail +} // namespace stats +} // namespace raft diff --git a/cpp/include/raft/stats/detail/silhouette_score.cuh b/cpp/include/raft/stats/detail/silhouette_score.cuh new file mode 100644 index 0000000000..8f02ff5045 --- /dev/null +++ b/cpp/include/raft/stats/detail/silhouette_score.cuh @@ -0,0 +1,332 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace raft { +namespace stats { +namespace detail { + +/** + * @brief kernel that calculates the average intra-cluster distance for every sample data point and + * updates the cluster distance to max value + * @tparam DataT: type of the data samples + * @tparam LabelT: type of the labels + * @param sampleToClusterSumOfDistances: the pointer to the 2D array that contains the sum of + * distances from every sample to every cluster (nRows x nLabels) + * @param binCountArray: pointer to the 1D array that contains the count of samples per cluster (1 x + * nLabels) + * @param d_aArray: the pointer to the array of average intra-cluster distances for every sample in + * device memory (1 x nRows) + * @param labels: the pointer to the array containing labels for every data sample (1 x nRows) + * @param nRows: number of data samples + * @param nLabels: number of Labels + * @param MAX_VAL: DataT specific upper limit + */ +template +__global__ void populateAKernel(DataT* sampleToClusterSumOfDistances, + DataT* binCountArray, + DataT* d_aArray, + LabelT* labels, + int nRows, + int nLabels, + const DataT MAX_VAL) +{ + // getting the current index + int sampleIndex = threadIdx.x + blockIdx.x * blockDim.x; + + if (sampleIndex >= nRows) return; + + // sampleDistanceVector is an array that stores that particular row of the distanceMatrix + DataT* sampleToClusterSumOfDistancesVector = + &sampleToClusterSumOfDistances[sampleIndex * nLabels]; + + LabelT sampleCluster = labels[sampleIndex]; + + int sampleClusterIndex = (int)sampleCluster; + + if (binCountArray[sampleClusterIndex] - 1 <= 0) { + d_aArray[sampleIndex] = -1; + return; + + } + + else { + d_aArray[sampleIndex] = (sampleToClusterSumOfDistancesVector[sampleClusterIndex]) / + (binCountArray[sampleClusterIndex] - 1); + + // modifying the sampleDistanceVector to give sample average distance + sampleToClusterSumOfDistancesVector[sampleClusterIndex] = MAX_VAL; + } +} + +/** + * @brief function to calculate the bincounts of number of samples in every label + * @tparam DataT: type of the data samples + * @tparam LabelT: type of the labels + * @param labels: the pointer to the array containing labels for every data sample (1 x nRows) + * @param binCountArray: pointer to the 1D array that contains the count of samples per cluster (1 x + * nLabels) + * @param nRows: number of data samples + * @param nUniqueLabels: number of Labels + * @param workspace: device buffer containing workspace memory + * @param stream: the cuda stream where to launch this kernel + */ +template +void countLabels(LabelT* labels, + DataT* binCountArray, + int nRows, + int nUniqueLabels, + rmm::device_uvector& workspace, + cudaStream_t stream) +{ + int num_levels = nUniqueLabels + 1; + LabelT lower_level = 0; + LabelT upper_level = nUniqueLabels; + size_t temp_storage_bytes = 0; + + rmm::device_uvector countArray(nUniqueLabels, stream); + + RAFT_CUDA_TRY(cub::DeviceHistogram::HistogramEven(nullptr, + temp_storage_bytes, + labels, + binCountArray, + num_levels, + lower_level, + upper_level, + nRows, + stream)); + + workspace.resize(temp_storage_bytes, stream); + + RAFT_CUDA_TRY(cub::DeviceHistogram::HistogramEven(workspace.data(), + temp_storage_bytes, + labels, + binCountArray, + num_levels, + lower_level, + upper_level, + nRows, + stream)); +} + +/** + * @brief stucture that defines the division Lambda for elementwise op + */ +template +struct DivOp { + HDI DataT operator()(DataT a, int b, int c) + { + if (b == 0) + return ULLONG_MAX; + else + return a / b; + } +}; + +/** + * @brief stucture that defines the elementwise operation to calculate silhouette score using params + * 'a' and 'b' + */ +template +struct SilOp { + HDI DataT operator()(DataT a, DataT b) + { + if (a == 0 && b == 0 || a == b) + return 0; + else if (a == -1) + return 0; + else if (a > b) + return (b - a) / a; + else + return (b - a) / b; + } +}; + +/** + * @brief stucture that defines the reduction Lambda to find minimum between elements + */ +template +struct MinOp { + HDI DataT operator()(DataT a, DataT b) + { + if (a > b) + return b; + else + return a; + } +}; + +/** + * @brief main function that returns the average silhouette score for a given set of data and its + * clusterings + * @tparam DataT: type of the data samples + * @tparam LabelT: type of the labels + * @param X_in: pointer to the input Data samples array (nRows x nCols) + * @param nRows: number of data samples + * @param nCols: number of features + * @param labels: the pointer to the array containing labels for every data sample (1 x nRows) + * @param nLabels: number of Labels + * @param silhouette_scorePerSample: pointer to the array that is optionally taken in as input and + * is populated with the silhouette score for every sample (1 x nRows) + * @param stream: the cuda stream where to launch this kernel + * @param metric: the numerical value that maps to the type of distance metric to be used in the + * calculations + */ +template +DataT silhouette_score( + const raft::handle_t& handle, + DataT* X_in, + int nRows, + int nCols, + LabelT* labels, + int nLabels, + DataT* silhouette_scorePerSample, + cudaStream_t stream, + raft::distance::DistanceType metric = raft::distance::DistanceType::L2Unexpanded) +{ + ASSERT(nLabels >= 2 && nLabels <= (nRows - 1), + "silhouette Score not defined for the given number of labels!"); + + // compute the distance matrix + rmm::device_uvector distanceMatrix(nRows * nRows, stream); + rmm::device_uvector workspace(1, stream); + + raft::distance::pairwise_distance( + handle, X_in, X_in, distanceMatrix.data(), nRows, nRows, nCols, metric); + + // deciding on the array of silhouette scores for each dataPoint + rmm::device_uvector silhouette_scoreSamples(0, stream); + DataT* perSampleSilScore = nullptr; + if (silhouette_scorePerSample == nullptr) { + silhouette_scoreSamples.resize(nRows, stream); + perSampleSilScore = silhouette_scoreSamples.data(); + } else { + perSampleSilScore = silhouette_scorePerSample; + } + RAFT_CUDA_TRY(cudaMemsetAsync(perSampleSilScore, 0, nRows * sizeof(DataT), stream)); + + // getting the sample count per cluster + rmm::device_uvector binCountArray(nLabels, stream); + RAFT_CUDA_TRY(cudaMemsetAsync(binCountArray.data(), 0, nLabels * sizeof(DataT), stream)); + countLabels(labels, binCountArray.data(), nRows, nLabels, workspace, stream); + + // calculating the sample-cluster-distance-sum-array + rmm::device_uvector sampleToClusterSumOfDistances(nRows * nLabels, stream); + RAFT_CUDA_TRY(cudaMemsetAsync( + sampleToClusterSumOfDistances.data(), 0, nRows * nLabels * sizeof(DataT), stream)); + raft::linalg::reduce_cols_by_key(distanceMatrix.data(), + labels, + sampleToClusterSumOfDistances.data(), + nRows, + nRows, + nLabels, + stream); + + // creating the a array and b array + rmm::device_uvector d_aArray(nRows, stream); + rmm::device_uvector d_bArray(nRows, stream); + RAFT_CUDA_TRY(cudaMemsetAsync(d_aArray.data(), 0, nRows * sizeof(DataT), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(d_bArray.data(), 0, nRows * sizeof(DataT), stream)); + + // kernel that populates the d_aArray + // kernel configuration + dim3 numThreadsPerBlock(32, 1, 1); + dim3 numBlocks(raft::ceildiv(nRows, numThreadsPerBlock.x), 1, 1); + + // calling the kernel + populateAKernel<<>>( + sampleToClusterSumOfDistances.data(), + binCountArray.data(), + d_aArray.data(), + labels, + nRows, + nLabels, + std::numeric_limits::max()); + + // elementwise dividing by bincounts + rmm::device_uvector averageDistanceBetweenSampleAndCluster(nRows * nLabels, stream); + RAFT_CUDA_TRY(cudaMemsetAsync( + averageDistanceBetweenSampleAndCluster.data(), 0, nRows * nLabels * sizeof(DataT), stream)); + + raft::linalg::matrixVectorOp>(averageDistanceBetweenSampleAndCluster.data(), + sampleToClusterSumOfDistances.data(), + binCountArray.data(), + binCountArray.data(), + nLabels, + nRows, + true, + true, + DivOp(), + stream); + + // calculating row-wise minimum + raft::linalg::reduce, MinOp>( + d_bArray.data(), + averageDistanceBetweenSampleAndCluster.data(), + nLabels, + nRows, + std::numeric_limits::max(), + true, + true, + stream, + false, + raft::Nop(), + MinOp()); + + // calculating the silhouette score per sample using the d_aArray and d_bArray + raft::linalg::binaryOp>( + perSampleSilScore, d_aArray.data(), d_bArray.data(), nRows, SilOp(), stream); + + // calculating the sum of all the silhouette score + rmm::device_scalar d_avgSilhouetteScore(stream); + RAFT_CUDA_TRY(cudaMemsetAsync(d_avgSilhouetteScore.data(), 0, sizeof(DataT), stream)); + + raft::linalg::mapThenSumReduce>(d_avgSilhouetteScore.data(), + nRows, + raft::Nop(), + stream, + perSampleSilScore, + perSampleSilScore); + + DataT avgSilhouetteScore = d_avgSilhouetteScore.value(stream); + + handle.sync_stream(stream); + + avgSilhouetteScore /= nRows; + + return avgSilhouetteScore; +} + +}; // namespace detail +}; // namespace stats +}; // namespace raft diff --git a/cpp/include/raft/stats/detail/trustworthiness_score.cuh b/cpp/include/raft/stats/detail/trustworthiness_score.cuh new file mode 100644 index 0000000000..b2d92df755 --- /dev/null +++ b/cpp/include/raft/stats/detail/trustworthiness_score.cuh @@ -0,0 +1,219 @@ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +#define N_THREADS 512 + +namespace raft { +namespace stats { +namespace detail { + +/** + * @brief Build the lookup table + * @param[out] lookup_table: Lookup table giving nearest neighbor order + * of pairwise distance calculations given sample index + * @param[in] X_ind: Sorted indexes of pairwise distance calculations of X + * @param n: Number of samples + * @param work: Number of elements to consider + */ +__global__ void build_lookup_table(int* lookup_table, const int* X_ind, int n, int work) +{ + int i = blockIdx.x * blockDim.x + threadIdx.x; + if (i >= work) return; + + int sample_idx = i / n; + int nn_idx = i % n; + + int idx = X_ind[i]; + lookup_table[(sample_idx * n) + idx] = nn_idx; +} + +/** + * @brief Compute a the rank of trustworthiness score + * @param[out] rank: Resulting rank + * @param[out] lookup_table: Lookup table giving nearest neighbor order + * of pairwise distance calculations given sample index + * @param[in] emb_ind: Indexes of KNN on embeddings + * @param n: Number of samples + * @param n_neighbors: Number of neighbors considered by trustworthiness score + * @param work: Batch to consider (to do it at once use n * n_neighbors) + */ +template +__global__ void compute_rank(double* rank, + const int* lookup_table, + const knn_index_t* emb_ind, + int n, + int n_neighbors, + int work) +{ + int i = blockIdx.x * blockDim.x + threadIdx.x; + if (i >= work) return; + + int sample_idx = i / n_neighbors; + + knn_index_t emb_nn_ind = emb_ind[i]; + + int r = lookup_table[(sample_idx * n) + emb_nn_ind]; + int tmp = r - n_neighbors + 1; + if (tmp > 0) raft::myAtomicAdd(rank, tmp); +} + +/** + * @brief Compute a kNN and returns the indices of the nearest neighbors + * @param h Raft handle + * @param[in] input Input matrix containing the dataset + * @param n Number of samples + * @param d Number of features + * @param n_neighbors number of neighbors + * @param[out] indices KNN indexes + * @param[out] distances KNN distances + */ +template +void run_knn(const raft::handle_t& h, + math_t* input, + int n, + int d, + int n_neighbors, + int64_t* indices, + math_t* distances) +{ + std::vector ptrs(1); + std::vector sizes(1); + ptrs[0] = input; + sizes[0] = n; + + raft::spatial::knn::brute_force_knn(h, + ptrs, + sizes, + d, + input, + n, + indices, + distances, + n_neighbors, + true, + true, + nullptr, + distance_type); +} + +/** + * @brief Compute the trustworthiness score + * @param h Raft handle + * @param X[in]: Data in original dimension + * @param X_embedded[in]: Data in target dimension (embedding) + * @param n: Number of samples + * @param m: Number of features in high/original dimension + * @param d: Number of features in low/embedded dimension + * @param n_neighbors Number of neighbors considered by trustworthiness score + * @param batchSize Batch size + * @return Trustworthiness score + */ +template +double trustworthiness_score(const raft::handle_t& h, + const math_t* X, + math_t* X_embedded, + int n, + int m, + int d, + int n_neighbors, + int batchSize = 512) +{ + cudaStream_t stream = h.get_stream(); + + const int KNN_ALLOC = n * (n_neighbors + 1); + rmm::device_uvector emb_ind(KNN_ALLOC, stream); + rmm::device_uvector emb_dist(KNN_ALLOC, stream); + + run_knn(h, X_embedded, n, d, n_neighbors + 1, emb_ind.data(), emb_dist.data()); + + const int PAIRWISE_ALLOC = batchSize * n; + rmm::device_uvector X_ind(PAIRWISE_ALLOC, stream); + rmm::device_uvector X_dist(PAIRWISE_ALLOC, stream); + rmm::device_uvector lookup_table(PAIRWISE_ALLOC, stream); + + double t = 0.0; + rmm::device_scalar t_dbuf(stream); + + int toDo = n; + while (toDo > 0) { + int curBatchSize = min(toDo, batchSize); + + // Takes at most batchSize vectors at a time + raft::distance::pairwise_distance( + h, &X[(n - toDo) * m], X, X_dist.data(), curBatchSize, n, m, distance_type); + + size_t colSortWorkspaceSize = 0; + bool bAllocWorkspace = false; + + raft::matrix::sort_cols_per_row(X_dist.data(), + X_ind.data(), + curBatchSize, + n, + bAllocWorkspace, + nullptr, + colSortWorkspaceSize, + stream); + + if (bAllocWorkspace) { + rmm::device_uvector sortColsWorkspace(colSortWorkspaceSize, stream); + + raft::matrix::sort_cols_per_row(X_dist.data(), + X_ind.data(), + curBatchSize, + n, + bAllocWorkspace, + sortColsWorkspace.data(), + colSortWorkspaceSize, + stream); + } + + int work = curBatchSize * n; + int n_blocks = raft::ceildiv(work, N_THREADS); + build_lookup_table<<>>( + lookup_table.data(), X_ind.data(), n, work); + + RAFT_CUDA_TRY(cudaMemsetAsync(t_dbuf.data(), 0, sizeof(double), stream)); + + work = curBatchSize * (n_neighbors + 1); + n_blocks = raft::ceildiv(work, N_THREADS); + compute_rank<<>>( + t_dbuf.data(), + lookup_table.data(), + &emb_ind.data()[(n - toDo) * (n_neighbors + 1)], + n, + n_neighbors + 1, + work); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + + t += t_dbuf.value(stream); + + toDo -= curBatchSize; + } + + t = 1.0 - ((2.0 / ((n * n_neighbors) * ((2.0 * n) - (3.0 * n_neighbors) - 1.0))) * t); + + return t; +} + +} // namespace detail +} // namespace stats +} // namespace raft diff --git a/cpp/include/raft/stats/detail/v_measure.cuh b/cpp/include/raft/stats/detail/v_measure.cuh new file mode 100644 index 0000000000..c51ababbb9 --- /dev/null +++ b/cpp/include/raft/stats/detail/v_measure.cuh @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @file v_measure.cuh + */ + +#include + +namespace raft { +namespace stats { +namespace detail { + +/** + * @brief Function to calculate the v-measure between two clusters + * + * @param truthClusterArray: the array of truth classes of type T + * @param predClusterArray: the array of predicted classes of type T + * @param size: the size of the data points of type int + * @param lowerLabelRange: the lower bound of the range of labels + * @param upperLabelRange: the upper bound of the range of labels + * @param stream: the cudaStream object + * @param beta: v_measure parameter + */ +template +double v_measure(const T* truthClusterArray, + const T* predClusterArray, + int size, + T lowerLabelRange, + T upperLabelRange, + cudaStream_t stream, + double beta = 1.0) +{ + double computedHomogeity, computedCompleteness, computedVMeasure; + + computedHomogeity = raft::stats::homogeneity_score( + truthClusterArray, predClusterArray, size, lowerLabelRange, upperLabelRange, stream); + computedCompleteness = raft::stats::homogeneity_score( + predClusterArray, truthClusterArray, size, lowerLabelRange, upperLabelRange, stream); + + if (computedCompleteness + computedHomogeity == 0.0) + computedVMeasure = 0.0; + else + computedVMeasure = ((1 + beta) * computedHomogeity * computedCompleteness / + (beta * computedHomogeity + computedCompleteness)); + + return computedVMeasure; +} + +}; // end namespace detail +}; // end namespace stats +}; // end namespace raft diff --git a/cpp/include/raft/stats/dispersion.hpp b/cpp/include/raft/stats/dispersion.hpp new file mode 100644 index 0000000000..b50b196157 --- /dev/null +++ b/cpp/include/raft/stats/dispersion.hpp @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace raft { + namespace stats { + +/** + * @brief Compute cluster dispersion metric. This is very useful for + * automatically finding the 'k' (in kmeans) that improves this metric. + * @tparam DataT data type + * @tparam IdxT index type + * @tparam TPB threads block for kernels launched + * @param centroids the cluster centroids. This is assumed to be row-major + * and of dimension (nClusters x dim) + * @param clusterSizes number of points in the dataset which belong to each + * cluster. This is of length nClusters + * @param globalCentroid compute the global weighted centroid of all cluster + * centroids. This is of length dim. Pass a nullptr if this is not needed + * @param nClusters number of clusters + * @param nPoints number of points in the dataset + * @param dim dataset dimensionality + * @param stream cuda stream + * @return the cluster dispersion value + */ +template +DataT dispersion(const DataT* centroids, + const IdxT* clusterSizes, + DataT* globalCentroid, + IdxT nClusters, + IdxT nPoints, + IdxT dim, + cudaStream_t stream) { + return detail::dispersion(centroids, clusterSizes, globalCentroid, nClusters, nPoints, dim, stream); +} + +} // end namespace stats +} // end namespace raft diff --git a/cpp/include/raft/stats/entropy.hpp b/cpp/include/raft/stats/entropy.hpp new file mode 100644 index 0000000000..5c2710fc6a --- /dev/null +++ b/cpp/include/raft/stats/entropy.hpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once +#include + +namespace raft { +namespace stats { + +/** + * @brief Function to calculate entropy + * more info on entropy + * + * @param clusterArray: the array of classes of type T + * @param size: the size of the data points of type int + * @param lowerLabelRange: the lower bound of the range of labels + * @param upperLabelRange: the upper bound of the range of labels + * @param stream: the cudaStream object + * @return the entropy score + */ +template +double entropy(const T* clusterArray, + const int size, + const T lowerLabelRange, + const T upperLabelRange, + cudaStream_t stream) { + return detail::entropy(clusterArray, size, lowerLabelRange, upperLabelRange, stream); +} + +}; // end namespace stats +}; // end namespace raft diff --git a/cpp/include/raft/stats/homogeneity_score.hpp b/cpp/include/raft/stats/homogeneity_score.hpp new file mode 100644 index 0000000000..365acb8c24 --- /dev/null +++ b/cpp/include/raft/stats/homogeneity_score.hpp @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace raft { +namespace stats { + +/** + * @brief Function to calculate the homogeneity score between two clusters + * more info on mutual + * information + * @param truthClusterArray: the array of truth classes of type T + * @param predClusterArray: the array of predicted classes of type T + * @param size: the size of the data points of type int + * @param lowerLabelRange: the lower bound of the range of labels + * @param upperLabelRange: the upper bound of the range of labels + * @param stream: the cudaStream object + */ +template +double homogeneity_score(const T* truthClusterArray, + const T* predClusterArray, + int size, + T lowerLabelRange, + T upperLabelRange, + cudaStream_t stream) { + return detail::homogeneity_score(truthClusterArray, predClusterArray, size, lowerLabelRange, upperLabelRange, stream); +} + +}; // end namespace stats +}; // end namespace raft diff --git a/cpp/include/raft/stats/information_criterion.hpp b/cpp/include/raft/stats/information_criterion.hpp new file mode 100644 index 0000000000..08a4014e11 --- /dev/null +++ b/cpp/include/raft/stats/information_criterion.hpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file information_criterion.cuh + * @brief These information criteria are used to evaluate the quality of models + * by balancing the quality of the fit and the number of parameters. + * + * See: + * - AIC: https://en.wikipedia.org/wiki/Akaike_information_criterion + * - AICc: https://en.wikipedia.org/wiki/Akaike_information_criterion#AICc + * - BIC: https://en.wikipedia.org/wiki/Bayesian_information_criterion + */ +#pragma once + +#include +#include + +namespace raft { +namespace stats { + +/** + * Compute the given type of information criterion + * + * @note: it is safe to do the computation in-place (i.e give same pointer + * as input and output) + * + * @param[out] d_ic Information criterion to be returned for each + * series (device) + * @param[in] d_loglikelihood Log-likelihood for each series (device) + * @param[in] ic_type Type of criterion to compute. See IC_Type + * @param[in] n_params Number of parameters in the model + * @param[in] batch_size Number of series in the batch + * @param[in] n_samples Number of samples in each series + * @param[in] stream CUDA stream + */ +template +void information_criterion_batched(ScalarT *d_ic, + const ScalarT *d_loglikelihood, + IC_Type ic_type, + IdxT n_params, + IdxT batch_size, + IdxT n_samples, + cudaStream_t stream) { + batched::detail::information_criterion(d_ic, d_loglikelihood, ic_type, n_params, batch_size, n_params, stream); +} + +} // namespace stats +} // namespace raft diff --git a/cpp/include/raft/stats/kl_divergence.hpp b/cpp/include/raft/stats/kl_divergence.hpp new file mode 100644 index 0000000000..b8865a2c16 --- /dev/null +++ b/cpp/include/raft/stats/kl_divergence.hpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include + +namespace raft { +namespace stats { + +/** + * @brief Function to calculate KL Divergence + * more info on KL + * Divergence + * + * @tparam DataT: Data type of the input array + * @param modelPDF: the model array of probability density functions of type DataT + * @param candidatePDF: the candidate array of probability density functions of type DataT + * @param size: the size of the data points of type int + * @param stream: the cudaStream object + */ +template +DataT kl_divergence(const DataT* modelPDF, const DataT* candidatePDF, int size, cudaStream_t stream) +{ + return detail::kl_divergence(modelPDF, candidatePDF, size, stream); +} + +}; // end namespace stats +}; // end namespace raft diff --git a/cpp/include/raft/stats/meanvar.hpp b/cpp/include/raft/stats/meanvar.hpp index 3a41ee8a00..3cf2d35bef 100644 --- a/cpp/include/raft/stats/meanvar.hpp +++ b/cpp/include/raft/stats/meanvar.hpp @@ -48,8 +48,7 @@ void meanvar(Type* mean, IdxType N, bool sample, bool rowMajor, - cudaStream_t stream) -{ + cudaStream_t stream) { detail::meanvar(mean, var, data, D, N, sample, rowMajor, stream); } diff --git a/cpp/include/raft/stats/mutual_info_score.hpp b/cpp/include/raft/stats/mutual_info_score.hpp new file mode 100644 index 0000000000..383868e578 --- /dev/null +++ b/cpp/include/raft/stats/mutual_info_score.hpp @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include + +namespace raft { +namespace stats { + + +/** + * @brief Function to calculate the mutual information between two clusters + * more info on mutual information + * @param firstClusterArray: the array of classes of type T + * @param secondClusterArray: the array of classes of type T + * @param size: the size of the data points of type int + * @param lowerLabelRange: the lower bound of the range of labels + * @param upperLabelRange: the upper bound of the range of labels + * @param stream: the cudaStream object + */ +template +double mutual_info_score(const T* firstClusterArray, + const T* secondClusterArray, + int size, + T lowerLabelRange, + T upperLabelRange, + cudaStream_t stream) { + return detail::mutual_info_score(firstClusterArray, secondClusterArray, size, + lowerLabelRange, upperLabelRange, stream); +} + +}; // end namespace stats +}; // end namespace raft diff --git a/cpp/include/raft/stats/r2_score.hpp b/cpp/include/raft/stats/r2_score.hpp new file mode 100644 index 0000000000..08d97c591b --- /dev/null +++ b/cpp/include/raft/stats/r2_score.hpp @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace raft { +namespace stats { + +/** + * Calculates the "Coefficient of Determination" (R-Squared) score + * normalizing the sum of squared errors by the total sum of squares. + * + * This score indicates the proportionate amount of variation in an + * expected response variable is explained by the independent variables + * in a linear regression model. The larger the R-squared value, the + * more variability is explained by the linear regression model. + * + * @param y: Array of ground-truth response variables + * @param y_hat: Array of predicted response variables + * @param n: Number of elements in y and y_hat + * @param stream: cuda stream + * @return: The R-squared value. + */ +template +math_t r2_score(math_t* y, math_t* y_hat, int n, cudaStream_t stream) { + return detail::r2_score(y, y_hat, n, stream); +} + +} // namespace stats +} // namespace raft diff --git a/cpp/include/raft/stats/rand_index.hpp b/cpp/include/raft/stats/rand_index.hpp new file mode 100644 index 0000000000..b0e3786c49 --- /dev/null +++ b/cpp/include/raft/stats/rand_index.hpp @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace raft { +namespace stats { + + +/** + * @brief Function to calculate RandIndex + * more info on rand index + * @param firstClusterArray: the array of classes of type T + * @param secondClusterArray: the array of classes of type T + * @param size: the size of the data points of type uint64_t + * @param stream: the cudaStream object + */ +template +double rand_index(T* firstClusterArray, + T* secondClusterArray, + uint64_t size, + cudaStream_t stream) +{ + return detail::compute_rand_index(firstClusterArray, secondClusterArray, size, stream); +} + +}; // end namespace stats +}; // end namespace raft diff --git a/cpp/include/raft/stats/regression_metrics.hpp b/cpp/include/raft/stats/regression_metrics.hpp new file mode 100644 index 0000000000..be7abdd786 --- /dev/null +++ b/cpp/include/raft/stats/regression_metrics.hpp @@ -0,0 +1,49 @@ +/* +* Copyright (c) 2019-2022, NVIDIA CORPORATION. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +#pragma once + +#include + +namespace raft { +namespace stats { + +/** +* @brief Compute regression metrics mean absolute error, mean squared error, median absolute error +* @tparam T: data type for predictions (e.g., float or double for regression). +* @param[in] predictions: array of predictions (GPU pointer). +* @param[in] ref_predictions: array of reference (ground-truth) predictions (GPU pointer). +* @param[in] n: number of elements in each of predictions, ref_predictions. Should be > 0. +* @param[in] stream: cuda stream. +* @param[out] mean_abs_error: Mean Absolute Error. Sum over n of (|predictions[i] - +* ref_predictions[i]|) / n. +* @param[out] mean_squared_error: Mean Squared Error. Sum over n of ((predictions[i] - +* ref_predictions[i])^2) / n. +* @param[out] median_abs_error: Median Absolute Error. Median of |predictions[i] - +* ref_predictions[i]| for i in [0, n). +*/ +template +void regression_metrics(const T* predictions, + const T* ref_predictions, + int n, + cudaStream_t stream, + double& mean_abs_error, + double& mean_squared_error, + double& median_abs_error) { + detail::regression_metrics(predictions, ref_predictions, n, stream, mean_abs_error, mean_squared_error, median_abs_error); +} +} // namespace stats +} // namespace raft diff --git a/cpp/include/raft/stats/silhouette_score.hpp b/cpp/include/raft/stats/silhouette_score.hpp new file mode 100644 index 0000000000..94b840ee3d --- /dev/null +++ b/cpp/include/raft/stats/silhouette_score.hpp @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace raft { +namespace stats { + +/** + * @brief main function that returns the average silhouette score for a given set of data and its + * clusterings + * @tparam DataT: type of the data samples + * @tparam LabelT: type of the labels + * @param X_in: pointer to the input Data samples array (nRows x nCols) + * @param nRows: number of data samples + * @param nCols: number of features + * @param labels: the pointer to the array containing labels for every data sample (1 x nRows) + * @param nLabels: number of Labels + * @param silhouette_scorePerSample: pointer to the array that is optionally taken in as input and + * is populated with the silhouette score for every sample (1 x nRows) + * @param stream: the cuda stream where to launch this kernel + * @param metric: the numerical value that maps to the type of distance metric to be used in the + * calculations + */ +template +DataT silhouette_score( + const raft::handle_t& handle, + DataT* X_in, + int nRows, + int nCols, + LabelT* labels, + int nLabels, + DataT* silhouette_scorePerSample, + cudaStream_t stream, + raft::distance::DistanceType metric = raft::distance::DistanceType::L2Unexpanded) { + return detail::silhouette_score(handle, X_in, nRows, nCols, labels, nLabels, + silhouette_scorePerSample, stream, metric); +} + +template +value_t silhouette_score_batched( + const raft::handle_t& handle, + value_t* X, + value_idx n_rows, + value_idx n_cols, + label_idx* y, + label_idx n_labels, + value_t* scores, + value_idx chunk, + raft::distance::DistanceType metric = raft::distance::DistanceType::L2Unexpanded) { + return batched::detail::silhouette_score(handle, X, n_rows, n_cols, y, n_labels, scores, chunk, metric); +} + +}; // namespace stats +}; // namespace raft diff --git a/cpp/include/raft/stats/specializations.hpp b/cpp/include/raft/stats/specializations.hpp new file mode 100644 index 0000000000..8f33690e5b --- /dev/null +++ b/cpp/include/raft/stats/specializations.hpp @@ -0,0 +1,20 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include \ No newline at end of file diff --git a/cpp/include/raft/stats/trustworthiness_score.hpp b/cpp/include/raft/stats/trustworthiness_score.hpp new file mode 100644 index 0000000000..4d7e3178f4 --- /dev/null +++ b/cpp/include/raft/stats/trustworthiness_score.hpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once +#include + +namespace raft { +namespace stats { + +/** + * @brief Compute the trustworthiness score + * @param h Raft handle + * @param X[in]: Data in original dimension + * @param X_embedded[in]: Data in target dimension (embedding) + * @param n: Number of samples + * @param m: Number of features in high/original dimension + * @param d: Number of features in low/embedded dimension + * @param n_neighbors Number of neighbors considered by trustworthiness score + * @param batchSize Batch size + * @return Trustworthiness score + */ +template +double trustworthiness_score(const raft::handle_t& h, + const math_t* X, + math_t* X_embedded, + int n, + int m, + int d, + int n_neighbors, + int batchSize = 512) { + return detail::trustworthiness_score(h, X, X_embedded, n, m, d, n_neighbors, batchSize); +} +} // namespace stats +} // namespace raft diff --git a/cpp/include/raft/stats/v_measure.hpp b/cpp/include/raft/stats/v_measure.hpp new file mode 100644 index 0000000000..44531221d2 --- /dev/null +++ b/cpp/include/raft/stats/v_measure.hpp @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once +#include + +namespace raft { +namespace stats { + +/** + * @brief Function to calculate the v-measure between two clusters + * + * @param truthClusterArray: the array of truth classes of type T + * @param predClusterArray: the array of predicted classes of type T + * @param size: the size of the data points of type int + * @param lowerLabelRange: the lower bound of the range of labels + * @param upperLabelRange: the upper bound of the range of labels + * @param stream: the cudaStream object + * @param beta: v_measure parameter + */ +template +double v_measure(const T* truthClusterArray, + const T* predClusterArray, + int size, + T lowerLabelRange, + T upperLabelRange, + cudaStream_t stream, + double beta = 1.0) { + return detail::v_measure(truthClusterArray, predClusterArray, size, lowerLabelRange, upperLabelRange, stream, beta); +} + +}; // end namespace stats +}; // end namespace raft diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 430b69341c..7dcacdf7e1 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -111,15 +111,27 @@ add_executable(test_raft test/spatial/faiss_mr.cu test/spatial/selection.cu test/spectral_matrix.cu + test/stats/adjusted_rand_index.cu + test/stats/completeness_score.cu + test/stats/contingencyMatrix.cu test/stats/cov.cu + test/stats/dispersion.cu + test/stats/entropy.cu test/stats/histogram.cu + test/stats/homogeneity_score.cu + test/stats/kl_divergence.cu test/stats/mean.cu test/stats/meanvar.cu test/stats/mean_center.cu test/stats/minmax.cu + test/stats/mutual_info_score.cu + test/stats/rand_index.cu + test/stats/silhouette_score.cu test/stats/stddev.cu test/stats/sum.cu + test/stats/trustworthiness.cu test/stats/weighted_mean.cu + test/stats/v_measure.cu test/test.cpp ) diff --git a/cpp/test/stats/adjusted_rand_index.cu b/cpp/test/stats/adjusted_rand_index.cu new file mode 100644 index 0000000000..8ab3fbb1fb --- /dev/null +++ b/cpp/test/stats/adjusted_rand_index.cu @@ -0,0 +1,201 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../test_utils.h" +#include +#include +#include +#include +#include +#include +#include + +namespace raft { +namespace stats { + +struct adjustedRandIndexParam { + int nElements; + int lowerLabelRange; + int upperLabelRange; + bool sameArrays; + double tolerance; + // if this is true, then it is assumed that `sameArrays` is also true + // further it also assumes `lowerLabelRange` and `upperLabelRange` are 0 + bool testZeroArray; +}; + +template +class adjustedRandIndexTest : public ::testing::TestWithParam { + protected: + adjustedRandIndexTest() : firstClusterArray(0, stream), secondClusterArray(0, stream) {} + + void SetUp() override + { + RAFT_CUDA_TRY(cudaStreamCreate(&stream)); + params = ::testing::TestWithParam::GetParam(); + nElements = params.nElements; + + firstClusterArray.resize(nElements, stream); + secondClusterArray.resize(nElements, stream); + RAFT_CUDA_TRY( + cudaMemsetAsync(firstClusterArray.data(), 0, firstClusterArray.size() * sizeof(T), stream)); + RAFT_CUDA_TRY( + cudaMemsetAsync(secondClusterArray.data(), 0, secondClusterArray.size() * sizeof(T), stream)); + + if (!params.testZeroArray) { + SetUpDifferentArrays(); + } else { + SetupZeroArray(); + } + // allocating and initializing memory to the GPU + computed_adjusted_rand_index = adjusted_rand_index( + firstClusterArray.data(), secondClusterArray.data(), nElements, stream); + } + + void TearDown() override { RAFT_CUDA_TRY(cudaStreamDestroy(stream)); } + + void SetUpDifferentArrays() + { + lowerLabelRange = params.lowerLabelRange; + upperLabelRange = params.upperLabelRange; + std::vector arr1(nElements, 0); + std::vector arr2(nElements, 0); + std::random_device rd; + std::default_random_engine dre(rd()); + std::uniform_int_distribution intGenerator(lowerLabelRange, upperLabelRange); + std::generate(arr1.begin(), arr1.end(), [&]() { return intGenerator(dre); }); + if (params.sameArrays) { + arr2 = arr1; + } else { + std::generate(arr2.begin(), arr2.end(), [&]() { return intGenerator(dre); }); + } + // calculating golden output + int numUniqueClasses = upperLabelRange - lowerLabelRange + 1; + size_t sizeOfMat = numUniqueClasses * numUniqueClasses * sizeof(int); + int* hGoldenOutput = (int*)malloc(sizeOfMat); + memset(hGoldenOutput, 0, sizeOfMat); + for (int i = 0; i < nElements; i++) { + int row = arr1[i] - lowerLabelRange; + int column = arr2[i] - lowerLabelRange; + hGoldenOutput[row * numUniqueClasses + column] += 1; + } + int sumOfNijCTwo = 0; + int* a = (int*)malloc(numUniqueClasses * sizeof(int)); + int* b = (int*)malloc(numUniqueClasses * sizeof(int)); + memset(a, 0, numUniqueClasses * sizeof(int)); + memset(b, 0, numUniqueClasses * sizeof(int)); + int sumOfAiCTwo = 0; + int sumOfBiCTwo = 0; + // calculating the sum of number of pairwise points in each index + // and also the reducing contingency matrix along row and column + for (int i = 0; i < numUniqueClasses; ++i) { + for (int j = 0; j < numUniqueClasses; ++j) { + int Nij = hGoldenOutput[i * numUniqueClasses + j]; + sumOfNijCTwo += ((Nij) * (Nij - 1)) / 2; + a[i] += hGoldenOutput[i * numUniqueClasses + j]; + b[i] += hGoldenOutput[j * numUniqueClasses + i]; + } + } + // claculating the sum of number pairwise points in ever column sum + // claculating the sum of number pairwise points in ever row sum + for (int i = 0; i < numUniqueClasses; ++i) { + sumOfAiCTwo += ((a[i]) * (a[i] - 1)) / 2; + sumOfBiCTwo += ((b[i]) * (b[i] - 1)) / 2; + } + // calculating the ARI + double nCTwo = double(nElements) * double(nElements - 1) / 2.0; + double expectedIndex = (double(sumOfBiCTwo) * double(sumOfAiCTwo)) / double(nCTwo); + double maxIndex = (double(sumOfAiCTwo) + double(sumOfBiCTwo)) / 2.0; + double index = (double)sumOfNijCTwo; + if (maxIndex - expectedIndex) + truth_adjusted_rand_index = (index - expectedIndex) / (maxIndex - expectedIndex); + else + truth_adjusted_rand_index = 0; + raft::update_device(firstClusterArray.data(), &arr1[0], nElements, stream); + raft::update_device(secondClusterArray.data(), &arr2[0], nElements, stream); + } + + void SetupZeroArray() + { + lowerLabelRange = 0; + upperLabelRange = 0; + truth_adjusted_rand_index = 1.0; + } + + adjustedRandIndexParam params; + T lowerLabelRange, upperLabelRange; + rmm::device_uvector firstClusterArray; + rmm::device_uvector secondClusterArray; + int nElements = 0; + double truth_adjusted_rand_index = 0; + double computed_adjusted_rand_index = 0; + cudaStream_t stream = 0; +}; + +const std::vector inputs = { + {199, 1, 10, false, 0.000001, false}, + {200, 15, 100, false, 0.000001, false}, + {100, 1, 20, false, 0.000001, false}, + {10, 1, 10, false, 0.000001, false}, + {198, 1, 100, false, 0.000001, false}, + {300, 3, 99, false, 0.000001, false}, + {199, 1, 10, true, 0.000001, false}, + {200, 15, 100, true, 0.000001, false}, + {100, 1, 20, true, 0.000001, false}, + // FIXME: disabled temporarily due to flaky test + // {10, 1, 10, true, 0.000001, false}, + {198, 1, 100, true, 0.000001, false}, + {300, 3, 99, true, 0.000001, false}, + + {199, 0, 0, false, 0.000001, true}, + {200, 0, 0, false, 0.000001, true}, + {100, 0, 0, false, 0.000001, true}, + {10, 0, 0, false, 0.000001, true}, + {198, 0, 0, false, 0.000001, true}, + {300, 0, 0, false, 0.000001, true}, + {199, 0, 0, true, 0.000001, true}, + {200, 0, 0, true, 0.000001, true}, + {100, 0, 0, true, 0.000001, true}, + {10, 0, 0, true, 0.000001, true}, + {198, 0, 0, true, 0.000001, true}, + {300, 0, 0, true, 0.000001, true}, +}; + +const std::vector large_inputs = { + {2000000, 1, 1000, false, 0.000001, false}, + {2000000, 1, 1000, true, 0.000001, false}, + + {2000000, 0, 0, false, 0.000001, true}, + {2000000, 0, 0, true, 0.000001, true}, +}; + +typedef adjustedRandIndexTest ARI_ii; +TEST_P(ARI_ii, Result) +{ + ASSERT_NEAR(computed_adjusted_rand_index, truth_adjusted_rand_index, params.tolerance); +} +INSTANTIATE_TEST_CASE_P(adjusted_rand_index, ARI_ii, ::testing::ValuesIn(inputs)); + +typedef adjustedRandIndexTest ARI_il; +TEST_P(ARI_il, Result) +{ + ASSERT_NEAR(computed_adjusted_rand_index, truth_adjusted_rand_index, params.tolerance); +} +INSTANTIATE_TEST_CASE_P(adjusted_rand_index, ARI_il, ::testing::ValuesIn(inputs)); +INSTANTIATE_TEST_CASE_P(adjusted_rand_index_large, ARI_il, ::testing::ValuesIn(large_inputs)); + +} // end namespace stats +} // end namespace raft diff --git a/cpp/test/stats/completeness_score.cu b/cpp/test/stats/completeness_score.cu new file mode 100644 index 0000000000..00cbe09627 --- /dev/null +++ b/cpp/test/stats/completeness_score.cu @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "../test_utils.h" +#include +#include +#include +#include +#include +#include +#include +#include + +namespace raft { +namespace stats { + +// parameter structure definition +struct completenessParam { + int nElements; + int lowerLabelRange; + int upperLabelRange; + bool sameArrays; + double tolerance; +}; + +// test fixture class +template +class completenessTest : public ::testing::TestWithParam { + protected: + // the constructor + void SetUp() override + { + // getting the parameters + params = ::testing::TestWithParam::GetParam(); + + nElements = params.nElements; + lowerLabelRange = params.lowerLabelRange; + upperLabelRange = params.upperLabelRange; + + // generating random value test input + std::vector arr1(nElements, 0); + std::vector arr2(nElements, 0); + std::random_device rd; + std::default_random_engine dre(rd()); + std::uniform_int_distribution intGenerator(lowerLabelRange, upperLabelRange); + + std::generate(arr1.begin(), arr1.end(), [&]() { return intGenerator(dre); }); + if (params.sameArrays) { + arr2 = arr1; + } else { + std::generate(arr2.begin(), arr2.end(), [&]() { return intGenerator(dre); }); + } + + // allocating and initializing memory to the GPU + + RAFT_CUDA_TRY(cudaStreamCreate(&stream)); + + rmm::device_uvector truthClusterArray(nElements, stream); + rmm::device_uvector predClusterArray(nElements, stream); + raft::update_device(truthClusterArray.data(), arr1.data(), (int)nElements, stream); + raft::update_device(predClusterArray.data(), arr2.data(), (int)nElements, stream); + + // calculating the golden output + double truthMI, truthEntropy; + + truthMI = raft::stats::mutual_info_score(truthClusterArray.data(), + predClusterArray.data(), + nElements, + lowerLabelRange, + upperLabelRange, + stream); + truthEntropy = raft::stats::entropy( + predClusterArray.data(), nElements, lowerLabelRange, upperLabelRange, stream); + + if (truthEntropy) { + truthCompleteness = truthMI / truthEntropy; + } else + truthCompleteness = 1.0; + + if (nElements == 0) truthCompleteness = 1.0; + + // calling the completeness CUDA implementation + computedCompleteness = raft::stats::completeness_score(truthClusterArray.data(), + predClusterArray.data(), + nElements, + lowerLabelRange, + upperLabelRange, + stream); + } + + // the destructor + void TearDown() override { RAFT_CUDA_TRY(cudaStreamDestroy(stream)); } + + // declaring the data values + completenessParam params; + T lowerLabelRange, upperLabelRange; + int nElements = 0; + double truthCompleteness = 0; + double computedCompleteness = 0; + cudaStream_t stream = 0; +}; + +// setting test parameter values +const std::vector inputs = {{199, 1, 10, false, 0.000001}, + {200, 15, 100, false, 0.000001}, + {100, 1, 20, false, 0.000001}, + {10, 1, 10, false, 0.000001}, + {198, 1, 100, false, 0.000001}, + {300, 3, 99, false, 0.000001}, + {199, 1, 10, true, 0.000001}, + {200, 15, 100, true, 0.000001}, + {100, 1, 20, true, 0.000001}, + {10, 1, 10, true, 0.000001}, + {198, 1, 100, true, 0.000001}, + {300, 3, 99, true, 0.000001}}; + +// writing the test suite +typedef completenessTest completenessTestClass; +TEST_P(completenessTestClass, Result) +{ + ASSERT_NEAR(computedCompleteness, truthCompleteness, params.tolerance); +} +INSTANTIATE_TEST_CASE_P(completeness, completenessTestClass, ::testing::ValuesIn(inputs)); + +} // end namespace stats +} // end namespace raft diff --git a/cpp/test/stats/contingencyMatrix.cu b/cpp/test/stats/contingencyMatrix.cu new file mode 100644 index 0000000000..ff6998fe97 --- /dev/null +++ b/cpp/test/stats/contingencyMatrix.cu @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../test_utils.h" +#include +#include +#include +#include +#include +#include +#include +#include + +namespace raft { +namespace stats { + +struct ContingencyMatrixParam { + int nElements; + int minClass; + int maxClass; + bool calcCardinality; + bool skipLabels; + float tolerance; +}; + +template +class ContingencyMatrixTest : public ::testing::TestWithParam { + protected: + ContingencyMatrixTest() + : pWorkspace(0, stream), + dY(0, stream), + dYHat(0, stream), + dComputedOutput(0, stream), + dGoldenOutput(0, stream) + { + } + + void SetUp() override + { + params = ::testing::TestWithParam::GetParam(); + + int numElements = params.nElements; + int lowerLabelRange = params.minClass; + int upperLabelRange = params.maxClass; + + std::vector y(numElements, 0); + std::vector y_hat(numElements, 0); + std::random_device rd; + std::default_random_engine dre(rd()); + std::uniform_int_distribution intGenerator(lowerLabelRange, upperLabelRange); + + std::generate(y.begin(), y.end(), [&]() { return intGenerator(dre); }); + std::generate(y_hat.begin(), y_hat.end(), [&]() { return intGenerator(dre); }); + + if (params.skipLabels) { + // remove two label value from input arrays + int y1 = (upperLabelRange - lowerLabelRange) / 2; + int y2 = y1 + (upperLabelRange - lowerLabelRange) / 4; + + // replacement values + int y1_R = y1 + 1; + int y2_R = y2 + 1; + + std::replace(y.begin(), y.end(), y1, y1_R); + std::replace(y.begin(), y.end(), y2, y2_R); + std::replace(y_hat.begin(), y_hat.end(), y1, y1_R); + std::replace(y_hat.begin(), y_hat.end(), y2, y2_R); + } + + RAFT_CUDA_TRY(cudaStreamCreate(&stream)); + dY.resize(numElements, stream); + dYHat.resize(numElements, stream); + + raft::update_device(dYHat.data(), &y_hat[0], numElements, stream); + raft::update_device(dY.data(), &y[0], numElements, stream); + + if (params.calcCardinality) { + raft::stats::getInputClassCardinality( + dY.data(), numElements, stream, minLabel, maxLabel); + } else { + minLabel = lowerLabelRange; + maxLabel = upperLabelRange; + } + + numUniqueClasses = maxLabel - minLabel + 1; + + dComputedOutput.resize(numUniqueClasses * numUniqueClasses, stream); + dGoldenOutput.resize(numUniqueClasses * numUniqueClasses, stream); + + // generate golden output on CPU + size_t sizeOfMat = numUniqueClasses * numUniqueClasses * sizeof(int); + std::vector hGoldenOutput(sizeOfMat, 0); + + for (int i = 0; i < numElements; i++) { + auto row = y[i] - minLabel; + auto column = y_hat[i] - minLabel; + hGoldenOutput[row * numUniqueClasses + column] += 1; + } + + raft::update_device( + dGoldenOutput.data(), hGoldenOutput.data(), numUniqueClasses * numUniqueClasses, stream); + + workspaceSz = raft::stats::getContingencyMatrixWorkspaceSize( + numElements, dY.data(), stream, minLabel, maxLabel); + pWorkspace.resize(workspaceSz, stream); + raft::interruptible::synchronize(stream); + } + + void TearDown() override { RAFT_CUDA_TRY(cudaStreamDestroy(stream)); } + + void RunTest() + { + int numElements = params.nElements; + raft::stats::contingencyMatrix(dY.data(), + dYHat.data(), + numElements, + dComputedOutput.data(), + stream, + (void*)pWorkspace.data(), + workspaceSz, + minLabel, + maxLabel); + ASSERT_TRUE(raft::devArrMatch(dComputedOutput.data(), + dGoldenOutput.data(), + numUniqueClasses * numUniqueClasses, + raft::Compare())); + } + + ContingencyMatrixParam params; + int numUniqueClasses = -1; + T minLabel, maxLabel; + cudaStream_t stream = 0; + size_t workspaceSz; + rmm::device_uvector pWorkspace; + rmm::device_uvector dY, dYHat; + rmm::device_uvector dComputedOutput, dGoldenOutput; +}; + +const std::vector inputs = { + {10000, 1, 10, true, false, 0.000001}, + {10000, 1, 5000, true, false, 0.000001}, + {10000, 1, 10000, true, false, 0.000001}, + {10000, 1, 20000, true, false, 0.000001}, + {10000, 1, 10, false, false, 0.000001}, + {10000, 1, 5000, false, false, 0.000001}, + {10000, 1, 10000, false, false, 0.000001}, + {10000, 1, 20000, false, false, 0.000001}, + {100000, 1, 100, false, false, 0.000001}, + {1000000, 1, 1200, true, false, 0.000001}, + {1000000, 1, 10000, false, false, 0.000001}, + {100000, 1, 100, false, true, 0.000001}, +}; + +typedef ContingencyMatrixTest ContingencyMatrixTestS; +TEST_P(ContingencyMatrixTestS, Result) { RunTest(); } +INSTANTIATE_TEST_CASE_P(ContingencyMatrix, ContingencyMatrixTestS, ::testing::ValuesIn(inputs)); +} // namespace stats +} // namespace raft diff --git a/cpp/test/stats/dispersion.cu b/cpp/test/stats/dispersion.cu new file mode 100644 index 0000000000..4676059746 --- /dev/null +++ b/cpp/test/stats/dispersion.cu @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../test_utils.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace raft { +namespace stats { + +template +struct DispersionInputs { + T tolerance; + int dim, clusters; + unsigned long long int seed; +}; + +template +::std::ostream& operator<<(::std::ostream& os, const DispersionInputs& dims) +{ + return os; +} + +template +class DispersionTest : public ::testing::TestWithParam> { + protected: + DispersionTest() : exp_mean(0, stream), act_mean(0, stream) {} + + void SetUp() override + { + params = ::testing::TestWithParam>::GetParam(); + raft::random::Rng r(params.seed); + int len = params.clusters * params.dim; + RAFT_CUDA_TRY(cudaStreamCreate(&stream)); + rmm::device_uvector data(len, stream); + rmm::device_uvector counts(params.clusters, stream); + exp_mean.resize(params.dim, stream); + act_mean.resize(params.dim, stream); + r.uniform(data.data(), len, (T)-1.0, (T)1.0, stream); + r.uniformInt(counts.data(), params.clusters, 1, 100, stream); + std::vector h_counts(params.clusters, 0); + raft::update_host(&(h_counts[0]), counts.data(), params.clusters, stream); + npoints = 0; + for (const auto& val : h_counts) { + npoints += val; + } + actualVal = dispersion( + data.data(), counts.data(), act_mean.data(), params.clusters, npoints, params.dim, stream); + expectedVal = T(0); + std::vector h_data(len, T(0)); + raft::update_host(&(h_data[0]), data.data(), len, stream); + std::vector mean(params.dim, T(0)); + for (int i = 0; i < params.clusters; ++i) { + for (int j = 0; j < params.dim; ++j) { + mean[j] += h_data[i * params.dim + j] * T(h_counts[i]); + } + } + for (int i = 0; i < params.dim; ++i) { + mean[i] /= T(npoints); + } + raft::update_device(exp_mean.data(), &(mean[0]), params.dim, stream); + for (int i = 0; i < params.clusters; ++i) { + for (int j = 0; j < params.dim; ++j) { + auto diff = h_data[i * params.dim + j] - mean[j]; + expectedVal += diff * diff * T(h_counts[i]); + } + } + expectedVal = sqrt(expectedVal); + raft::interruptible::synchronize(stream); + } + + void TearDown() override { RAFT_CUDA_TRY(cudaStreamDestroy(stream)); } + + protected: + DispersionInputs params; + rmm::device_uvector exp_mean, act_mean; + cudaStream_t stream = 0; + int npoints; + T expectedVal, actualVal; +}; + +const std::vector> inputsf = { + {0.001f, 10, 1000, 1234ULL}, {0.001f, 100, 100, 1234ULL}, {0.001f, 1000, 1000, 1234ULL}}; +typedef DispersionTest DispersionTestF; +TEST_P(DispersionTestF, Result) +{ + auto eq = raft::CompareApprox(params.tolerance); + ASSERT_TRUE(devArrMatch(exp_mean.data(), act_mean.data(), params.dim, eq)); + ASSERT_TRUE(match(expectedVal, actualVal, eq)); +} +INSTANTIATE_TEST_CASE_P(DispersionTests, DispersionTestF, ::testing::ValuesIn(inputsf)); + +const std::vector> inputsd = { + {0.001, 10, 1000, 1234ULL}, {0.001, 100, 100, 1234ULL}, {0.001, 1000, 1000, 1234ULL}}; +typedef DispersionTest DispersionTestD; +TEST_P(DispersionTestD, Result) +{ + auto eq = raft::CompareApprox(params.tolerance); + ASSERT_TRUE(devArrMatch(exp_mean.data(), act_mean.data(), params.dim, eq)); + ASSERT_TRUE(match(expectedVal, actualVal, eq)); +} +INSTANTIATE_TEST_CASE_P(DispersionTests, DispersionTestD, ::testing::ValuesIn(inputsd)); + +} // end namespace stats +} // end namespace raft diff --git a/cpp/test/stats/entropy.cu b/cpp/test/stats/entropy.cu new file mode 100644 index 0000000000..46237f0b21 --- /dev/null +++ b/cpp/test/stats/entropy.cu @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "../test_utils.h" +#include +#include +#include +#include +#include +#include +#include +#include + +namespace raft { +namespace stats { + +struct entropyParam { + int nElements; + int lowerLabelRange; + int upperLabelRange; + double tolerance; +}; + +// test fixture class +template +class entropyTest : public ::testing::TestWithParam { + protected: + // the constructor + void SetUp() override + { + // getting the parameters + params = ::testing::TestWithParam::GetParam(); + + nElements = params.nElements; + lowerLabelRange = params.lowerLabelRange; + upperLabelRange = params.upperLabelRange; + + // generating random value test input + std::vector arr1(nElements, 0); + std::random_device rd; + std::default_random_engine dre(rd()); + std::uniform_int_distribution intGenerator(lowerLabelRange, upperLabelRange); + + std::generate(arr1.begin(), arr1.end(), [&]() { return intGenerator(dre); }); + + // generating the golden output + int numUniqueClasses = upperLabelRange - lowerLabelRange + 1; + + int* p = (int*)malloc(numUniqueClasses * sizeof(int)); + memset(p, 0, numUniqueClasses * sizeof(int)); + + // calculating the bincount array + for (int i = 0; i < nElements; ++i) { + ++p[arr1[i] - lowerLabelRange]; + } + + // calculating the aggregate entropy + for (int i = 0; i < numUniqueClasses; ++i) { + if (p[i]) + truthEntropy += + -1 * (double(p[i]) / double(nElements)) * (log(double(p[i])) - log(double(nElements))); + } + + // allocating and initializing memory to the GPU + RAFT_CUDA_TRY(cudaStreamCreate(&stream)); + rmm::device_uvector clusterArray(nElements, stream); + raft::update_device(clusterArray.data(), &arr1[0], (int)nElements, stream); + + raft::interruptible::synchronize(stream); + // calling the entropy CUDA implementation + computedEntropy = raft::stats::entropy( + clusterArray.data(), nElements, lowerLabelRange, upperLabelRange, stream); + RAFT_CUDA_TRY(cudaStreamDestroy(stream)); + } + + // declaring the data values + entropyParam params; + T lowerLabelRange, upperLabelRange; + + int nElements = 0; + double truthEntropy = 0; + double computedEntropy = 0; + cudaStream_t stream = 0; +}; + +// setting test parameter values +const std::vector inputs = {{199, 1, 10, 0.000001}, + {200, 15, 100, 0.000001}, + {100, 1, 20, 0.000001}, + {10, 1, 10, 0.000001}, + {198, 1, 100, 0.000001}, + {300, 3, 99, 0.000001}, + {199, 1, 10, 0.000001}, + {200, 15, 100, 0.000001}, + {100, 1, 20, 0.000001}, + {10, 1, 10, 0.000001}, + {198, 1, 100, 0.000001}, + {300, 3, 99, 0.000001}}; + +// writing the test suite +typedef entropyTest entropyTestClass; +TEST_P(entropyTestClass, Result) { ASSERT_NEAR(computedEntropy, truthEntropy, params.tolerance); } +INSTANTIATE_TEST_CASE_P(entropy, entropyTestClass, ::testing::ValuesIn(inputs)); + +} // end namespace stats +} // end namespace raft diff --git a/cpp/test/stats/homogeneity_score.cu b/cpp/test/stats/homogeneity_score.cu new file mode 100644 index 0000000000..787b8c3a91 --- /dev/null +++ b/cpp/test/stats/homogeneity_score.cu @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "../test_utils.h" +#include +#include +#include +#include +#include +#include +#include + +namespace raft { +namespace stats { + +// parameter structure definition +struct homogeneityParam { + int nElements; + int lowerLabelRange; + int upperLabelRange; + bool sameArrays; + double tolerance; +}; + +// test fixture class +template +class homogeneityTest : public ::testing::TestWithParam { + protected: + // the constructor + void SetUp() override + { + // getting the parameters + params = ::testing::TestWithParam::GetParam(); + + nElements = params.nElements; + lowerLabelRange = params.lowerLabelRange; + upperLabelRange = params.upperLabelRange; + + // generating random value test input + std::vector arr1(nElements, 0); + std::vector arr2(nElements, 0); + std::random_device rd; + std::default_random_engine dre(rd()); + std::uniform_int_distribution intGenerator(lowerLabelRange, upperLabelRange); + + std::generate(arr1.begin(), arr1.end(), [&]() { return intGenerator(dre); }); + if (params.sameArrays) { + arr2 = arr1; + } else { + std::generate(arr2.begin(), arr2.end(), [&]() { return intGenerator(dre); }); + } + + // allocating and initializing memory to the GPU + + RAFT_CUDA_TRY(cudaStreamCreate(&stream)); + + rmm::device_uvector truthClusterArray(nElements, stream); + rmm::device_uvector predClusterArray(nElements, stream); + raft::update_device(truthClusterArray.data(), &arr1[0], (int)nElements, stream); + raft::update_device(predClusterArray.data(), &arr2[0], (int)nElements, stream); + + // calculating the golden output + double truthMI, truthEntropy; + + truthMI = raft::stats::mutual_info_score(truthClusterArray.data(), + predClusterArray.data(), + nElements, + lowerLabelRange, + upperLabelRange, + stream); + truthEntropy = raft::stats::entropy( + truthClusterArray.data(), nElements, lowerLabelRange, upperLabelRange, stream); + + if (truthEntropy) { + truthHomogeneity = truthMI / truthEntropy; + } else + truthHomogeneity = 1.0; + + if (nElements == 0) truthHomogeneity = 1.0; + + // calling the homogeneity CUDA implementation + computedHomogeneity = raft::stats::homogeneity_score(truthClusterArray.data(), + predClusterArray.data(), + nElements, + lowerLabelRange, + upperLabelRange, + stream); + RAFT_CUDA_TRY(cudaStreamDestroy(stream)); + } + + // declaring the data values + homogeneityParam params; + T lowerLabelRange, upperLabelRange; + int nElements = 0; + double truthHomogeneity = 0; + double computedHomogeneity = 0; + cudaStream_t stream = 0; +}; + +// setting test parameter values +const std::vector inputs = {{199, 1, 10, false, 0.000001}, + {200, 15, 100, false, 0.000001}, + {100, 1, 20, false, 0.000001}, + {10, 1, 10, false, 0.000001}, + {198, 1, 100, false, 0.000001}, + {300, 3, 99, false, 0.000001}, + {199, 1, 10, true, 0.000001}, + {200, 15, 100, true, 0.000001}, + {100, 1, 20, true, 0.000001}, + {10, 1, 10, true, 0.000001}, + {198, 1, 100, true, 0.000001}, + {300, 3, 99, true, 0.000001}}; + +// writing the test suite +typedef homogeneityTest homogeneityTestClass; +TEST_P(homogeneityTestClass, Result) +{ + ASSERT_NEAR(computedHomogeneity, truthHomogeneity, params.tolerance); +} +INSTANTIATE_TEST_CASE_P(homogeneity, homogeneityTestClass, ::testing::ValuesIn(inputs)); + +} // end namespace stats +} // end namespace raft diff --git a/cpp/test/stats/kl_divergence.cu b/cpp/test/stats/kl_divergence.cu new file mode 100644 index 0000000000..c4348d150e --- /dev/null +++ b/cpp/test/stats/kl_divergence.cu @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "../test_utils.h" +#include +#include +#include +#include +#include +#include + +namespace raft { +namespace stats { + +// parameter structure definition +struct klDivergenceParam { + int nElements; + double tolerance; +}; + +// test fixture class +template +class klDivergenceTest : public ::testing::TestWithParam { + protected: + // the constructor + void SetUp() override + { + // getting the parameters + params = ::testing::TestWithParam::GetParam(); + + nElements = params.nElements; + + // generating random value test input + std::vector h_modelPDF(nElements, 0); + std::vector h_candidatePDF(nElements, 0); + std::random_device rd; + std::default_random_engine dre(rd()); + std::uniform_real_distribution realGenerator(0.0, 1.0); + + std::generate(h_modelPDF.begin(), h_modelPDF.end(), [&]() { return realGenerator(dre); }); + std::generate( + h_candidatePDF.begin(), h_candidatePDF.end(), [&]() { return realGenerator(dre); }); + + // allocating and initializing memory to the GPU + RAFT_CUDA_TRY(cudaStreamCreate(&stream)); + + rmm::device_uvector d_modelPDF(nElements, stream); + rmm::device_uvector d_candidatePDF(nElements, stream); + RAFT_CUDA_TRY(cudaMemset(d_modelPDF.data(), 0, d_modelPDF.size() * sizeof(DataT))); + RAFT_CUDA_TRY(cudaMemset(d_candidatePDF.data(), 0, d_candidatePDF.size() * sizeof(DataT))); + + raft::update_device(d_modelPDF.data(), &h_modelPDF[0], (int)nElements, stream); + raft::update_device(d_candidatePDF.data(), &h_candidatePDF[0], (int)nElements, stream); + + // generating the golden output + for (int i = 0; i < nElements; ++i) { + if (h_modelPDF[i] == 0.0) + truthklDivergence += 0; + + else + truthklDivergence += h_modelPDF[i] * log(h_modelPDF[i] / h_candidatePDF[i]); + } + + // calling the kl_divergence CUDA implementation + computedklDivergence = + raft::stats::kl_divergence(d_modelPDF.data(), d_candidatePDF.data(), nElements, stream); + RAFT_CUDA_TRY(cudaStreamDestroy(stream)); + } + + // declaring the data values + klDivergenceParam params; + int nElements = 0; + DataT truthklDivergence = 0; + DataT computedklDivergence = 0; + cudaStream_t stream = 0; +}; + +// setting test parameter values +const std::vector inputs = { + {500, 0.000001}, {200, 0.001}, {5000, 0.000001}, {500000, 0.000001} + +}; + +// writing the test suite +typedef klDivergenceTest klDivergenceTestClass; +TEST_P(klDivergenceTestClass, Result) +{ + ASSERT_NEAR(computedklDivergence, truthklDivergence, params.tolerance); +} +INSTANTIATE_TEST_CASE_P(klDivergence, klDivergenceTestClass, ::testing::ValuesIn(inputs)); + +} // end namespace stats +} // end namespace raft diff --git a/cpp/test/stats/mutual_info_score.cu b/cpp/test/stats/mutual_info_score.cu new file mode 100644 index 0000000000..0228b72184 --- /dev/null +++ b/cpp/test/stats/mutual_info_score.cu @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "../test_utils.h" +#include +#include +#include +#include +#include +#include +#include + +namespace raft { +namespace stats { + +// parameter structure definition +struct mutualInfoParam { + int nElements; + int lowerLabelRange; + int upperLabelRange; + bool sameArrays; + double tolerance; +}; + +// test fixture class +template +class mutualInfoTest : public ::testing::TestWithParam { + protected: + // the constructor + void SetUp() override + { + // getting the parameters + params = ::testing::TestWithParam::GetParam(); + + nElements = params.nElements; + lowerLabelRange = params.lowerLabelRange; + upperLabelRange = params.upperLabelRange; + + // generating random value test input + std::vector arr1(nElements, 0); + std::vector arr2(nElements, 0); + std::random_device rd; + std::default_random_engine dre(rd()); + std::uniform_int_distribution intGenerator(lowerLabelRange, upperLabelRange); + + std::generate(arr1.begin(), arr1.end(), [&]() { return intGenerator(dre); }); + if (params.sameArrays) { + arr2 = arr1; + } else { + std::generate(arr2.begin(), arr2.end(), [&]() { return intGenerator(dre); }); + } + + // generating the golden output + // calculating the contingency matrix + int numUniqueClasses = upperLabelRange - lowerLabelRange + 1; + size_t sizeOfMat = numUniqueClasses * numUniqueClasses * sizeof(int); + int* hGoldenOutput = (int*)malloc(sizeOfMat); + memset(hGoldenOutput, 0, sizeOfMat); + int i, j; + for (i = 0; i < nElements; i++) { + int row = arr1[i] - lowerLabelRange; + int column = arr2[i] - lowerLabelRange; + + hGoldenOutput[row * numUniqueClasses + column] += 1; + } + + int* a = (int*)malloc(numUniqueClasses * sizeof(int)); + int* b = (int*)malloc(numUniqueClasses * sizeof(int)); + memset(a, 0, numUniqueClasses * sizeof(int)); + memset(b, 0, numUniqueClasses * sizeof(int)); + + // and also the reducing contingency matrix along row and column + for (i = 0; i < numUniqueClasses; ++i) { + for (j = 0; j < numUniqueClasses; ++j) { + a[i] += hGoldenOutput[i * numUniqueClasses + j]; + b[i] += hGoldenOutput[j * numUniqueClasses + i]; + } + } + + // calculating the truth mutual information + for (int i = 0; i < numUniqueClasses; ++i) { + for (int j = 0; j < numUniqueClasses; ++j) { + if (a[i] * b[j] != 0 && hGoldenOutput[i * numUniqueClasses + j] != 0) { + truthmutualInfo += + (double)(hGoldenOutput[i * numUniqueClasses + j]) * + (log((double)(double(nElements) * hGoldenOutput[i * numUniqueClasses + j])) - + log((double)(a[i] * b[j]))); + } + } + } + + truthmutualInfo /= nElements; + + // allocating and initializing memory to the GPU + RAFT_CUDA_TRY(cudaStreamCreate(&stream)); + + rmm::device_uvector firstClusterArray(nElements, stream); + rmm::device_uvector secondClusterArray(nElements, stream); + RAFT_CUDA_TRY( + cudaMemsetAsync(firstClusterArray.data(), 0, firstClusterArray.size() * sizeof(T), stream)); + RAFT_CUDA_TRY( + cudaMemsetAsync(secondClusterArray.data(), 0, secondClusterArray.size() * sizeof(T), stream)); + + raft::update_device(firstClusterArray.data(), &arr1[0], (int)nElements, stream); + raft::update_device(secondClusterArray.data(), &arr2[0], (int)nElements, stream); + + // calling the mutualInfo CUDA implementation + computedmutualInfo = raft::stats::mutual_info_score(firstClusterArray.data(), + secondClusterArray.data(), + nElements, + lowerLabelRange, + upperLabelRange, + stream); + } + + // the destructor + void TearDown() override { RAFT_CUDA_TRY(cudaStreamDestroy(stream)); } + + // declaring the data values + mutualInfoParam params; + T lowerLabelRange, upperLabelRange; + int nElements = 0; + double truthmutualInfo = 0; + double computedmutualInfo = 0; + cudaStream_t stream = 0; +}; + +// setting test parameter values +const std::vector inputs = {{199, 1, 10, false, 0.000001}, + {200, 15, 100, false, 0.000001}, + {100, 1, 20, false, 0.000001}, + {10, 1, 10, false, 0.000001}, + {198, 1, 100, false, 0.000001}, + {300, 3, 99, false, 0.000001}, + {199, 1, 10, true, 0.000001}, + {200, 15, 100, true, 0.000001}, + {100, 1, 20, true, 0.000001}, + {10, 1, 10, true, 0.000001}, + {198, 1, 100, true, 0.000001}, + {300, 3, 99, true, 0.000001}}; + +// writing the test suite +typedef mutualInfoTest mutualInfoTestClass; +TEST_P(mutualInfoTestClass, Result) +{ + ASSERT_NEAR(computedmutualInfo, truthmutualInfo, params.tolerance); +} +INSTANTIATE_TEST_CASE_P(mutualInfo, mutualInfoTestClass, ::testing::ValuesIn(inputs)); + +} // end namespace stats +} // end namespace raft diff --git a/cpp/test/stats/rand_index.cu b/cpp/test/stats/rand_index.cu new file mode 100644 index 0000000000..806987e216 --- /dev/null +++ b/cpp/test/stats/rand_index.cu @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../test_utils.h" + +#include + +#include + +#include +#include +#include +#include + +namespace raft { +namespace stats { + +// parameter structure definition +struct randIndexParam { + uint64_t nElements; + int lowerLabelRange; + int upperLabelRange; + double tolerance; +}; + +// test fixture class +template +class randIndexTest : public ::testing::TestWithParam { + protected: + // the constructor + void SetUp() override + { + // getting the parameters + params = ::testing::TestWithParam::GetParam(); + + size = params.nElements; + lowerLabelRange = params.lowerLabelRange; + upperLabelRange = params.upperLabelRange; + + // generating random value test input + std::vector arr1(size, 0); + std::vector arr2(size, 0); + std::random_device rd; + std::default_random_engine dre(rd()); + std::uniform_int_distribution intGenerator(lowerLabelRange, upperLabelRange); + + std::generate(arr1.begin(), arr1.end(), [&]() { return intGenerator(dre); }); + std::generate(arr2.begin(), arr2.end(), [&]() { return intGenerator(dre); }); + + // generating the golden output + int64_t a_truth = 0; + int64_t b_truth = 0; + + for (uint64_t iter = 0; iter < size; ++iter) { + for (uint64_t jiter = 0; jiter < iter; ++jiter) { + if (arr1[iter] == arr1[jiter] && arr2[iter] == arr2[jiter]) { + ++a_truth; + } else if (arr1[iter] != arr1[jiter] && arr2[iter] != arr2[jiter]) { + ++b_truth; + } + } + } + uint64_t nChooseTwo = (size * (size - 1)) / 2; + truthRandIndex = (double)(((double)(a_truth + b_truth)) / (double)nChooseTwo); + + // allocating and initializing memory to the GPU + RAFT_CUDA_TRY(cudaStreamCreate(&stream)); + + rmm::device_uvector firstClusterArray(size, stream); + rmm::device_uvector secondClusterArray(size, stream); + RAFT_CUDA_TRY( + cudaMemsetAsync(firstClusterArray.data(), 0, firstClusterArray.size() * sizeof(T), stream)); + RAFT_CUDA_TRY( + cudaMemsetAsync(secondClusterArray.data(), 0, secondClusterArray.size() * sizeof(T), stream)); + + raft::update_device(firstClusterArray.data(), &arr1[0], (int)size, stream); + raft::update_device(secondClusterArray.data(), &arr2[0], (int)size, stream); + + // calling the rand_index CUDA implementation + computedRandIndex = raft::stats::rand_index( + firstClusterArray.data(), secondClusterArray.data(), size, stream); + } + + // the destructor + void TearDown() override { RAFT_CUDA_TRY(cudaStreamDestroy(stream)); } + + // declaring the data values + randIndexParam params; + int lowerLabelRange = 0, upperLabelRange = 2; + uint64_t size = 0; + double truthRandIndex = 0; + double computedRandIndex = 0; + cudaStream_t stream = 0; +}; + +// setting test parameter values +const std::vector inputs = {{199, 1, 10, 0.000001}, + {200, 1, 100, 0.000001}, + {10, 1, 1200, 0.000001}, + {100, 1, 10000, 0.000001}, + {198, 1, 100, 0.000001}, + {300, 3, 99, 0.000001}, + {2, 0, 0, 0.00001}}; + +// writing the test suite +typedef randIndexTest randIndexTestClass; +TEST_P(randIndexTestClass, Result) +{ + ASSERT_NEAR(computedRandIndex, truthRandIndex, params.tolerance); +} +INSTANTIATE_TEST_CASE_P(randIndex, randIndexTestClass, ::testing::ValuesIn(inputs)); + +} // end namespace stats +} // end namespace raft diff --git a/cpp/test/stats/silhouette_score.cu b/cpp/test/stats/silhouette_score.cu new file mode 100644 index 0000000000..b7cdf20484 --- /dev/null +++ b/cpp/test/stats/silhouette_score.cu @@ -0,0 +1,230 @@ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "../test_utils.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace raft { +namespace stats { + +// parameter structure definition +struct silhouetteScoreParam { + int nRows; + int nCols; + int nLabels; + raft::distance::DistanceType metric; + int chunk; + double tolerance; +}; + +// test fixture class +template +class silhouetteScoreTest : public ::testing::TestWithParam { + protected: + silhouetteScoreTest() + : d_X(0, handle.get_stream()), + sampleSilScore(0, handle.get_stream()), + d_labels(0, handle.get_stream()) + { + } + + void host_silhouette_score() + { + // generating random value test input + std::vector h_X(nElements, 0.0); + std::vector h_labels(nRows, 0); + std::random_device rd; + std::default_random_engine dre(nElements * nLabels); + std::uniform_int_distribution intGenerator(0, nLabels - 1); + std::uniform_real_distribution realGenerator(0, 100); + + std::generate(h_X.begin(), h_X.end(), [&]() { return realGenerator(dre); }); + std::generate(h_labels.begin(), h_labels.end(), [&]() { return intGenerator(dre); }); + + // allocating and initializing memory to the GPU + auto stream = handle.get_stream(); + d_X.resize(nElements, stream); + d_labels.resize(nElements, stream); + RAFT_CUDA_TRY(cudaMemsetAsync(d_X.data(), 0, d_X.size() * sizeof(DataT), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(d_labels.data(), 0, d_labels.size() * sizeof(LabelT), stream)); + sampleSilScore.resize(nElements, stream); + + raft::update_device(d_X.data(), &h_X[0], (int)nElements, stream); + raft::update_device(d_labels.data(), &h_labels[0], (int)nElements, stream); + + // finding the distance matrix + + rmm::device_uvector d_distanceMatrix(nRows * nRows, stream); + double* h_distanceMatrix = (double*)malloc(nRows * nRows * sizeof(double*)); + + raft::distance::pairwise_distance( + handle, d_X.data(), d_X.data(), d_distanceMatrix.data(), nRows, nRows, nCols, params.metric); + + handle.sync_stream(stream); + + raft::update_host(h_distanceMatrix, d_distanceMatrix.data(), nRows * nRows, stream); + + + // finding the bincount array + + double* binCountArray = (double*)malloc(nLabels * sizeof(double*)); + memset(binCountArray, 0, nLabels * sizeof(double)); + + for (int i = 0; i < nRows; ++i) { + binCountArray[h_labels[i]] += 1; + } + + // finding the average intra cluster distance for every element + + double* a = (double*)malloc(nRows * sizeof(double*)); + + for (int i = 0; i < nRows; ++i) { + int myLabel = h_labels[i]; + double sumOfIntraClusterD = 0; + + for (int j = 0; j < nRows; ++j) { + if (h_labels[j] == myLabel) { sumOfIntraClusterD += h_distanceMatrix[i * nRows + j]; } + } + + if (binCountArray[myLabel] <= 1) + a[i] = -1; + else + a[i] = sumOfIntraClusterD / (binCountArray[myLabel] - 1); + } + + // finding the average inter cluster distance for every element + + double* b = (double*)malloc(nRows * sizeof(double*)); + + for (int i = 0; i < nRows; ++i) { + int myLabel = h_labels[i]; + double minAvgInterCD = ULLONG_MAX; + + for (int j = 0; j < nLabels; ++j) { + int curClLabel = j; + if (curClLabel == myLabel) continue; + double avgInterCD = 0; + + for (int k = 0; k < nRows; ++k) { + if (h_labels[k] == curClLabel) { avgInterCD += h_distanceMatrix[i * nRows + k]; } + } + + if (binCountArray[curClLabel]) + avgInterCD /= binCountArray[curClLabel]; + else + avgInterCD = ULLONG_MAX; + minAvgInterCD = min(minAvgInterCD, avgInterCD); + } + + b[i] = minAvgInterCD; + } + + // finding the silhouette score for every element + + double* truthSampleSilScore = (double*)malloc(nRows * sizeof(double*)); + for (int i = 0; i < nRows; ++i) { + if (a[i] == -1) + truthSampleSilScore[i] = 0; + else if (a[i] == 0 && b[i] == 0) + truthSampleSilScore[i] = 0; + else + truthSampleSilScore[i] = (b[i] - a[i]) / max(a[i], b[i]); + truthSilhouetteScore += truthSampleSilScore[i]; + } + + truthSilhouetteScore /= nRows; + } + + // the constructor + void SetUp() override + { + // getting the parameters + params = ::testing::TestWithParam::GetParam(); + + nRows = params.nRows; + nCols = params.nCols; + nLabels = params.nLabels; + chunk = params.chunk; + nElements = nRows * nCols; + + host_silhouette_score(); + + // calling the silhouette_score CUDA implementation + computedSilhouetteScore = raft::stats::silhouette_score(handle, + d_X.data(), + nRows, + nCols, + d_labels.data(), + nLabels, + sampleSilScore.data(), + handle.get_stream(), + params.metric); + + batchedSilhouetteScore = raft::stats::silhouette_score_batched(handle, + d_X.data(), + nRows, + nCols, + d_labels.data(), + nLabels, + sampleSilScore.data(), + chunk, + params.metric); + } + + // declaring the data values + silhouetteScoreParam params; + int nLabels; + rmm::device_uvector d_X; + rmm::device_uvector sampleSilScore; + rmm::device_uvector d_labels; + int nRows; + int nCols; + int nElements; + double truthSilhouetteScore = 0; + double computedSilhouetteScore = 0; + double batchedSilhouetteScore = 0; + raft::handle_t handle; + int chunk; +}; + +// setting test parameter values +const std::vector inputs = { + {4, 2, 3, raft::distance::DistanceType::L2Expanded, 4, 0.00001}, + {4, 2, 2, raft::distance::DistanceType::L2SqrtUnexpanded, 2, 0.00001}, + {8, 8, 3, raft::distance::DistanceType::L2Unexpanded, 4, 0.00001}, + {11, 2, 5, raft::distance::DistanceType::L2Expanded, 3, 0.00001}, + {40, 2, 8, raft::distance::DistanceType::L2Expanded, 10, 0.00001}, + {12, 7, 3, raft::distance::DistanceType::CosineExpanded, 8, 0.00001}, + {7, 5, 5, raft::distance::DistanceType::L1, 2, 0.00001}}; + +// writing the test suite +typedef silhouetteScoreTest silhouetteScoreTestClass; +TEST_P(silhouetteScoreTestClass, Result) +{ + ASSERT_NEAR(computedSilhouetteScore, truthSilhouetteScore, params.tolerance); + ASSERT_NEAR(batchedSilhouetteScore, truthSilhouetteScore, params.tolerance); +} +INSTANTIATE_TEST_CASE_P(silhouetteScore, silhouetteScoreTestClass, ::testing::ValuesIn(inputs)); + +} // end namespace stats +} // end namespace raft diff --git a/cpp/test/stats/trustworthiness.cu b/cpp/test/stats/trustworthiness.cu new file mode 100644 index 0000000000..e554417395 --- /dev/null +++ b/cpp/test/stats/trustworthiness.cu @@ -0,0 +1,336 @@ +/* + * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../test_utils.h" +#include +#include +#include +#include +#include +#include +#include + +namespace raft { +namespace stats { + +class TrustworthinessScoreTest : public ::testing::Test { + protected: + void basicTest() + { + std::vector X = { + 5.6142087, 8.59787, -4.382763, -3.6452143, -5.8816037, -0.6330313, 4.6920023, + -0.79210913, 0.6106314, 2.1210914, 5.919943, -8.43784, -6.4819884, 0.41001374, + -6.1052523, -4.0825715, -5.314755, -2.834671, 5.751696, -6.5012555, -0.4719201, + -7.53353, 7.6789393, -1.4959852, -5.5977287, -9.564147, 1.2902534, 3.559834, + -6.7659483, 8.265964, 4.595404, 9.133477, -6.1553917, -6.319754, -2.9039452, + 4.4150834, -3.094395, -4.426273, 9.584571, -5.64133, 6.6209483, 7.4044604, + 3.9620576, 5.639907, 10.33007, -0.8792053, 5.143776, -7.464049, 1.2448754, + -5.6300974, 5.4518576, 4.119535, 6.749645, 7.627064, -7.2298336, 1.9681473, + -6.9083176, 6.404673, 0.07186685, 9.0994835, 8.51037, -8.986389, 0.40534487, + 2.115397, 4.086756, 1.2284287, -2.6272132, 0.06527536, -9.587425, -7.206078, + 7.864875, 7.4397306, -6.9233336, -2.6643622, 3.3466153, 7.0408177, -3.6069896, + -9.971769, 4.4075623, 7.9063697, 2.559074, 4.323717, 1.6867131, -1.1576937, + -9.893141, -3.251416, -7.4889135, -4.0588717, -2.73338, -7.4852257, 3.4460473, + 9.759119, -5.4680476, -4.722435, -8.032619, -1.4598992, 4.227361, 3.135568, + 1.1950601, 1.1982028, 6.998856, -6.131138, -6.6921015, 0.5361224, -7.1213965, + -5.6104236, -7.2212887, -2.2710054, 8.544764, -6.0254574, 1.4582269, -5.5587835, + 8.031556, -0.26328218, -5.2591386, -9.262641, 2.8691363, 5.299787, -9.209455, + 8.523085, 5.180329, 10.655528, -5.7171874, -6.7739563, -3.6306462, 4.067106, + -1.5912259, -3.2345476, 8.042973, -3.6364832, 4.1242137, 9.886953, 5.4743724, + 6.3058076, 9.369645, -0.5175337, 4.9859877, -7.879498, 1.358422, -4.147944, + 3.8984218, 5.894656, 6.4903927, 8.702036, -8.023722, 2.802145, -7.748032, + 5.8461113, -0.34215945, 11.298865, 1.4107164, -9.949621, -1.6257563, -10.655836, + 2.4528909, 1.1570255, 5.170669, 2.8398793, 7.1838694, 9.088459, 2.631155, + 3.964414, 2.8769252, 0.04198391, -0.16993195, 3.6747139, -2.8377378, 6.1782537, + 10.759618, -4.5642614, -8.522967, 0.8614642, 6.623416, -1.029324, 5.5488334, + -7.804511, 2.128833, 7.9042315, 7.789576, -2.7944536, 0.72271067, -10.511495, + -0.78634536, -10.661714, 2.9376361, 1.9148129, 6.22859, 0.26264945, 8.028384, + 6.8743043, 0.9351067, 7.0690722, 4.2846055, 1.4134506, -0.18144785, 5.2778087, + -1.7140163, 9.217541, 8.602799, -2.6537218, -7.8377395, 1.1244944, 5.4540544, + -0.38506773, 3.9885726, -10.76455, 1.4440702, 9.136163, 6.664117, -5.7046547, + 8.038592, -9.229767, -0.2799413, 3.6064725, 4.187257, 1.0516582, -2.0707326, + -0.7615968, -8.561018, -3.7831352, 10.300297, 5.332594, -6.5880876, -4.2508664, + 1.7985519, 5.7226253, -4.1223383, -9.6697855, 1.4885283, 7.524974, 1.7206005, + 4.890457, 3.7264557, 0.4428284, -9.922455, -4.250455, -6.4410596, -2.107994, + -1.4109765, -6.1325397, 0.32883006, 6.0489736, 7.7257385, -8.281174, 1.0129383, + -10.792166, 8.378851, 10.802716, 9.848448, -9.188757, 1.3151443, 1.9971865, + -2.521849, 4.3268294, -7.775683, -2.2902298, 3.0824065, -7.17559, 9.6100855, + 7.3965735, -10.476525, 5.895973, -3.6974669, -7.6688933, 1.7354839, -7.4045196, + -1.7992063, -4.0394845, 5.2471714, -2.250571, 2.528036, -8.343515, -2.2374575, + -10.019771, 0.73371273, 3.1853926, 2.7994921, 2.6637669, 7.620401, 7.515571, + 0.68636256, 5.834537, 4.650282, -1.0362619, 0.4461701, 3.7870514, -4.1340904, + 7.202998, 9.736904, -3.005512, -8.920467, 1.1228397, 6.2598724, 1.2812365, + 4.5442104, -8.791537, 0.92113096, 8.464749, 8.359035, -4.3923397, 1.2252625, + -10.1986475, -1.4409319, -10.013967, 3.9071581, 1.683064, 4.877419, 1.6570637, + 9.559105, 7.3546534, 0.36635467, 5.220211, 4.6303267, 0.6601065, 0.16149978, + 3.8818731, -3.4438233, 8.42085, 8.659159, -3.0935583, -8.039611, 2.3060374, + 5.134666, 1.0458113, 6.0190983, -9.143728, 0.99048865, 9.210842, 6.670241, + -5.9614363, 0.8747396, 7.078824, 8.067469, -10.314754, 0.45977542, -9.28306, + 9.1838665, 9.318644, 7.189082, -11.092555, 1.0320464, 3.882163, 0.10953151, + 7.9029684, -6.9068265, -1.3526366, 5.3996363, -8.430931, 11.452577, 6.39663, + -11.090514, 4.6662245, -3.1268113, -8.357452, 2.2276728, -10.357126, -0.9291848, + -3.4193344, 3.1289792, -2.5030103, 6.772719, 11.457757, -4.2125936, -6.684548, + -4.7611327, 3.6960156, -2.3030636, -3.0591488, 10.452471, -4.1267314, 5.66614, + 7.501461, 5.072407, 6.636537, 8.990381, -0.2559256, 4.737867, -6.2149944, + 2.535682, -5.5484023, 5.7113924, 3.4742818, 7.9915137, 7.0052586, -7.156467, + 1.4354781, -8.286235, 5.7523417, -2.4175215, 9.678009, 0.05066403, -9.645226, + -2.2658763, -9.518178, 4.493372, 2.3232365, 2.1659086, 0.42507997, 8.360246, + 8.23535, 2.6878164, 5.236947, 3.4924245, -0.6089895, 0.8884741, 4.359464, + -4.6073823, 7.83441, 8.958755, -3.4690795, -9.182282, 1.2478025, 5.6311107, + -1.2408862, 3.6316886, -8.684654, 2.1078515, 7.2813864, 7.9265943, -3.6135032, + 0.4571511, 8.493568, 10.496853, -7.432897, 0.8625995, -9.607528, 7.2899456, + 8.83158, 8.908199, -10.300263, 1.1451302, 3.7871468, -0.97040755, 5.7664757, + -8.9688, -2.146672, 5.9641485, -6.2908535, 10.126465, 6.1553903, -12.066902, + 6.301596, -5.0419583, -8.228695, 2.4879954, -8.918582, -3.7434099, -4.1593685, + 3.7431836, -1.1704745, 0.5524103, 9.109399, 9.571567, -11.209955, 1.2462777, + -9.554555, 9.091726, 11.477966, 7.630937, -10.450911, 1.9205878, 5.358983, + -0.44546837, 6.7611346, -9.74753, -0.5939732, 3.8892255, -6.437991, 10.294727, + 5.6723895, -10.7883, 6.192348, -5.293862, -10.811491, 1.0194173, -7.074576, + -3.192368, -2.5231771, 4.2791643, -0.53309685, 0.501366, 9.636625, 7.710316, + -6.4219728, 1.0975566, -8.218886, 6.9011984, 9.873679, 8.903804, -9.316832, + 1.2404599, 4.9039655, 1.2272617, 4.541515, -5.2753224, -3.2196746, 3.1303136, + -7.285681, 9.041425, 5.6417427, -9.93667, 5.7548947, -5.113397, -8.544622, + 4.182665, -7.7709813, -3.2810235, -3.312072, 3.8900535, -2.0604856, 6.709082, + -8.461194, 1.2666026, 4.8770437, 2.6955879, 3.0340345, -1.1614609, -3.536341, + -7.090382, -5.36146, 9.072544, 6.4554095, -4.4728956, -1.88395, 3.1095037, + 8.782348, -3.316743, -8.65248, 1.6802986, 8.186188, 2.1783829, 4.931278, + 4.158475, 1.4033595, -11.320101, -3.7084908, -6.740436, -2.5555193, -1.0451177, + -6.5569925, 0.82810307, 8.505919, 8.332857, -9.488569, -0.21588463, -8.056692, + 8.493993, 7.6401625, 8.812983, -9.377281, 2.4369764, 3.1766508, 0.6300803, + 5.6666765, -7.913654, -0.42301777, 4.506412, -7.8954244, 10.904591, 5.042256, + -9.626183, 8.347351, -3.605006, -7.923387, 1.1024277, -8.705793, -2.5151258, + -2.5066147, 4.0515003, -2.060757, 6.2635093, 8.286584, -6.0509276, -6.76452, + -3.1158175, 1.6578803, -1.4608748, -1.24211, 8.151246, -4.2970877, 6.093071, + 7.4911637, 4.51018, 4.8425875, 9.211085, -2.4386222, 4.5830803, -5.6079445, + 2.3713675, -4.0707507, 3.1787417, 5.462342, 6.915912, 6.3928423, -7.2970796, + 5.0112796, -9.140893, 4.9990606, 0.38391754, 7.7088532, 1.9340848, 8.18833, + 8.16617, -9.42086, -0.3388326, -9.659727, 8.243045, 8.099073, 8.439428, + -7.038694, 2.1077902, 3.3866816, -1.9975324, 7.4972878, -7.2525196, -1.553731, + 4.08758, -6.6922374, 9.50525, 4.026735, -9.243538, 7.2740564, -3.9319072, + -6.3228955, 1.6693478, -7.923119, -3.7423058, -2.2813146, 5.3469067, -1.8285407, + 3.3118162, 8.826356, -4.4641976, -6.4751124, -9.200089, -2.519147, 4.225298, + 2.4105988, -0.4344186, 0.53441775, 5.2836394, -8.2816105, -4.996147, -1.6870759, + -7.8543897, -3.9788852, -7.0346904, -3.1289773, 7.4567637, -5.6227813, 1.0709786, + -8.866012, 8.427324, -1.1755563, -5.789216, -8.197835, 5.3342214, 6.0646234, + -6.8975716, 7.717031, 3.480355, 8.312151, -3.6645212, -3.0976524, -8.090359, + -1.9176173, 2.4257212, 1.9700835, 0.4098958, 2.1341088, 7.652741, -9.9595585, + -5.989757, 0.10119354, -7.935407, -5.792786, -5.22783, -4.318978, 5.414037, + -6.4621663, 1.670883, -6.9224787, 8.696932, -2.0214002, -6.6681314, -8.326418, + 4.9049683, 5.4442496, -6.403739, 7.5822453, 7.0972915, -9.072851, -0.23897195, + 1.7662339, 5.3096304, 1.983179, -2.222645, -0.34700772, -9.094717, -6.107907, + 9.525174, 8.1550665, -5.6940084, -4.1636486, 1.7360662, 8.528821, -3.7299833, + -9.341266, 2.608542, 9.108706, 0.7978509, 4.2488184, 2.454484, 0.9446999, + -10.106636, -3.8973773, -6.6566644, -4.5647273, -0.99837756, -6.568582, 9.324853, + -7.9020953, 2.0910501, 2.2896829, 1.6790711, 1.3159255, -3.5258796, 1.8898442, + -8.105812, -4.924962, 8.771129, 7.1202874, -5.991957, -3.4106019, 2.4450088, + 7.796387, -3.055946, -7.8971434, 1.9856719, 9.001636, 1.8511922, 3.019749, + 3.1227696, 0.4822102, -10.021213, -3.530504, -6.225959, -3.0029628, -1.7881511, + -7.3879776, 1.3925704, 9.499782, -3.7318087, -3.7074296, -7.7466836, -1.5284524, + 4.0535855, 3.112011, 0.10340207, -0.5429599, 6.67026, -9.155924, -4.924038, + 0.64248866, -10.0103655, -3.2742946, -4.850029, -3.6707063, 8.586258, -5.855605, + 4.906918, -6.7813993, 7.9938135, -2.5473144, -5.688948, -7.822478, 2.1421318, + 4.66659, -9.701272, 9.549149, 0.8998125, -8.651497, -0.56899565, -8.639817, + 2.3088377, 2.1264515, 3.2764478, 2.341989, 8.594338, 8.630639, 2.8440373, + 6.2043204, 4.433932, 0.6320018, -1.8179281, 5.09452, -1.5741565, 8.153934, + 8.744339, -3.6945698, -8.883078, 1.5329908, 5.2745943, 0.44716078, 4.8809066, + -7.9594903, 1.134374, 9.233994, 6.5528665, -4.520542, 9.477355, -8.622195, + -0.23191702, 2.0485356, 3.9379985, 1.5916302, -1.4516805, -0.0843819, -7.8554378, + -5.88308, 7.999766, 6.2572145, -5.585321, -4.0097756, 0.42382592, 6.160884, + -3.631315, -8.333449, 2.770595, 7.8495173, 3.3331623, 4.940415, 3.6207345, + -0.037517, -11.034698, -3.185103, -6.614664, -3.2177854, -2.0792234, -6.8879867, + 7.821685, -8.455084, 1.0784642, 4.0033927, 2.7343264, 2.6052725, -4.1224284, + -0.89305353, -6.8267674, -4.9715133, 8.880253, 5.6994023, -5.9695024, -4.9181266, + 1.3017995, 7.972617, -3.9452884, -10.424556, 2.4504194, 6.21529, 0.93840516, + 4.2070026, 6.159839, 0.91979957, -8.706724, -4.317946, -6.6823545, -3.0388, + -2.464262, -7.3716645, 1.3926703, 6.544412, -5.6251183, -5.122411, -8.622049, + -2.3905911, 3.9138813, 1.9779967, -0.05011125, 0.13310997, 7.229751, -9.742043, + -8.08724, 1.2426697, -7.9230795, -3.3162494, -7.129571, -3.5488048, 7.4701195, + -5.2357526, 0.5917681, -6.272206, 6.342328, -2.909731, -4.991607, -8.845513, + 3.3228495, 7.033246, -7.8180246, 8.214469, 6.3910093, 9.185153, -6.20472, + -7.713809, -3.8481297, 3.5579286, 0.7078448, -3.2893546, 7.384514, -4.448121, + 3.0104196, 9.492943, 8.024847, 4.9114385, 9.965594, -3.014036, 5.182494, + -5.8806014, 2.5312455, -5.9926524, 4.474469, 6.3717875, 6.993105, 6.493093, + -8.935534, 3.004074, -8.055647, 8.315765, -1.3026813, 8.250377, 0.02606229, + 6.8508425, 9.655665, -7.0116496, -0.41060972, -10.049198, 7.897801, 6.7791023, + 8.3362, -9.821014, 2.491157, 3.5160472, -1.6228812, 7.398063, -8.769123, + -3.1743705, 3.2827861, -6.497855, 10.831924, 5.2761307, -9.704417, 4.3817043, + -3.9841619, -8.111647, 1.1883026, -8.115312, -2.9240117, -5.8879666, 4.20928, + -0.3587938, 6.935672, -10.177582, 0.48819053, 3.1250648, 2.9306343, 3.082544, + -3.477687, -1.3768549, -7.4922366, -3.756631, 10.039836, 3.6670392, -5.9761434, + -4.4728765, 3.244255, 7.027899, -2.3806512, -10.4100685, 1.605716, 7.7953773, + 0.5408159, 1.7156523, 3.824097, -1.0604783, -10.142124, -5.246805, -6.5283823, + -4.579547, -2.42714, -6.709197, 2.7782338, 7.33353, -6.454507, -2.9929368, + -7.8362985, -2.695445, 2.4900775, 1.6682367, 0.4641757, -1.0495365, 6.9631333, + -9.291356, -8.23837, -0.34263706, -8.275113, -2.8454232, -5.0864096, -2.681942, + 7.5450225, -6.2517986, 0.06810654, -6.470652, 4.9042645, -1.8369255, -6.6937943, + -7.9625087, 2.8510258, 6.180508, -8.282598, 7.919079, 1.4897474, 6.7217417, + -4.2459426, -4.114431, -8.375707, -2.143264, 5.6972933, 1.5574739, 0.39375135, + 1.7930849, 5.1737595, -7.826241, -5.160268, -0.80433255, -7.839536, -5.2620406, + -5.4643164, -3.185536, 6.620315, -7.065227, 1.0524757, -6.125088, 5.7126627, + -1.6161644, -3.852159, -9.164279, 2.7005782, 5.946544, -8.468236, 8.2145405, + 1.1035942, 6.590157, -4.0461283, -4.8090615, -7.6702685, -2.1121511, 5.1147075, + 1.6128504, 2.0064135, 1.0544407, 6.0038295, -7.8282537, -4.801278, 0.32349443, + -8.0649805, -4.372714, -5.61336, -5.21394, 8.176595, -5.4753284, 1.7800134, + -8.267283, 7.2133374, -0.16594432, -6.317046, -9.490406, 4.1261597, 5.473317, + -7.7551675, 7.007468, 7.478628, -8.801905, 0.10975724, 3.5478222, 4.797803, + 1.3825226, -3.357369, 0.99262005, -6.94877, -5.4781394, 9.632604, 5.7492557, + -5.9014316, -3.1632116, 2.340859, 8.708098, -3.1255999, -8.848661, 4.5612836, + 8.455157, 0.73460823, 4.112301, 4.392744, -0.30759293, -6.8036823, -3.0331545, + -8.269506, -2.82415, -0.9411246, -5.993506, 2.1618164, -8.716055, -0.7432543, + -10.255819, 3.095418, 2.5131428, 4.752442, 0.9907621, 7.8279433, 7.85814, + 0.50430876, 5.2840405, 4.457291, 0.03330028, -0.40692952, 3.9244103, -2.117118, + 7.6977615, 8.759009, -4.2157164, -9.136053, 3.247858, 4.668686, 0.76162136, + 5.3833632, -9.231471, 0.44309422, 8.380872, 6.7211227, -3.091507, 2.173508, + -9.038242, -1.3666698, -9.819077, 0.37825826, 2.3898845, 4.2440815, 1.9161536, + 7.24787, 6.9124637, 1.6238527, 5.1140285, 3.1935842, 1.02845, -1.1273454, + 5.638998, -2.497932, 8.342559, 8.586319, -2.9069402, -7.6387944, 3.5975037, + 4.4115705, 0.41506064, 4.9078383, -9.68327, 1.8159529, 9.744613, 8.40622, + -4.495336, 9.244892, -8.789869, 1.3158468, 4.018167, 3.3922846, 2.652022, + -2.7495477, 0.2528986, -8.268324, -6.004913, 10.428784, 6.6580734, -5.537176, + -1.7177434, 2.7504628, 6.7735, -2.4454272, -9.998361, 2.9483433, 6.8266654, + 2.3787718, 4.472637, 2.5871701, 0.7355365, -7.7027745, -4.1879907, -7.172832, + -4.1843605, -0.03646783, -5.419406, 6.958486, 11.011111, -7.1821184, -7.956423, + -3.408451, 4.6850276, -2.348787, -4.398289, 6.9787564, -3.8324208, 5.967827, + 8.433518, 4.660108, 5.5657144, 9.964243, -1.3515275, 6.404833, -6.4805903, + 2.4379845, -6.0816774, 1.752272, 5.3771873, 6.9613523, 6.9788294, -6.3894596, + 3.7521114, -6.8034263, 6.4458385, -0.7233525, 10.512529, 4.362273, 9.231461, + -6.3382263, -7.659, -3.461823, 4.71463, 0.17817476, -3.685746, 7.2962036, + -4.6489477, 5.218017, 11.546999, 4.7218375, 6.8498397, 9.281103, -3.900459, + 6.844054, -7.0886965, -0.05019227, -8.233724, 5.5808983, 6.374517, 8.321048, + 7.969449, -7.3478637, 1.4917561, -8.003144, 4.780668, -1.1981848, 7.753739, + 2.0260844, -8.880096, -3.4258451, -7.141975, 1.9637157, 1.814725, 5.311151, + 1.4831505, 7.8483663, 7.257948, 1.395786, 6.417756, 5.376912, 0.59505713, + 0.00062552, 3.6634305, -4.159713, 7.3571978, 10.966816, -2.5419605, -8.466229, + 1.904205, 5.6338267, -0.52567476, 5.59736, -8.361799, 0.5009981, 8.460681, + 7.3891273, -3.5272243, 5.0552278, 9.921456, -7.69693, -7.286378, -1.9198836, + 3.1666567, -2.5832257, -2.2445817, 9.888111, -5.076563, 5.677401, 7.497946, + 5.662994, 5.414262, 8.566503, -2.5530663, 7.1032815, -6.0612082, 1.3419591, + -4.9595256, 4.3377542, 4.3790717, 6.793512, 8.383502, -7.1278043, 3.3240774, + -9.379446, 6.838661, -0.81241214, 8.694813, 0.79141915, 7.632467, 8.575382, + -8.533798, 0.28954387, -7.5675836, 5.8653326, 8.97235, 7.1649346, -10.575289, + 0.9359381, 5.02381, -0.5609511, 5.543464, -7.69131, -2.1792977, 2.4729247, + -6.1917787, 10.373678, 7.6549597, -8.809486, 5.5657206, -3.3169382, -8.042887, + 2.0874746, -7.079005, -3.33398, -3.6843317, 4.0172358, -2.0754814, 1.1726758, + 7.4618697, 6.9483604, -8.469206, 0.7401797, -10.318176, 8.384557, 10.5476265, + 9.146971, -9.250223, 0.6290606, 4.4941425, -0.7514017, 7.2271705, -8.309598, + -1.4761636, 4.0140634, -6.021102, 9.132852, 5.6610966, -11.249811, 8.359293, + -1.9445792, -7.7393436, -0.3931331, -8.824441, -2.5995944, -2.5714035, 4.140213, + -3.6863053, 5.517265, 9.020411, -4.9286127, -7.871219, -3.7446704, 2.5179656, + -1.4543481, -2.2703636, 7.010597, -3.6436229, 6.753862, 7.4129915, 7.1406755, + 5.653706, 9.5445175, 0.15698843, 4.761813, -7.698002, 1.6870106, -4.5410123, + 4.171763, 5.3747005, 6.341021, 7.456738, -8.231657, 2.763487, -9.208167, + 6.676799, -1.1957736, 10.062605, 4.0975976, 7.312957, -2.4981596, -2.9658387, + -8.150425, -2.1075552, 2.64375, 1.6636052, 1.1483809, 0.09276015, 5.8556347, + -7.8481026, -5.9913163, -0.02840613, -9.937289, -1.0486673, -5.2340155, -3.83912, + 7.7165728, -8.409944, 0.80863273, -6.9119215, 7.5712357, 0.36031485, -6.056131, + -8.470033, 1.8678337, 3.0121377, -7.3096333, 8.205484, 5.262654, 8.774514, + -4.7603083, -7.2096143, -4.437014, 3.6080024, -1.624254, -4.2787876, 8.880863, + -4.8984556, 5.1782074, 9.944454, 3.911282, 3.5396595, 8.867042, -1.2006199, + 5.393288, -5.6455317, 0.7829499, -4.0338907, 2.479272, 6.5080743, 8.582535, + 7.0097537, -6.9823785, 3.984318, -7.225381, 5.3135114, -1.0391048, 8.951443, + -0.70119005, -8.510742, -0.42949116, -10.9224825, 2.8176029, 1.6800792, 5.778404, + 1.7269998, 7.1975236, 7.7258267, 2.7632928, 5.3399253, 3.4650044, 0.01971426, + -1.6468811, 4.114996, -1.5110453, 6.8689218, 8.269899, -3.1568048, -7.0344677, + 1.2911975, 5.950357, 0.19028673, 4.657226, -8.199647, 2.246055, 8.989509, + 5.3101015, -4.2400866}; + + std::vector X_embedded = { + -0.41849962, -0.53906363, 0.46958843, -0.35832694, -0.23779503, -0.29751351, -0.01072748, + -0.21353109, -0.54769957, -0.55086273, 0.37093949, -0.12714292, -0.06639574, -0.36098689, + -0.13060696, -0.07362658, -1.01205945, -0.39285606, 0.2864089, -0.32031146, -0.19595343, + 0.08900568, -0.04813879, -0.06563424, -0.42655188, -0.69014251, 0.51459783, -0.1942696, + -0.07767916, -0.6119386, 0.04813685, -0.22557008, -0.56890118, -0.60293794, 0.43429622, + -0.09240723, -0.00624062, -0.25800395, -0.1886092, 0.01655941, -0.01961523, -0.14147359, + 0.41414487, -0.8512944, -0.61199242, -0.18586016, 0.14024924, -0.41635606, -0.02890144, + 0.1065347, 0.39700791, -1.14060664, -0.95313865, 0.14416681, 0.17306046, -0.53189689, + -0.98987544, -0.67918193, 0.41787854, -0.20878236, -0.06612862, 0.03502904, -0.03765266, + -0.0980606, -0.00971657, 0.29432917, 0.36575687, -1.1645509, -0.89094597, 0.03718805, + 0.2310573, -0.38345811, -0.10401925, -0.10653082, 0.38469055, -0.88302094, -0.80197543, + 0.03548668, 0.02775662, -0.54374295, 0.03379983, 0.00923623, 0.29320273, -1.05263519, + -0.93360096, 0.03778313, 0.12360487, -0.56437284, 0.0644429, 0.33432651, 0.36450726, + -1.22978747, -0.83822101, -0.18796451, 0.34888434, -0.3801491, -0.45327303, -0.59747899, + 0.39697698, -0.15616602, -0.06159166, -0.40301991, -0.11725303, -0.11913263, -0.12406619, + -0.11227967, 0.43083835, -0.90535849, -0.81646025, 0.10012121, -0.0141237, -0.63747931, + 0.04805023, 0.34190539, 0.50725192, -1.17861414, -0.74641538, -0.09333111, 0.27992678, + -0.56214809, 0.04970971, 0.36249384, 0.57705611, -1.16913795, -0.69849908, 0.10957897, + 0.27983218, -0.62088525, 0.0410459, 0.23973398, 0.40960434, -1.14183664, -0.83321381, + 0.02149482, 0.21720445, -0.49869928, -0.95655465, -0.51680422, 0.45761383, -0.08351214, + -0.12151554, 0.00819737, -0.20813803, -0.01055793, 0.25319234, 0.36154974, 0.1822421, + -1.15837133, -0.92209691, -0.0501582, 0.08535917, -0.54003763, -1.08675635, -1.04009593, + 0.09408128, 0.07009826, -0.01762833, -0.19180447, -0.18029785, -0.20342001, 0.04034991, + 0.1814747, 0.36906669, -1.13532007, -0.8852452, 0.0782818, 0.16825101, -0.50301319, + -0.29128098, -0.65341312, 0.51484352, -0.38758236, -0.22531103, -0.55021971, 0.10804344, + -0.3521522, -0.38849035, -0.74110794, 0.53761131, -0.25142813, -0.1118066, -0.47453368, + 0.06347904, -0.23796193, -1.02682328, -0.47594091, 0.39515916, -0.2782529, -0.16566519, + 0.08063579, 0.00810116, -0.06213913, -1.059654, -0.62496334, 0.53698546, -0.11806234, + 0.00356161, 0.11513405, -0.14213292, 0.04102662, -0.36622161, -0.73686272, 0.48323864, + -0.27338892, -0.14203401, -0.41736352, 0.03332564, -0.21907479, -0.06396769, 0.01831361, + 0.46263444, -1.01878166, -0.86486858, 0.17622118, -0.01249686, -0.74530888, -0.9354887, + -0.5027945, 0.38170099, -0.15547098, 0.00677824, -0.04677663, -0.13541745, 0.07253501, + -0.97933143, -0.58001202, 0.48235369, -0.18836913, -0.02430783, 0.07572441, -0.08101331, + 0.00630076, -0.16881248, -0.67989182, 0.46083611, -0.43910736, -0.29321918, -0.38735861, + 0.07669903, -0.29749861, -0.40047669, -0.56722462, 0.33168188, -0.13118173, -0.06672747, + -0.56856316, -0.26269144, -0.14236671, 0.10651901, 0.4962585, 0.38848072, -1.06653547, + -0.64079332, -0.47378591, 0.43195483, -0.04856951, -0.9840439, -0.70610428, 0.34028092, + -0.2089237, -0.05382041, 0.01625874, -0.02080803, -0.12535211, -0.04146428, -1.24533033, + 0.48944879, 0.0578458, 0.26708388, -0.90321028, 0.35377088, -0.36791429, -0.35382384, + -0.52748734, 0.42854419, -0.31744713, -0.19174226, -0.39073724, -0.03258846, -0.19978228, + -0.36185205, -0.57412046, 0.43681973, -0.25414538, -0.12904905, -0.46334973, -0.03123853, + -0.11303604, -0.87073672, -0.45441297, 0.41825858, -0.25303507, -0.21845073, 0.10248682, + -0.11045569, -0.10002795, -0.00572806, 0.16519061, 0.42651513, -1.11417019, -0.83789682, + 0.02995787, 0.16843079, -0.53874511, 0.03056994, 0.17877036, 0.49632853, -1.03276777, + -0.74778616, -0.03971953, 0.10907949, -0.67385727, -0.9523471, -0.56550741, 0.40409449, + -0.2703723, -0.10175014, 0.13605487, -0.06306008, -0.01768126, -0.4749442, -0.56964815, + 0.39389887, -0.19248079, -0.04161081, -0.38728487, -0.20341556, -0.12656988, -0.35949609, + -0.46137866, 0.28798422, -0.06603147, -0.04363992, -0.60343552, -0.23565227, -0.10242701, + -0.06792886, 0.09689897, 0.33259571, -0.98854214, -0.84444433, 0.00673901, 0.13457057, + -0.43145794, -0.51500046, -0.50821936, 0.38000089, 0.0132636, 0.0580942, -0.40157595, + -0.11967677, 0.02549113, -0.10350953, 0.22918226, 0.40411913, -1.05619383, -0.71218503, + -0.02197581, 0.26422262, -0.34765676, 0.06601537, 0.21712676, 0.34723559, -1.20982027, + -0.95646334, 0.00793948, 0.27620381, -0.43475035, -0.67326003, -0.6137197, 0.43724492, + -0.17666136, -0.06591748, -0.18937394, -0.07400128, -0.06881691, -0.5201112, -0.61088628, + 0.4225319, -0.18969463, -0.06921366, -0.33993208, -0.06990873, -0.10288513, -0.70659858, + -0.56003648, 0.46628812, -0.16090363, -0.0185108, -0.1431348, -0.1128775, -0.0078648, + -0.02323332, 0.04292452, 0.39291084, -0.94897962, -0.63863206, -0.16546988, 0.23698957, + -0.30633628}; + + raft::handle_t handle; + + cudaStream_t stream = handle.get_stream(); + + rmm::device_uvector d_X(X.size(), stream); + rmm::device_uvector d_X_embedded(X_embedded.size(), stream); + + raft::update_device(d_X.data(), X.data(), X.size(), stream); + raft::update_device(d_X_embedded.data(), X_embedded.data(), X_embedded.size(), stream); + + // euclidean test + score = trustworthiness_score( + handle, d_X.data(), d_X_embedded.data(), 50, 30, 8, 5); + } + + void SetUp() override { basicTest(); } + + void TearDown() override {} + + protected: + double score; +}; + +typedef TrustworthinessScoreTest TrustworthinessScoreTestF; +TEST_F(TrustworthinessScoreTestF, Result) { ASSERT_TRUE(0.9375 < score && score < 0.9379); } +}; // namespace stats +}; // namespace raft diff --git a/cpp/test/stats/v_measure.cu b/cpp/test/stats/v_measure.cu new file mode 100644 index 0000000000..bef3b1bc3b --- /dev/null +++ b/cpp/test/stats/v_measure.cu @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "../test_utils.h" +#include +#include +#include +#include +#include +#include +#include + +namespace raft { +namespace stats { + +// parameter structure definition +struct vMeasureParam { + int nElements; + int lowerLabelRange; + int upperLabelRange; + double beta; + bool sameArrays; + double tolerance; +}; + +// test fixture class +template +class vMeasureTest : public ::testing::TestWithParam { + protected: + // the constructor + void SetUp() override + { + // getting the parameters + params = ::testing::TestWithParam::GetParam(); + + nElements = params.nElements; + lowerLabelRange = params.lowerLabelRange; + upperLabelRange = params.upperLabelRange; + + // generating random value test input + std::vector arr1(nElements, 0); + std::vector arr2(nElements, 0); + std::random_device rd; + std::default_random_engine dre(rd()); + std::uniform_int_distribution intGenerator(lowerLabelRange, upperLabelRange); + + std::generate(arr1.begin(), arr1.end(), [&]() { return intGenerator(dre); }); + if (params.sameArrays) { + arr2 = arr1; + } else { + std::generate(arr2.begin(), arr2.end(), [&]() { return intGenerator(dre); }); + } + + // allocating and initializing memory to the GPU + + RAFT_CUDA_TRY(cudaStreamCreate(&stream)); + rmm::device_uvector truthClusterArray(nElements, stream); + rmm::device_uvector predClusterArray(nElements, stream); + raft::update_device(truthClusterArray.data(), &arr1[0], (int)nElements, stream); + raft::update_device(predClusterArray.data(), &arr2[0], (int)nElements, stream); + + // calculating the golden output + double truthHomogeity, truthCompleteness; + + truthHomogeity = raft::stats::homogeneity_score(truthClusterArray.data(), + predClusterArray.data(), + nElements, + lowerLabelRange, + upperLabelRange, + stream); + truthCompleteness = raft::stats::homogeneity_score(predClusterArray.data(), + truthClusterArray.data(), + nElements, + lowerLabelRange, + upperLabelRange, + stream); + + if (truthCompleteness + truthHomogeity == 0.0) + truthVMeasure = 0.0; + else + truthVMeasure = ((1 + params.beta) * truthHomogeity * truthCompleteness / + (params.beta * truthHomogeity + truthCompleteness)); + // calling the v_measure CUDA implementation + computedVMeasure = raft::stats::v_measure(truthClusterArray.data(), + predClusterArray.data(), + nElements, + lowerLabelRange, + upperLabelRange, + stream, + params.beta); + } + + // the destructor + void TearDown() override { RAFT_CUDA_TRY(cudaStreamDestroy(stream)); } + + // declaring the data values + vMeasureParam params; + T lowerLabelRange, upperLabelRange; + int nElements = 0; + double truthVMeasure = 0; + double computedVMeasure = 0; + cudaStream_t stream = 0; +}; + +// setting test parameter values +const std::vector inputs = {{199, 1, 10, 1.0, false, 0.000001}, + {200, 15, 100, 1.0, false, 0.000001}, + {100, 1, 20, 1.0, false, 0.000001}, + {10, 1, 10, 1.0, false, 0.000001}, + {198, 1, 100, 1.0, false, 0.000001}, + {300, 3, 99, 1.0, false, 0.000001}, + {199, 1, 10, 1.0, true, 0.000001}, + {200, 15, 100, 1.0, true, 0.000001}, + {100, 1, 20, 1.0, true, 0.000001}, + {10, 1, 10, 1.0, true, 0.000001}, + {198, 1, 100, 1.0, true, 0.000001}, + {300, 3, 99, 1.0, true, 0.000001}}; + +// writing the test suite +typedef vMeasureTest vMeasureTestClass; +TEST_P(vMeasureTestClass, Result) +{ + ASSERT_NEAR(computedVMeasure, truthVMeasure, params.tolerance); +} +INSTANTIATE_TEST_CASE_P(vMeasure, vMeasureTestClass, ::testing::ValuesIn(inputs)); + +} // end namespace stats +} // end namespace raft From 39461bba723ce1b50a02cc4e0c40fc7d6953eeda Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 16 Feb 2022 12:52:08 -0500 Subject: [PATCH 02/12] Updating style --- cpp/include/raft/matrix/col_wise_sort.hpp | 59 +-- .../raft/matrix/detail/columnWiseSort.cuh | 466 +++++++++--------- cpp/include/raft/stats/accuracy.hpp | 8 +- .../raft/stats/adjusted_rand_index.hpp | 10 +- cpp/include/raft/stats/common.hpp | 1 - cpp/include/raft/stats/completeness_score.hpp | 7 +- cpp/include/raft/stats/contingency_matrix.hpp | 18 +- .../detail/batched/information_criterion.cuh | 59 ++- cpp/include/raft/stats/detail/entropy.cuh | 23 +- .../raft/stats/detail/homogeneity_score.cuh | 2 +- .../raft/stats/detail/kl_divergence.cuh | 2 - .../raft/stats/detail/mutual_info_score.cuh | 20 +- .../stats/detail/trustworthiness_score.cuh | 30 +- cpp/include/raft/stats/dispersion.hpp | 8 +- cpp/include/raft/stats/entropy.hpp | 5 +- cpp/include/raft/stats/homogeneity_score.hpp | 6 +- .../raft/stats/information_criterion.hpp | 20 +- cpp/include/raft/stats/kl_divergence.hpp | 2 +- cpp/include/raft/stats/meanvar.hpp | 3 +- cpp/include/raft/stats/mutual_info_score.hpp | 8 +- cpp/include/raft/stats/r2_score.hpp | 5 +- cpp/include/raft/stats/rand_index.hpp | 8 +- cpp/include/raft/stats/regression_metrics.hpp | 60 +-- cpp/include/raft/stats/silhouette_score.hpp | 45 +- .../raft/stats/trustworthiness_score.hpp | 6 +- cpp/include/raft/stats/v_measure.hpp | 6 +- cpp/test/stats/adjusted_rand_index.cu | 2 +- cpp/test/stats/completeness_score.cu | 24 +- cpp/test/stats/contingencyMatrix.cu | 23 +- cpp/test/stats/dispersion.cu | 2 +- cpp/test/stats/entropy.cu | 2 +- cpp/test/stats/homogeneity_score.cu | 22 +- cpp/test/stats/kl_divergence.cu | 2 +- cpp/test/stats/mutual_info_score.cu | 12 +- cpp/test/stats/rand_index.cu | 4 +- cpp/test/stats/silhouette_score.cu | 35 +- cpp/test/stats/trustworthiness.cu | 2 +- cpp/test/stats/v_measure.cu | 36 +- 38 files changed, 533 insertions(+), 520 deletions(-) diff --git a/cpp/include/raft/matrix/col_wise_sort.hpp b/cpp/include/raft/matrix/col_wise_sort.hpp index c4f8435424..5e519141c9 100644 --- a/cpp/include/raft/matrix/col_wise_sort.hpp +++ b/cpp/include/raft/matrix/col_wise_sort.hpp @@ -1,18 +1,18 @@ /* -* Copyright (c) 2019-2022, NVIDIA CORPORATION. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #pragma once @@ -22,18 +22,18 @@ namespace raft { namespace matrix { /** -* @brief sort columns within each row of row-major input matrix and return sorted indexes -* modelled as key-value sort with key being input matrix and value being index of values -* @param in: input matrix -* @param out: output value(index) matrix -* @param n_rows: number rows of input matrix -* @param n_cols: number columns of input matrix -* @param bAllocWorkspace: check returned value, if true allocate workspace passed in workspaceSize -* @param workspacePtr: pointer to workspace memory -* @param workspaceSize: Size of workspace to be allocated -* @param stream: cuda stream to execute prim on -* @param sortedKeys: Optional, output matrix for sorted keys (input) -*/ + * @brief sort columns within each row of row-major input matrix and return sorted indexes + * modelled as key-value sort with key being input matrix and value being index of values + * @param in: input matrix + * @param out: output value(index) matrix + * @param n_rows: number rows of input matrix + * @param n_cols: number columns of input matrix + * @param bAllocWorkspace: check returned value, if true allocate workspace passed in workspaceSize + * @param workspacePtr: pointer to workspace memory + * @param workspaceSize: Size of workspace to be allocated + * @param stream: cuda stream to execute prim on + * @param sortedKeys: Optional, output matrix for sorted keys (input) + */ template void sort_cols_per_row(const InType* in, OutType* out, @@ -43,9 +43,10 @@ void sort_cols_per_row(const InType* in, void* workspacePtr, size_t& workspaceSize, cudaStream_t stream, - InType* sortedKeys = nullptr) { - detail::sortColumnsPerRow(in, out, n_rows, n_columns, bAllocWorkspace, workspacePtr, workspaceSize, - stream, sortedKeys); + InType* sortedKeys = nullptr) +{ + detail::sortColumnsPerRow( + in, out, n_rows, n_columns, bAllocWorkspace, workspacePtr, workspaceSize, stream, sortedKeys); } }; // end namespace matrix }; // end namespace raft diff --git a/cpp/include/raft/matrix/detail/columnWiseSort.cuh b/cpp/include/raft/matrix/detail/columnWiseSort.cuh index f5758e3459..65febcb6d8 100644 --- a/cpp/include/raft/matrix/detail/columnWiseSort.cuh +++ b/cpp/include/raft/matrix/detail/columnWiseSort.cuh @@ -1,30 +1,30 @@ /* -* Copyright (c) 2019-2022, NVIDIA CORPORATION. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #pragma once #include -#include #include #include #include +#include #define INST_BLOCK_SORT(keyIn, keyOut, valueInOut, rows, columns, blockSize, elemPT, stream) \ -devKeyValSortColumnPerRow<<>>( \ -keyIn, keyOut, valueInOut, rows, columns, std::numeric_limits::max()) + devKeyValSortColumnPerRow<<>>( \ + keyIn, keyOut, valueInOut, rows, columns, std::numeric_limits::max()) namespace raft { namespace matrix { @@ -32,198 +32,198 @@ namespace detail { template struct TemplateChecker { -enum { + enum { IsValid = (std::is_same::value && BLOCK_SIZE <= 1024) || (std::is_same::value && BLOCK_SIZE <= 1024) || (std::is_same::value && BLOCK_SIZE <= 1024) || (std::is_same::value && BLOCK_SIZE <= 512) -}; + }; }; template struct SmemPerBlock { -typedef cub::BlockLoad - BlockLoadTypeKey; + typedef cub::BlockLoad + BlockLoadTypeKey; -typedef cub::BlockRadixSort BlockRadixSortType; + typedef cub::BlockRadixSort BlockRadixSortType; -union TempStorage { + union TempStorage { typename BlockLoadTypeKey::TempStorage keyLoad; typename BlockRadixSortType::TempStorage sort; -} tempStorage; + } tempStorage; }; template __global__ void devLayoutIdx(InType* in, int n_cols, int totalElements) { -int idx = threadIdx.x + blockDim.x * blockIdx.x; -int n = n_cols; + int idx = threadIdx.x + blockDim.x * blockIdx.x; + int n = n_cols; -if (idx < totalElements) { in[idx] = idx % n; } + if (idx < totalElements) { in[idx] = idx % n; } } template __global__ void devOffsetKernel(T* in, T value, int n_times) { -int idx = threadIdx.x + blockIdx.x * blockDim.x; -if (idx < n_times) in[idx] = idx * value; + int idx = threadIdx.x + blockIdx.x * blockDim.x; + if (idx < n_times) in[idx] = idx * value; } // block level radix sort - can only sort as much data we can fit within shared memory template < - typename InType, - typename OutType, - int BLOCK_SIZE, - int ITEMS_PER_THREAD, - typename std::enable_if::IsValid, InType>::type* = nullptr> + typename InType, + typename OutType, + int BLOCK_SIZE, + int ITEMS_PER_THREAD, + typename std::enable_if::IsValid, InType>::type* = nullptr> __global__ void __launch_bounds__(1024, 1) devKeyValSortColumnPerRow(const InType* inputKeys, - InType* outputKeys, - OutType* inputVals, - int n_rows, - int n_cols, - InType MAX_VALUE) + InType* outputKeys, + OutType* inputVals, + int n_rows, + int n_cols, + InType MAX_VALUE) { -typedef cub::BlockLoad - BlockLoadTypeKey; + typedef cub::BlockLoad + BlockLoadTypeKey; -typedef cub::BlockRadixSort BlockRadixSortType; + typedef cub::BlockRadixSort BlockRadixSortType; -__shared__ SmemPerBlock tmpSmem; + __shared__ SmemPerBlock tmpSmem; -InType threadKeys[ITEMS_PER_THREAD]; -OutType threadValues[ITEMS_PER_THREAD]; + InType threadKeys[ITEMS_PER_THREAD]; + OutType threadValues[ITEMS_PER_THREAD]; -int blockOffset = blockIdx.x * n_cols; -BlockLoadTypeKey(tmpSmem.tempStorage.keyLoad) - .Load(inputKeys + blockOffset, threadKeys, n_cols, MAX_VALUE); + int blockOffset = blockIdx.x * n_cols; + BlockLoadTypeKey(tmpSmem.tempStorage.keyLoad) + .Load(inputKeys + blockOffset, threadKeys, n_cols, MAX_VALUE); -OutType idxBase = threadIdx.x * ITEMS_PER_THREAD; -for (int i = 0; i < ITEMS_PER_THREAD; i++) { + OutType idxBase = threadIdx.x * ITEMS_PER_THREAD; + for (int i = 0; i < ITEMS_PER_THREAD; i++) { OutType eId = idxBase + (OutType)i; if (eId < n_cols) - threadValues[i] = eId; + threadValues[i] = eId; else - threadValues[i] = MAX_VALUE; -} + threadValues[i] = MAX_VALUE; + } -__syncthreads(); + __syncthreads(); -BlockRadixSortType(tmpSmem.tempStorage.sort).SortBlockedToStriped(threadKeys, threadValues); + BlockRadixSortType(tmpSmem.tempStorage.sort).SortBlockedToStriped(threadKeys, threadValues); -// storing index values back (not keys) -cub::StoreDirectStriped(threadIdx.x, inputVals + blockOffset, threadValues, n_cols); + // storing index values back (not keys) + cub::StoreDirectStriped(threadIdx.x, inputVals + blockOffset, threadValues, n_cols); -if (outputKeys) { + if (outputKeys) { cub::StoreDirectStriped(threadIdx.x, outputKeys + blockOffset, threadKeys, n_cols); -} + } } template < - typename InType, - typename OutType, - int BLOCK_SIZE, - int ITEMS_PER_THREAD, - typename std::enable_if::IsValid), InType>::type* = nullptr> + typename InType, + typename OutType, + int BLOCK_SIZE, + int ITEMS_PER_THREAD, + typename std::enable_if::IsValid), InType>::type* = nullptr> __global__ void devKeyValSortColumnPerRow(const InType* inputKeys, - InType* outputKeys, - OutType* inputVals, - int n_rows, - int n_cols, - InType MAX_VALUE) + InType* outputKeys, + OutType* inputVals, + int n_rows, + int n_cols, + InType MAX_VALUE) { -// place holder function -// so that compiler unrolls for all template types successfully + // place holder function + // so that compiler unrolls for all template types successfully } // helper function to layout values (index's) for key-value sort template cudaError_t layoutIdx(OutType* in, int n_rows, int n_columns, cudaStream_t stream) { -int totalElements = n_rows * n_columns; -dim3 block(256); -dim3 grid((totalElements + block.x - 1) / block.x); -devLayoutIdx<<>>(in, n_columns, totalElements); -return cudaGetLastError(); + int totalElements = n_rows * n_columns; + dim3 block(256); + dim3 grid((totalElements + block.x - 1) / block.x); + devLayoutIdx<<>>(in, n_columns, totalElements); + return cudaGetLastError(); } // helper function to layout offsets for rows for DeviceSegmentedRadixSort template cudaError_t layoutSortOffset(T* in, T value, int n_times, cudaStream_t stream) { -dim3 block(128); -dim3 grid((n_times + block.x - 1) / block.x); -devOffsetKernel<<>>(in, value, n_times); -return cudaGetLastError(); + dim3 block(128); + dim3 grid((n_times + block.x - 1) / block.x); + devOffsetKernel<<>>(in, value, n_times); + return cudaGetLastError(); } /** -* @brief sort columns within each row of row-major input matrix and return sorted indexes -* modelled as key-value sort with key being input matrix and value being index of values -* @param in: input matrix -* @param out: output value(index) matrix -* @param n_rows: number rows of input matrix -* @param n_cols: number columns of input matrix -* @param bAllocWorkspace: check returned value, if true allocate workspace passed in workspaceSize -* @param workspacePtr: pointer to workspace memory -* @param workspaceSize: Size of workspace to be allocated -* @param stream: cuda stream to execute prim on -* @param sortedKeys: Optional, output matrix for sorted keys (input) -*/ + * @brief sort columns within each row of row-major input matrix and return sorted indexes + * modelled as key-value sort with key being input matrix and value being index of values + * @param in: input matrix + * @param out: output value(index) matrix + * @param n_rows: number rows of input matrix + * @param n_cols: number columns of input matrix + * @param bAllocWorkspace: check returned value, if true allocate workspace passed in workspaceSize + * @param workspacePtr: pointer to workspace memory + * @param workspaceSize: Size of workspace to be allocated + * @param stream: cuda stream to execute prim on + * @param sortedKeys: Optional, output matrix for sorted keys (input) + */ template void sortColumnsPerRow(const InType* in, - OutType* out, - int n_rows, - int n_columns, - bool& bAllocWorkspace, - void* workspacePtr, - size_t& workspaceSize, - cudaStream_t stream, - InType* sortedKeys = nullptr) + OutType* out, + int n_rows, + int n_columns, + bool& bAllocWorkspace, + void* workspacePtr, + size_t& workspaceSize, + cudaStream_t stream, + InType* sortedKeys = nullptr) { -// assume non-square row-major matrices -// current use-case: KNN, trustworthiness scores -// output : either sorted indices or sorted indices and input values -// future : this prim can be modified to be more generic and serve as a way to sort column entries -// per row -// i.e. another output format: sorted values only - -int totalElements = n_rows * n_columns; -size_t perElementSmemUsage = sizeof(InType) + sizeof(OutType); -size_t memAlignWidth = 256; - -// @ToDo: Figure out dynamic shared memory for block sort kernel - better for volta and beyond -// int currDevice = 0, smemLimit = 0; -// RAFT_CUDA_TRY(cudaGetDevice(&currDevice)); -// RAFT_CUDA_TRY(cudaDeviceGetAttribute(&smemLimit, cudaDevAttrMaxSharedMemoryPerBlock, -// currDevice)); size_t maxElementsForBlockSort = smemLimit / perElementSmemUsage; - -// for 48KB smem/block, can fit in 6144 4byte key-value pair -// assuming key-value sort for now - smem computation will change for value only sort -// dtype being size of key-value pair -std::map dtypeToColumnMap = {{4, 12288}, // short + short - {8, 12288}, // float/int + int/float - {12, 6144}, // double + int/float - {16, 6144}}; // double + double - -if (dtypeToColumnMap.count(perElementSmemUsage) != 0 && - n_columns <= dtypeToColumnMap[perElementSmemUsage]) { + // assume non-square row-major matrices + // current use-case: KNN, trustworthiness scores + // output : either sorted indices or sorted indices and input values + // future : this prim can be modified to be more generic and serve as a way to sort column entries + // per row + // i.e. another output format: sorted values only + + int totalElements = n_rows * n_columns; + size_t perElementSmemUsage = sizeof(InType) + sizeof(OutType); + size_t memAlignWidth = 256; + + // @ToDo: Figure out dynamic shared memory for block sort kernel - better for volta and beyond + // int currDevice = 0, smemLimit = 0; + // RAFT_CUDA_TRY(cudaGetDevice(&currDevice)); + // RAFT_CUDA_TRY(cudaDeviceGetAttribute(&smemLimit, cudaDevAttrMaxSharedMemoryPerBlock, + // currDevice)); size_t maxElementsForBlockSort = smemLimit / perElementSmemUsage; + + // for 48KB smem/block, can fit in 6144 4byte key-value pair + // assuming key-value sort for now - smem computation will change for value only sort + // dtype being size of key-value pair + std::map dtypeToColumnMap = {{4, 12288}, // short + short + {8, 12288}, // float/int + int/float + {12, 6144}, // double + int/float + {16, 6144}}; // double + double + + if (dtypeToColumnMap.count(perElementSmemUsage) != 0 && + n_columns <= dtypeToColumnMap[perElementSmemUsage]) { // more elements per thread --> more register pressure // 512(blockSize) * 8 elements per thread = 71 register / thread // instantiate some kernel combinations if (n_columns <= 512) - INST_BLOCK_SORT(in, sortedKeys, out, n_rows, n_columns, 128, 4, stream); + INST_BLOCK_SORT(in, sortedKeys, out, n_rows, n_columns, 128, 4, stream); else if (n_columns > 512 && n_columns <= 1024) - INST_BLOCK_SORT(in, sortedKeys, out, n_rows, n_columns, 128, 8, stream); + INST_BLOCK_SORT(in, sortedKeys, out, n_rows, n_columns, 128, 8, stream); else if (n_columns > 1024 && n_columns <= 3072) - INST_BLOCK_SORT(in, sortedKeys, out, n_rows, n_columns, 512, 6, stream); + INST_BLOCK_SORT(in, sortedKeys, out, n_rows, n_columns, 512, 6, stream); else if (n_columns > 3072 && n_columns <= 4096) - INST_BLOCK_SORT(in, sortedKeys, out, n_rows, n_columns, 512, 8, stream); + INST_BLOCK_SORT(in, sortedKeys, out, n_rows, n_columns, 512, 8, stream); else if (n_columns > 4096 && n_columns <= 6144) - INST_BLOCK_SORT(in, sortedKeys, out, n_rows, n_columns, 512, 12, stream); + INST_BLOCK_SORT(in, sortedKeys, out, n_rows, n_columns, 512, 12, stream); else - INST_BLOCK_SORT(in, sortedKeys, out, n_rows, n_columns, 1024, 12, stream); -} else if (n_columns <= (1 << 18) && n_rows > 1) { + INST_BLOCK_SORT(in, sortedKeys, out, n_rows, n_columns, 1024, 12, stream); + } else if (n_columns <= (1 << 18) && n_rows > 1) { // device Segmented radix sort // 2^18 column cap to restrict size of workspace ~512 MB // will give better perf than below deviceWide Sort for even larger dims @@ -232,114 +232,114 @@ if (dtypeToColumnMap.count(perElementSmemUsage) != 0 && // need auxillary storage: cub sorting + keys (if user not passing) + // staging for values out + segment partition if (workspaceSize == 0 || !workspacePtr) { - OutType* tmpValIn = nullptr; - int* tmpOffsetBuffer = nullptr; - - // first call is to get size of workspace - RAFT_CUDA_TRY(cub::DeviceSegmentedRadixSort::SortPairs(workspacePtr, - workspaceSize, - in, - sortedKeys, - tmpValIn, - out, - totalElements, - numSegments, - tmpOffsetBuffer, - tmpOffsetBuffer + 1)); - bAllocWorkspace = true; - // more staging space for temp output of keys - if (!sortedKeys) - workspaceSize += raft::alignTo(sizeof(InType) * (size_t)totalElements, memAlignWidth); - - // value in KV pair need to be passed in, out buffer is separate - workspaceSize += raft::alignTo(sizeof(OutType) * (size_t)totalElements, memAlignWidth); - - // for segment offsets - workspaceSize += raft::alignTo(sizeof(int) * (size_t)numSegments, memAlignWidth); + OutType* tmpValIn = nullptr; + int* tmpOffsetBuffer = nullptr; + + // first call is to get size of workspace + RAFT_CUDA_TRY(cub::DeviceSegmentedRadixSort::SortPairs(workspacePtr, + workspaceSize, + in, + sortedKeys, + tmpValIn, + out, + totalElements, + numSegments, + tmpOffsetBuffer, + tmpOffsetBuffer + 1)); + bAllocWorkspace = true; + // more staging space for temp output of keys + if (!sortedKeys) + workspaceSize += raft::alignTo(sizeof(InType) * (size_t)totalElements, memAlignWidth); + + // value in KV pair need to be passed in, out buffer is separate + workspaceSize += raft::alignTo(sizeof(OutType) * (size_t)totalElements, memAlignWidth); + + // for segment offsets + workspaceSize += raft::alignTo(sizeof(int) * (size_t)numSegments, memAlignWidth); } else { - size_t workspaceOffset = 0; - - if (!sortedKeys) { - sortedKeys = reinterpret_cast(workspacePtr); - workspaceOffset = raft::alignTo(sizeof(InType) * (size_t)totalElements, memAlignWidth); - workspacePtr = (void*)((size_t)workspacePtr + workspaceOffset); - } - - OutType* dValuesIn = reinterpret_cast(workspacePtr); - workspaceOffset = raft::alignTo(sizeof(OutType) * (size_t)totalElements, memAlignWidth); - workspacePtr = (void*)((size_t)workspacePtr + workspaceOffset); - - int* dSegmentOffsets = reinterpret_cast(workspacePtr); - workspaceOffset = raft::alignTo(sizeof(int) * (size_t)numSegments, memAlignWidth); - workspacePtr = (void*)((size_t)workspacePtr + workspaceOffset); - - // layout idx - RAFT_CUDA_TRY(layoutIdx(dValuesIn, n_rows, n_columns, stream)); - - // layout segment lengths - spread out column length - RAFT_CUDA_TRY(layoutSortOffset(dSegmentOffsets, n_columns, numSegments, stream)); - - RAFT_CUDA_TRY(cub::DeviceSegmentedRadixSort::SortPairs(workspacePtr, - workspaceSize, - in, - sortedKeys, - dValuesIn, - out, - totalElements, - numSegments, - dSegmentOffsets, - dSegmentOffsets + 1, - 0, - sizeof(InType) * 8, - stream)); + size_t workspaceOffset = 0; + + if (!sortedKeys) { + sortedKeys = reinterpret_cast(workspacePtr); + workspaceOffset = raft::alignTo(sizeof(InType) * (size_t)totalElements, memAlignWidth); + workspacePtr = (void*)((size_t)workspacePtr + workspaceOffset); + } + + OutType* dValuesIn = reinterpret_cast(workspacePtr); + workspaceOffset = raft::alignTo(sizeof(OutType) * (size_t)totalElements, memAlignWidth); + workspacePtr = (void*)((size_t)workspacePtr + workspaceOffset); + + int* dSegmentOffsets = reinterpret_cast(workspacePtr); + workspaceOffset = raft::alignTo(sizeof(int) * (size_t)numSegments, memAlignWidth); + workspacePtr = (void*)((size_t)workspacePtr + workspaceOffset); + + // layout idx + RAFT_CUDA_TRY(layoutIdx(dValuesIn, n_rows, n_columns, stream)); + + // layout segment lengths - spread out column length + RAFT_CUDA_TRY(layoutSortOffset(dSegmentOffsets, n_columns, numSegments, stream)); + + RAFT_CUDA_TRY(cub::DeviceSegmentedRadixSort::SortPairs(workspacePtr, + workspaceSize, + in, + sortedKeys, + dValuesIn, + out, + totalElements, + numSegments, + dSegmentOffsets, + dSegmentOffsets + 1, + 0, + sizeof(InType) * 8, + stream)); } -} else { + } else { // batched per row device wide sort if (workspaceSize == 0 || !workspacePtr) { - OutType* tmpValIn = nullptr; + OutType* tmpValIn = nullptr; - // first call is to get size of workspace - RAFT_CUDA_TRY(cub::DeviceRadixSort::SortPairs( - workspacePtr, workspaceSize, in, sortedKeys, tmpValIn, out, n_columns)); - bAllocWorkspace = true; + // first call is to get size of workspace + RAFT_CUDA_TRY(cub::DeviceRadixSort::SortPairs( + workspacePtr, workspaceSize, in, sortedKeys, tmpValIn, out, n_columns)); + bAllocWorkspace = true; - if (!sortedKeys) - workspaceSize += raft::alignTo(sizeof(InType) * (size_t)n_columns, memAlignWidth); + if (!sortedKeys) + workspaceSize += raft::alignTo(sizeof(InType) * (size_t)n_columns, memAlignWidth); - workspaceSize += raft::alignTo(sizeof(OutType) * (size_t)n_columns, memAlignWidth); + workspaceSize += raft::alignTo(sizeof(OutType) * (size_t)n_columns, memAlignWidth); } else { - size_t workspaceOffset = 0; - bool userKeyOutputBuffer = true; - - if (!sortedKeys) { - userKeyOutputBuffer = false; - sortedKeys = reinterpret_cast(workspacePtr); - workspaceOffset = raft::alignTo(sizeof(InType) * (size_t)n_columns, memAlignWidth); - workspacePtr = (void*)((size_t)workspacePtr + workspaceOffset); - } - - OutType* dValuesIn = reinterpret_cast(workspacePtr); - workspaceOffset = raft::alignTo(sizeof(OutType) * (size_t)n_columns, memAlignWidth); - workspacePtr = (void*)((size_t)workspacePtr + workspaceOffset); - - // layout idx - RAFT_CUDA_TRY(layoutIdx(dValuesIn, 1, n_columns, stream)); - - for (int i = 0; i < n_rows; i++) { - InType* rowIn = - reinterpret_cast((size_t)in + (i * sizeof(InType) * (size_t)n_columns)); - OutType* rowOut = - reinterpret_cast((size_t)out + (i * sizeof(OutType) * (size_t)n_columns)); - - RAFT_CUDA_TRY(cub::DeviceRadixSort::SortPairs( - workspacePtr, workspaceSize, rowIn, sortedKeys, dValuesIn, rowOut, n_columns)); - - if (userKeyOutputBuffer) - sortedKeys = - reinterpret_cast((size_t)sortedKeys + sizeof(InType) * (size_t)n_columns); - } + size_t workspaceOffset = 0; + bool userKeyOutputBuffer = true; + + if (!sortedKeys) { + userKeyOutputBuffer = false; + sortedKeys = reinterpret_cast(workspacePtr); + workspaceOffset = raft::alignTo(sizeof(InType) * (size_t)n_columns, memAlignWidth); + workspacePtr = (void*)((size_t)workspacePtr + workspaceOffset); + } + + OutType* dValuesIn = reinterpret_cast(workspacePtr); + workspaceOffset = raft::alignTo(sizeof(OutType) * (size_t)n_columns, memAlignWidth); + workspacePtr = (void*)((size_t)workspacePtr + workspaceOffset); + + // layout idx + RAFT_CUDA_TRY(layoutIdx(dValuesIn, 1, n_columns, stream)); + + for (int i = 0; i < n_rows; i++) { + InType* rowIn = + reinterpret_cast((size_t)in + (i * sizeof(InType) * (size_t)n_columns)); + OutType* rowOut = + reinterpret_cast((size_t)out + (i * sizeof(OutType) * (size_t)n_columns)); + + RAFT_CUDA_TRY(cub::DeviceRadixSort::SortPairs( + workspacePtr, workspaceSize, rowIn, sortedKeys, dValuesIn, rowOut, n_columns)); + + if (userKeyOutputBuffer) + sortedKeys = + reinterpret_cast((size_t)sortedKeys + sizeof(InType) * (size_t)n_columns); + } } -} + } } }; // end namespace detail }; // end namespace matrix diff --git a/cpp/include/raft/stats/accuracy.hpp b/cpp/include/raft/stats/accuracy.hpp index 2d4d92131d..043d2c0d0b 100644 --- a/cpp/include/raft/stats/accuracy.hpp +++ b/cpp/include/raft/stats/accuracy.hpp @@ -31,11 +31,9 @@ namespace stats { * @return: Accuracy score in [0, 1]; higher is better. */ template -float accuracy(const math_t* predictions, - const math_t* ref_predictions, - int n, - cudaStream_t stream) { - return detail::accuracy_score(predictions, ref_predictions, n, stream); +float accuracy(const math_t* predictions, const math_t* ref_predictions, int n, cudaStream_t stream) +{ + return detail::accuracy_score(predictions, ref_predictions, n, stream); } } // namespace stats diff --git a/cpp/include/raft/stats/adjusted_rand_index.hpp b/cpp/include/raft/stats/adjusted_rand_index.hpp index be1ee54ad3..7b5d862e2c 100644 --- a/cpp/include/raft/stats/adjusted_rand_index.hpp +++ b/cpp/include/raft/stats/adjusted_rand_index.hpp @@ -27,7 +27,6 @@ namespace raft { namespace stats { - /** * @brief Function to calculate Adjusted RandIndex as described * here @@ -40,10 +39,11 @@ namespace stats { */ template double adjusted_rand_index(const T* firstClusterArray, - const T* secondClusterArray, - int size, - cudaStream_t stream) { - return detail::compute_adjusted_rand_index(firstClusterArray, secondClusterArray, size, stream); + const T* secondClusterArray, + int size, + cudaStream_t stream) +{ + return detail::compute_adjusted_rand_index(firstClusterArray, secondClusterArray, size, stream); } }; // end namespace stats diff --git a/cpp/include/raft/stats/common.hpp b/cpp/include/raft/stats/common.hpp index 0545345698..da3f44a0fa 100644 --- a/cpp/include/raft/stats/common.hpp +++ b/cpp/include/raft/stats/common.hpp @@ -67,6 +67,5 @@ enum HistType { /// Supported types of information criteria enum IC_Type { AIC, AICc, BIC }; - }; // end namespace stats }; // end namespace raft diff --git a/cpp/include/raft/stats/completeness_score.hpp b/cpp/include/raft/stats/completeness_score.hpp index 96cd217259..ee8598bcc4 100644 --- a/cpp/include/raft/stats/completeness_score.hpp +++ b/cpp/include/raft/stats/completeness_score.hpp @@ -37,9 +37,10 @@ double completeness_score(const T* truthClusterArray, int size, T lowerLabelRange, T upperLabelRange, - cudaStream_t stream) { - return detail::completeness_score(truthClusterArray, predClusterArray, size, - lowerLabelRange, upperLabelRange, stream); + cudaStream_t stream) +{ + return detail::completeness_score( + truthClusterArray, predClusterArray, size, lowerLabelRange, upperLabelRange, stream); } }; // end namespace stats diff --git a/cpp/include/raft/stats/contingency_matrix.hpp b/cpp/include/raft/stats/contingency_matrix.hpp index c1e8d5ff2d..153cb088ad 100644 --- a/cpp/include/raft/stats/contingency_matrix.hpp +++ b/cpp/include/raft/stats/contingency_matrix.hpp @@ -21,7 +21,6 @@ namespace raft { namespace stats { - /** * @brief use this to allocate output matrix size * size of matrix = (maxLabel - minLabel + 1)^2 * sizeof(int) @@ -33,9 +32,9 @@ namespace stats { */ template void getInputClassCardinality( - const T* groundTruth, const int nSamples, cudaStream_t stream, T& minLabel, T& maxLabel) + const T* groundTruth, const int nSamples, cudaStream_t stream, T& minLabel, T& maxLabel) { - detail::getInputClassCardinality(groundTruth, nSamples, stream, minLabel, maxLabel); + detail::getInputClassCardinality(groundTruth, nSamples, stream, minLabel, maxLabel); } /** @@ -55,7 +54,8 @@ size_t getContingencyMatrixWorkspaceSize(int nSamples, T minLabel = std::numeric_limits::max(), T maxLabel = std::numeric_limits::max()) { - return detail::getContingencyMatrixWorkspaceSize(nSamples, groundTruth, stream, minLabel, maxLabel); + return detail::getContingencyMatrixWorkspaceSize( + nSamples, groundTruth, stream, minLabel, maxLabel); } /** @@ -86,7 +86,15 @@ void contingencyMatrix(const T* groundTruth, T minLabel = std::numeric_limits::max(), T maxLabel = std::numeric_limits::max()) { - detail::contingencyMatrix(groundTruth, predictedLabel, nSamples, outMat, stream, workspace, workspaceSize, minLabel, maxLabel); + detail::contingencyMatrix(groundTruth, + predictedLabel, + nSamples, + outMat, + stream, + workspace, + workspaceSize, + minLabel, + maxLabel); } }; // namespace stats diff --git a/cpp/include/raft/stats/detail/batched/information_criterion.cuh b/cpp/include/raft/stats/detail/batched/information_criterion.cuh index 3cc9b09403..a6d8d174b0 100644 --- a/cpp/include/raft/stats/detail/batched/information_criterion.cuh +++ b/cpp/include/raft/stats/detail/batched/information_criterion.cuh @@ -15,8 +15,8 @@ */ #pragma once -#include #include +#include #include @@ -40,36 +40,33 @@ namespace detail { * @param[in] n_samples Number of samples in each series * @param[in] stream CUDA stream */ - template - void information_criterion(ScalarT *d_ic, - const ScalarT *d_loglikelihood, - IC_Type ic_type, - IdxT n_params, - IdxT batch_size, - IdxT n_samples, - cudaStream_t stream) { - ScalarT ic_base{}; - ScalarT N = static_cast(n_params); - ScalarT T = static_cast(n_samples); - switch (ic_type) { - case AIC: - ic_base = (ScalarT) 2.0 * N; - break; - case AICc: - ic_base = (ScalarT) 2.0 * (N + (N * (N + (ScalarT) 1.0)) / (T - N - (ScalarT) 1.0)); - break; - case BIC: - ic_base = std::log(T) * N; - break; - } - /* Compute information criterion from log-likelihood and base term */ - raft::linalg::unaryOp( - d_ic, - d_loglikelihood, - batch_size, - [=] __device__(ScalarT loglike) { return ic_base - (ScalarT) 2.0 * loglike; }, - stream); - } +template +void information_criterion(ScalarT* d_ic, + const ScalarT* d_loglikelihood, + IC_Type ic_type, + IdxT n_params, + IdxT batch_size, + IdxT n_samples, + cudaStream_t stream) +{ + ScalarT ic_base{}; + ScalarT N = static_cast(n_params); + ScalarT T = static_cast(n_samples); + switch (ic_type) { + case AIC: ic_base = (ScalarT)2.0 * N; break; + case AICc: + ic_base = (ScalarT)2.0 * (N + (N * (N + (ScalarT)1.0)) / (T - N - (ScalarT)1.0)); + break; + case BIC: ic_base = std::log(T) * N; break; + } + /* Compute information criterion from log-likelihood and base term */ + raft::linalg::unaryOp( + d_ic, + d_loglikelihood, + batch_size, + [=] __device__(ScalarT loglike) { return ic_base - (ScalarT)2.0 * loglike; }, + stream); +} } // namespace detail } // namespace batched diff --git a/cpp/include/raft/stats/detail/entropy.cuh b/cpp/include/raft/stats/detail/entropy.cuh index ce57a86693..3eed86f705 100644 --- a/cpp/include/raft/stats/detail/entropy.cuh +++ b/cpp/include/raft/stats/detail/entropy.cuh @@ -33,24 +33,23 @@ namespace raft { namespace stats { namespace detail { - /** * @brief Lambda to calculate the entropy of a sample given its probability value * * @param p: the input to the functional mapping * @param q: dummy param */ - struct entropyOp { - HDI double operator()(double p, double q) - { - if (p) - return -1 * (p) * (log(p)); - else - return 0.0; - } - }; - - /** +struct entropyOp { + HDI double operator()(double p, double q) + { + if (p) + return -1 * (p) * (log(p)); + else + return 0.0; + } +}; + +/** * @brief function to calculate the bincounts of number of samples in every label * * @tparam LabelT: type of the labels diff --git a/cpp/include/raft/stats/detail/homogeneity_score.cuh b/cpp/include/raft/stats/detail/homogeneity_score.cuh index 23198f910d..b91175fe0f 100644 --- a/cpp/include/raft/stats/detail/homogeneity_score.cuh +++ b/cpp/include/raft/stats/detail/homogeneity_score.cuh @@ -22,9 +22,9 @@ #pragma once +#include #include #include -#include namespace raft { namespace stats { diff --git a/cpp/include/raft/stats/detail/kl_divergence.cuh b/cpp/include/raft/stats/detail/kl_divergence.cuh index 0638565207..117dfd07fc 100644 --- a/cpp/include/raft/stats/detail/kl_divergence.cuh +++ b/cpp/include/raft/stats/detail/kl_divergence.cuh @@ -50,8 +50,6 @@ struct KLDOp { } }; - - /** * @brief Function to calculate KL Divergence * more info on KL diff --git a/cpp/include/raft/stats/detail/mutual_info_score.cuh b/cpp/include/raft/stats/detail/mutual_info_score.cuh index c39001dc38..b1349d6379 100644 --- a/cpp/include/raft/stats/detail/mutual_info_score.cuh +++ b/cpp/include/raft/stats/detail/mutual_info_score.cuh @@ -25,14 +25,14 @@ */ #pragma once -#include #include #include #include #include +#include #include +#include #include -#include #include namespace raft { @@ -119,14 +119,14 @@ double mutual_info_score(const T* firstClusterArray, // calculating the contingency matrix raft::stats::contingencyMatrix(firstClusterArray, - secondClusterArray, - (int)size, - (int*)dContingencyMatrix.data(), - stream, - (void*)pWorkspace.data(), - workspaceSz, - lowerLabelRange, - upperLabelRange); + secondClusterArray, + (int)size, + (int*)dContingencyMatrix.data(), + stream, + (void*)pWorkspace.data(), + workspaceSz, + lowerLabelRange, + upperLabelRange); // creating device buffers for all the parameters involved in ARI calculation // device variables diff --git a/cpp/include/raft/stats/detail/trustworthiness_score.cuh b/cpp/include/raft/stats/detail/trustworthiness_score.cuh index b2d92df755..04ae0228d6 100644 --- a/cpp/include/raft/stats/detail/trustworthiness_score.cuh +++ b/cpp/include/raft/stats/detail/trustworthiness_score.cuh @@ -15,10 +15,10 @@ */ #include +#include #include #include #include -#include #define N_THREADS 512 @@ -165,25 +165,25 @@ double trustworthiness_score(const raft::handle_t& h, bool bAllocWorkspace = false; raft::matrix::sort_cols_per_row(X_dist.data(), - X_ind.data(), - curBatchSize, - n, - bAllocWorkspace, - nullptr, - colSortWorkspaceSize, - stream); + X_ind.data(), + curBatchSize, + n, + bAllocWorkspace, + nullptr, + colSortWorkspaceSize, + stream); if (bAllocWorkspace) { rmm::device_uvector sortColsWorkspace(colSortWorkspaceSize, stream); raft::matrix::sort_cols_per_row(X_dist.data(), - X_ind.data(), - curBatchSize, - n, - bAllocWorkspace, - sortColsWorkspace.data(), - colSortWorkspaceSize, - stream); + X_ind.data(), + curBatchSize, + n, + bAllocWorkspace, + sortColsWorkspace.data(), + colSortWorkspaceSize, + stream); } int work = curBatchSize * n; diff --git a/cpp/include/raft/stats/dispersion.hpp b/cpp/include/raft/stats/dispersion.hpp index b50b196157..381f210d85 100644 --- a/cpp/include/raft/stats/dispersion.hpp +++ b/cpp/include/raft/stats/dispersion.hpp @@ -19,7 +19,7 @@ #include namespace raft { - namespace stats { +namespace stats { /** * @brief Compute cluster dispersion metric. This is very useful for @@ -46,8 +46,10 @@ DataT dispersion(const DataT* centroids, IdxT nClusters, IdxT nPoints, IdxT dim, - cudaStream_t stream) { - return detail::dispersion(centroids, clusterSizes, globalCentroid, nClusters, nPoints, dim, stream); + cudaStream_t stream) +{ + return detail::dispersion( + centroids, clusterSizes, globalCentroid, nClusters, nPoints, dim, stream); } } // end namespace stats diff --git a/cpp/include/raft/stats/entropy.hpp b/cpp/include/raft/stats/entropy.hpp index 5c2710fc6a..c1f15cb0fe 100644 --- a/cpp/include/raft/stats/entropy.hpp +++ b/cpp/include/raft/stats/entropy.hpp @@ -36,8 +36,9 @@ double entropy(const T* clusterArray, const int size, const T lowerLabelRange, const T upperLabelRange, - cudaStream_t stream) { - return detail::entropy(clusterArray, size, lowerLabelRange, upperLabelRange, stream); + cudaStream_t stream) +{ + return detail::entropy(clusterArray, size, lowerLabelRange, upperLabelRange, stream); } }; // end namespace stats diff --git a/cpp/include/raft/stats/homogeneity_score.hpp b/cpp/include/raft/stats/homogeneity_score.hpp index 365acb8c24..e94d519902 100644 --- a/cpp/include/raft/stats/homogeneity_score.hpp +++ b/cpp/include/raft/stats/homogeneity_score.hpp @@ -38,8 +38,10 @@ double homogeneity_score(const T* truthClusterArray, int size, T lowerLabelRange, T upperLabelRange, - cudaStream_t stream) { - return detail::homogeneity_score(truthClusterArray, predClusterArray, size, lowerLabelRange, upperLabelRange, stream); + cudaStream_t stream) +{ + return detail::homogeneity_score( + truthClusterArray, predClusterArray, size, lowerLabelRange, upperLabelRange, stream); } }; // end namespace stats diff --git a/cpp/include/raft/stats/information_criterion.hpp b/cpp/include/raft/stats/information_criterion.hpp index 08a4014e11..02881d6677 100644 --- a/cpp/include/raft/stats/information_criterion.hpp +++ b/cpp/include/raft/stats/information_criterion.hpp @@ -47,15 +47,17 @@ namespace stats { * @param[in] n_samples Number of samples in each series * @param[in] stream CUDA stream */ -template -void information_criterion_batched(ScalarT *d_ic, - const ScalarT *d_loglikelihood, - IC_Type ic_type, - IdxT n_params, - IdxT batch_size, - IdxT n_samples, - cudaStream_t stream) { - batched::detail::information_criterion(d_ic, d_loglikelihood, ic_type, n_params, batch_size, n_params, stream); +template +void information_criterion_batched(ScalarT* d_ic, + const ScalarT* d_loglikelihood, + IC_Type ic_type, + IdxT n_params, + IdxT batch_size, + IdxT n_samples, + cudaStream_t stream) +{ + batched::detail::information_criterion( + d_ic, d_loglikelihood, ic_type, n_params, batch_size, n_params, stream); } } // namespace stats diff --git a/cpp/include/raft/stats/kl_divergence.hpp b/cpp/include/raft/stats/kl_divergence.hpp index b8865a2c16..377e96719d 100644 --- a/cpp/include/raft/stats/kl_divergence.hpp +++ b/cpp/include/raft/stats/kl_divergence.hpp @@ -34,7 +34,7 @@ namespace stats { template DataT kl_divergence(const DataT* modelPDF, const DataT* candidatePDF, int size, cudaStream_t stream) { - return detail::kl_divergence(modelPDF, candidatePDF, size, stream); + return detail::kl_divergence(modelPDF, candidatePDF, size, stream); } }; // end namespace stats diff --git a/cpp/include/raft/stats/meanvar.hpp b/cpp/include/raft/stats/meanvar.hpp index 3cf2d35bef..3a41ee8a00 100644 --- a/cpp/include/raft/stats/meanvar.hpp +++ b/cpp/include/raft/stats/meanvar.hpp @@ -48,7 +48,8 @@ void meanvar(Type* mean, IdxType N, bool sample, bool rowMajor, - cudaStream_t stream) { + cudaStream_t stream) +{ detail::meanvar(mean, var, data, D, N, sample, rowMajor, stream); } diff --git a/cpp/include/raft/stats/mutual_info_score.hpp b/cpp/include/raft/stats/mutual_info_score.hpp index 383868e578..b1044d0a3c 100644 --- a/cpp/include/raft/stats/mutual_info_score.hpp +++ b/cpp/include/raft/stats/mutual_info_score.hpp @@ -20,7 +20,6 @@ namespace raft { namespace stats { - /** * @brief Function to calculate the mutual information between two clusters * more info on mutual information @@ -37,9 +36,10 @@ double mutual_info_score(const T* firstClusterArray, int size, T lowerLabelRange, T upperLabelRange, - cudaStream_t stream) { - return detail::mutual_info_score(firstClusterArray, secondClusterArray, size, - lowerLabelRange, upperLabelRange, stream); + cudaStream_t stream) +{ + return detail::mutual_info_score( + firstClusterArray, secondClusterArray, size, lowerLabelRange, upperLabelRange, stream); } }; // end namespace stats diff --git a/cpp/include/raft/stats/r2_score.hpp b/cpp/include/raft/stats/r2_score.hpp index 08d97c591b..4858a2b2a8 100644 --- a/cpp/include/raft/stats/r2_score.hpp +++ b/cpp/include/raft/stats/r2_score.hpp @@ -37,8 +37,9 @@ namespace stats { * @return: The R-squared value. */ template -math_t r2_score(math_t* y, math_t* y_hat, int n, cudaStream_t stream) { - return detail::r2_score(y, y_hat, n, stream); +math_t r2_score(math_t* y, math_t* y_hat, int n, cudaStream_t stream) +{ + return detail::r2_score(y, y_hat, n, stream); } } // namespace stats diff --git a/cpp/include/raft/stats/rand_index.hpp b/cpp/include/raft/stats/rand_index.hpp index b0e3786c49..602ff11f47 100644 --- a/cpp/include/raft/stats/rand_index.hpp +++ b/cpp/include/raft/stats/rand_index.hpp @@ -21,7 +21,6 @@ namespace raft { namespace stats { - /** * @brief Function to calculate RandIndex * more info on rand index @@ -31,12 +30,9 @@ namespace stats { * @param stream: the cudaStream object */ template -double rand_index(T* firstClusterArray, - T* secondClusterArray, - uint64_t size, - cudaStream_t stream) +double rand_index(T* firstClusterArray, T* secondClusterArray, uint64_t size, cudaStream_t stream) { - return detail::compute_rand_index(firstClusterArray, secondClusterArray, size, stream); + return detail::compute_rand_index(firstClusterArray, secondClusterArray, size, stream); } }; // end namespace stats diff --git a/cpp/include/raft/stats/regression_metrics.hpp b/cpp/include/raft/stats/regression_metrics.hpp index be7abdd786..4cfbb88231 100644 --- a/cpp/include/raft/stats/regression_metrics.hpp +++ b/cpp/include/raft/stats/regression_metrics.hpp @@ -1,18 +1,18 @@ /* -* Copyright (c) 2019-2022, NVIDIA CORPORATION. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #pragma once @@ -22,19 +22,19 @@ namespace raft { namespace stats { /** -* @brief Compute regression metrics mean absolute error, mean squared error, median absolute error -* @tparam T: data type for predictions (e.g., float or double for regression). -* @param[in] predictions: array of predictions (GPU pointer). -* @param[in] ref_predictions: array of reference (ground-truth) predictions (GPU pointer). -* @param[in] n: number of elements in each of predictions, ref_predictions. Should be > 0. -* @param[in] stream: cuda stream. -* @param[out] mean_abs_error: Mean Absolute Error. Sum over n of (|predictions[i] - -* ref_predictions[i]|) / n. -* @param[out] mean_squared_error: Mean Squared Error. Sum over n of ((predictions[i] - -* ref_predictions[i])^2) / n. -* @param[out] median_abs_error: Median Absolute Error. Median of |predictions[i] - -* ref_predictions[i]| for i in [0, n). -*/ + * @brief Compute regression metrics mean absolute error, mean squared error, median absolute error + * @tparam T: data type for predictions (e.g., float or double for regression). + * @param[in] predictions: array of predictions (GPU pointer). + * @param[in] ref_predictions: array of reference (ground-truth) predictions (GPU pointer). + * @param[in] n: number of elements in each of predictions, ref_predictions. Should be > 0. + * @param[in] stream: cuda stream. + * @param[out] mean_abs_error: Mean Absolute Error. Sum over n of (|predictions[i] - + * ref_predictions[i]|) / n. + * @param[out] mean_squared_error: Mean Squared Error. Sum over n of ((predictions[i] - + * ref_predictions[i])^2) / n. + * @param[out] median_abs_error: Median Absolute Error. Median of |predictions[i] - + * ref_predictions[i]| for i in [0, n). + */ template void regression_metrics(const T* predictions, const T* ref_predictions, @@ -42,8 +42,10 @@ void regression_metrics(const T* predictions, cudaStream_t stream, double& mean_abs_error, double& mean_squared_error, - double& median_abs_error) { - detail::regression_metrics(predictions, ref_predictions, n, stream, mean_abs_error, mean_squared_error, median_abs_error); + double& median_abs_error) +{ + detail::regression_metrics( + predictions, ref_predictions, n, stream, mean_abs_error, mean_squared_error, median_abs_error); } } // namespace stats } // namespace raft diff --git a/cpp/include/raft/stats/silhouette_score.hpp b/cpp/include/raft/stats/silhouette_score.hpp index 94b840ee3d..35481ec6ad 100644 --- a/cpp/include/raft/stats/silhouette_score.hpp +++ b/cpp/include/raft/stats/silhouette_score.hpp @@ -40,31 +40,34 @@ namespace stats { */ template DataT silhouette_score( - const raft::handle_t& handle, - DataT* X_in, - int nRows, - int nCols, - LabelT* labels, - int nLabels, - DataT* silhouette_scorePerSample, - cudaStream_t stream, - raft::distance::DistanceType metric = raft::distance::DistanceType::L2Unexpanded) { - return detail::silhouette_score(handle, X_in, nRows, nCols, labels, nLabels, - silhouette_scorePerSample, stream, metric); + const raft::handle_t& handle, + DataT* X_in, + int nRows, + int nCols, + LabelT* labels, + int nLabels, + DataT* silhouette_scorePerSample, + cudaStream_t stream, + raft::distance::DistanceType metric = raft::distance::DistanceType::L2Unexpanded) +{ + return detail::silhouette_score( + handle, X_in, nRows, nCols, labels, nLabels, silhouette_scorePerSample, stream, metric); } template value_t silhouette_score_batched( - const raft::handle_t& handle, - value_t* X, - value_idx n_rows, - value_idx n_cols, - label_idx* y, - label_idx n_labels, - value_t* scores, - value_idx chunk, - raft::distance::DistanceType metric = raft::distance::DistanceType::L2Unexpanded) { - return batched::detail::silhouette_score(handle, X, n_rows, n_cols, y, n_labels, scores, chunk, metric); + const raft::handle_t& handle, + value_t* X, + value_idx n_rows, + value_idx n_cols, + label_idx* y, + label_idx n_labels, + value_t* scores, + value_idx chunk, + raft::distance::DistanceType metric = raft::distance::DistanceType::L2Unexpanded) +{ + return batched::detail::silhouette_score( + handle, X, n_rows, n_cols, y, n_labels, scores, chunk, metric); } }; // namespace stats diff --git a/cpp/include/raft/stats/trustworthiness_score.hpp b/cpp/include/raft/stats/trustworthiness_score.hpp index 4d7e3178f4..be72951cda 100644 --- a/cpp/include/raft/stats/trustworthiness_score.hpp +++ b/cpp/include/raft/stats/trustworthiness_score.hpp @@ -40,8 +40,10 @@ double trustworthiness_score(const raft::handle_t& h, int m, int d, int n_neighbors, - int batchSize = 512) { - return detail::trustworthiness_score(h, X, X_embedded, n, m, d, n_neighbors, batchSize); + int batchSize = 512) +{ + return detail::trustworthiness_score( + h, X, X_embedded, n, m, d, n_neighbors, batchSize); } } // namespace stats } // namespace raft diff --git a/cpp/include/raft/stats/v_measure.hpp b/cpp/include/raft/stats/v_measure.hpp index 44531221d2..c7c4c3942d 100644 --- a/cpp/include/raft/stats/v_measure.hpp +++ b/cpp/include/raft/stats/v_measure.hpp @@ -37,8 +37,10 @@ double v_measure(const T* truthClusterArray, T lowerLabelRange, T upperLabelRange, cudaStream_t stream, - double beta = 1.0) { - return detail::v_measure(truthClusterArray, predClusterArray, size, lowerLabelRange, upperLabelRange, stream, beta); + double beta = 1.0) +{ + return detail::v_measure( + truthClusterArray, predClusterArray, size, lowerLabelRange, upperLabelRange, stream, beta); } }; // end namespace stats diff --git a/cpp/test/stats/adjusted_rand_index.cu b/cpp/test/stats/adjusted_rand_index.cu index 8ab3fbb1fb..33e05295e1 100644 --- a/cpp/test/stats/adjusted_rand_index.cu +++ b/cpp/test/stats/adjusted_rand_index.cu @@ -18,9 +18,9 @@ #include #include #include +#include #include #include -#include #include namespace raft { diff --git a/cpp/test/stats/completeness_score.cu b/cpp/test/stats/completeness_score.cu index 00cbe09627..b8ca65ed7b 100644 --- a/cpp/test/stats/completeness_score.cu +++ b/cpp/test/stats/completeness_score.cu @@ -17,10 +17,10 @@ #include #include #include +#include #include -#include #include -#include +#include #include namespace raft { @@ -76,11 +76,11 @@ class completenessTest : public ::testing::TestWithParam { double truthMI, truthEntropy; truthMI = raft::stats::mutual_info_score(truthClusterArray.data(), - predClusterArray.data(), - nElements, - lowerLabelRange, - upperLabelRange, - stream); + predClusterArray.data(), + nElements, + lowerLabelRange, + upperLabelRange, + stream); truthEntropy = raft::stats::entropy( predClusterArray.data(), nElements, lowerLabelRange, upperLabelRange, stream); @@ -93,11 +93,11 @@ class completenessTest : public ::testing::TestWithParam { // calling the completeness CUDA implementation computedCompleteness = raft::stats::completeness_score(truthClusterArray.data(), - predClusterArray.data(), - nElements, - lowerLabelRange, - upperLabelRange, - stream); + predClusterArray.data(), + nElements, + lowerLabelRange, + upperLabelRange, + stream); } // the destructor diff --git a/cpp/test/stats/contingencyMatrix.cu b/cpp/test/stats/contingencyMatrix.cu index ff6998fe97..21ed9c5df0 100644 --- a/cpp/test/stats/contingencyMatrix.cu +++ b/cpp/test/stats/contingencyMatrix.cu @@ -18,10 +18,10 @@ #include #include #include -#include #include -#include #include +#include +#include #include namespace raft { @@ -88,8 +88,7 @@ class ContingencyMatrixTest : public ::testing::TestWithParam -#include #include #include #include +#include #include #include #include diff --git a/cpp/test/stats/entropy.cu b/cpp/test/stats/entropy.cu index 46237f0b21..7074b1a6aa 100644 --- a/cpp/test/stats/entropy.cu +++ b/cpp/test/stats/entropy.cu @@ -17,9 +17,9 @@ #include #include #include -#include #include #include +#include #include #include diff --git a/cpp/test/stats/homogeneity_score.cu b/cpp/test/stats/homogeneity_score.cu index 787b8c3a91..44434aef8d 100644 --- a/cpp/test/stats/homogeneity_score.cu +++ b/cpp/test/stats/homogeneity_score.cu @@ -17,9 +17,9 @@ #include #include #include +#include #include #include -#include #include namespace raft { @@ -75,11 +75,11 @@ class homogeneityTest : public ::testing::TestWithParam { double truthMI, truthEntropy; truthMI = raft::stats::mutual_info_score(truthClusterArray.data(), - predClusterArray.data(), - nElements, - lowerLabelRange, - upperLabelRange, - stream); + predClusterArray.data(), + nElements, + lowerLabelRange, + upperLabelRange, + stream); truthEntropy = raft::stats::entropy( truthClusterArray.data(), nElements, lowerLabelRange, upperLabelRange, stream); @@ -92,11 +92,11 @@ class homogeneityTest : public ::testing::TestWithParam { // calling the homogeneity CUDA implementation computedHomogeneity = raft::stats::homogeneity_score(truthClusterArray.data(), - predClusterArray.data(), - nElements, - lowerLabelRange, - upperLabelRange, - stream); + predClusterArray.data(), + nElements, + lowerLabelRange, + upperLabelRange, + stream); RAFT_CUDA_TRY(cudaStreamDestroy(stream)); } diff --git a/cpp/test/stats/kl_divergence.cu b/cpp/test/stats/kl_divergence.cu index c4348d150e..050f48f557 100644 --- a/cpp/test/stats/kl_divergence.cu +++ b/cpp/test/stats/kl_divergence.cu @@ -17,8 +17,8 @@ #include #include #include -#include #include +#include #include namespace raft { diff --git a/cpp/test/stats/mutual_info_score.cu b/cpp/test/stats/mutual_info_score.cu index 0228b72184..b7f6406009 100644 --- a/cpp/test/stats/mutual_info_score.cu +++ b/cpp/test/stats/mutual_info_score.cu @@ -17,9 +17,9 @@ #include #include #include +#include #include #include -#include #include namespace raft { @@ -118,11 +118,11 @@ class mutualInfoTest : public ::testing::TestWithParam { // calling the mutualInfo CUDA implementation computedmutualInfo = raft::stats::mutual_info_score(firstClusterArray.data(), - secondClusterArray.data(), - nElements, - lowerLabelRange, - upperLabelRange, - stream); + secondClusterArray.data(), + nElements, + lowerLabelRange, + upperLabelRange, + stream); } // the destructor diff --git a/cpp/test/stats/rand_index.cu b/cpp/test/stats/rand_index.cu index 806987e216..1f4805a160 100644 --- a/cpp/test/stats/rand_index.cu +++ b/cpp/test/stats/rand_index.cu @@ -90,8 +90,8 @@ class randIndexTest : public ::testing::TestWithParam { raft::update_device(secondClusterArray.data(), &arr2[0], (int)size, stream); // calling the rand_index CUDA implementation - computedRandIndex = raft::stats::rand_index( - firstClusterArray.data(), secondClusterArray.data(), size, stream); + computedRandIndex = + raft::stats::rand_index(firstClusterArray.data(), secondClusterArray.data(), size, stream); } // the destructor diff --git a/cpp/test/stats/silhouette_score.cu b/cpp/test/stats/silhouette_score.cu index b7cdf20484..43f1b0ca00 100644 --- a/cpp/test/stats/silhouette_score.cu +++ b/cpp/test/stats/silhouette_score.cu @@ -17,10 +17,10 @@ #include #include #include -#include #include #include #include +#include #include #include @@ -84,7 +84,6 @@ class silhouetteScoreTest : public ::testing::TestWithParam #include -#include #include #include #include +#include #include namespace raft { diff --git a/cpp/test/stats/v_measure.cu b/cpp/test/stats/v_measure.cu index bef3b1bc3b..2ff60c0a86 100644 --- a/cpp/test/stats/v_measure.cu +++ b/cpp/test/stats/v_measure.cu @@ -17,9 +17,9 @@ #include #include #include -#include -#include #include +#include +#include #include namespace raft { @@ -75,17 +75,17 @@ class vMeasureTest : public ::testing::TestWithParam { double truthHomogeity, truthCompleteness; truthHomogeity = raft::stats::homogeneity_score(truthClusterArray.data(), - predClusterArray.data(), - nElements, - lowerLabelRange, - upperLabelRange, - stream); + predClusterArray.data(), + nElements, + lowerLabelRange, + upperLabelRange, + stream); truthCompleteness = raft::stats::homogeneity_score(predClusterArray.data(), - truthClusterArray.data(), - nElements, - lowerLabelRange, - upperLabelRange, - stream); + truthClusterArray.data(), + nElements, + lowerLabelRange, + upperLabelRange, + stream); if (truthCompleteness + truthHomogeity == 0.0) truthVMeasure = 0.0; @@ -94,12 +94,12 @@ class vMeasureTest : public ::testing::TestWithParam { (params.beta * truthHomogeity + truthCompleteness)); // calling the v_measure CUDA implementation computedVMeasure = raft::stats::v_measure(truthClusterArray.data(), - predClusterArray.data(), - nElements, - lowerLabelRange, - upperLabelRange, - stream, - params.beta); + predClusterArray.data(), + nElements, + lowerLabelRange, + upperLabelRange, + stream, + params.beta); } // the destructor From a665053f5281a55fa2520afacd1f90413a909053 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 16 Feb 2022 15:14:24 -0500 Subject: [PATCH 03/12] Adding more tests --- cpp/test/matrix/columnSort.cu | 165 ++++++++++++++++++++++++ cpp/test/stats/information_criterion.cu | 147 +++++++++++++++++++++ 2 files changed, 312 insertions(+) create mode 100644 cpp/test/matrix/columnSort.cu create mode 100644 cpp/test/stats/information_criterion.cu diff --git a/cpp/test/matrix/columnSort.cu b/cpp/test/matrix/columnSort.cu new file mode 100644 index 0000000000..22ce27bca9 --- /dev/null +++ b/cpp/test/matrix/columnSort.cu @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../test_utils.h" +#include +#include +#include +#include +#include +#include + +namespace raft { +namespace matrix { + +template +std::vector* sort_indexes(const std::vector& v) +{ + // initialize original index locations + std::vector* idx = new std::vector(v.size()); + std::iota((*idx).begin(), (*idx).end(), 0); + + // sort indexes based on comparing values in v + std::sort((*idx).begin(), (*idx).end(), [&v](int i1, int i2) { return v[i1] < v[i2]; }); + return idx; +} + +template +struct columnSort { + T tolerance; + int n_row; + int n_col; + bool testKeys; +}; + +template +::std::ostream& operator<<(::std::ostream& os, const columnSort& dims) +{ + return os; +} + +template +class ColumnSort : public ::testing::TestWithParam> { + protected: + ColumnSort() + : keyIn(0, stream), + keySorted(0, stream), + keySortGolden(0, stream), + valueOut(0, stream), + goldenValOut(0, stream), + workspacePtr(0, stream) + { + } + + void SetUp() override + { + params = ::testing::TestWithParam>::GetParam(); + int len = params.n_row * params.n_col; + RAFT_CUDA_TRY(cudaStreamCreate(&stream)); + keyIn.resize(len, stream); + valueOut.resize(len, stream); + goldenValOut.resize(len, stream); + if (params.testKeys) { + keySorted.resize(len, stream); + keySortGolden.resize(len, stream); + } + + std::vector vals(len); + std::vector cValGolden(len); + std::iota(vals.begin(), vals.end(), + 1.0f); // will have to change input param type + std::random_shuffle(vals.begin(), vals.end()); + + std::vector cKeyGolden(len); + + for (int i = 0; i < params.n_row; i++) { + std::vector tmp(vals.begin() + i * params.n_col, vals.begin() + (i + 1) * params.n_col); + auto cpuOut = sort_indexes(tmp); + std::copy((*cpuOut).begin(), (*cpuOut).end(), cValGolden.begin() + i * params.n_col); + delete cpuOut; + + if (params.testKeys) { + std::sort(tmp.begin(), tmp.end()); + std::copy(tmp.begin(), tmp.end(), cKeyGolden.begin() + i * params.n_col); + } + } + + raft::update_device(keyIn.data(), &vals[0], len, stream); + raft::update_device(goldenValOut.data(), &cValGolden[0], len, stream); + + if (params.testKeys) raft::update_device(keySortGolden.data(), &cKeyGolden[0], len, stream); + + bool needWorkspace = false; + size_t workspaceSize = 0; + // Remove this branch once the implementation of descending sort is fixed. + sortColumnsPerRow(keyIn.data(), + valueOut.data(), + params.n_row, + params.n_col, + needWorkspace, + NULL, + workspaceSize, + stream, + keySorted.data()); + if (needWorkspace) { + workspacePtr.resize(workspaceSize, stream); + sortColumnsPerRow(keyIn.data(), + valueOut.data(), + params.n_row, + params.n_col, + needWorkspace, + workspacePtr.data(), + workspaceSize, + stream, + keySorted.data()); + } + RAFT_CUDA_TRY(cudaStreamDestroy(stream)); + } + + protected: + cudaStream_t stream = 0; + columnSort params; + rmm::device_uvector keyIn, keySorted, keySortGolden; + rmm::device_uvector valueOut, goldenValOut; // valueOut are indexes + rmm::device_uvector workspacePtr; +}; + +const std::vector> inputsf1 = {{0.000001f, 503, 2000, false}, + {0.000001f, 113, 20000, true}, + {0.000001f, 503, 2000, false}, + {0.000001f, 113, 20000, true}}; + +typedef ColumnSort ColumnSortF; +TEST_P(ColumnSortF, Result) +{ + // Remove this condition once the implementation of of descending sort is + // fixed. + ASSERT_TRUE(devArrMatch(valueOut.data(), + goldenValOut.data(), + params.n_row * params.n_col, + raft::CompareApprox(params.tolerance))); + if (params.testKeys) { + ASSERT_TRUE(devArrMatch(keySorted.data(), + keySortGolden.data(), + params.n_row * params.n_col, + raft::CompareApprox(params.tolerance))); + } +} + +INSTANTIATE_TEST_CASE_P(ColumnSortTests, ColumnSortF, ::testing::ValuesIn(inputsf1)); + +} // end namespace matrix +} // end namespace raft diff --git a/cpp/test/stats/information_criterion.cu b/cpp/test/stats/information_criterion.cu new file mode 100644 index 0000000000..f740894bf5 --- /dev/null +++ b/cpp/test/stats/information_criterion.cu @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +#include +#include + +#include + +#include +#include +#include + +namespace raft { +namespace stats { + +template +void naive_ic( + T* h_ic, const T* h_loglike, IC_Type ic_type, int n_params, int batch_size, int n_samples) +{ + T ic_base{}; + T N = static_cast(n_params); + T M = static_cast(n_samples); + switch (ic_type) { + case AIC: ic_base = (T)2 * N; break; + case AICc: ic_base = (T)2 * (N + (N * (N + (T)1)) / (M - N - (T)1)); break; + case BIC: ic_base = std::log(M) * N; break; + } +#pragma omp parallel for + for (int bid = 0; bid < batch_size; bid++) { + h_ic[bid] = ic_base - (T)2.0 * h_loglike[bid]; + } +} + +template +struct BatchedICInputs { + int batch_size; + int n_params; + int n_samples; + IC_Type ic_type; + T tolerance; +}; + +template +class BatchedICTest : public ::testing::TestWithParam> { + protected: + void SetUp() override + { + using std::vector; + params = ::testing::TestWithParam>::GetParam(); + + // Create stream and allocator + RAFT_CUDA_TRY(cudaStreamCreate(&stream)); + allocator = std::make_shared(); + + // Create arrays + std::vector loglike_h = std::vector(params.batch_size); + res_h.resize(params.batch_size); + T* loglike_d = (T*)allocator->allocate(sizeof(T) * params.batch_size, stream); + res_d = (T*)allocator->allocate(sizeof(T) * params.batch_size, stream); + + // Generate random data + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution udis(0.001, 1.0); // 0 has no log + for (int i = 0; i < params.batch_size; i++) + loglike_h[i] = std::log(udis(gen)); + + // Copy the data to the device + raft::update_device(loglike_d, loglike_h.data(), params.batch_size, stream); + + // Compute the tested results + information_criterion_batched(res_d, + loglike_d, + params.ic_type, + params.n_params, + params.batch_size, + params.n_samples, + stream); + + // Compute the expected results + naive_ic(res_h.data(), + loglike_h.data(), + params.ic_type, + params.n_params, + params.batch_size, + params.n_samples); + + allocator->deallocate(loglike_d, sizeof(T) * params.batch_size, stream); + } + + void TearDown() override + { + allocator->deallocate(res_d, sizeof(T) * params.batch_size, stream); + RAFT_CUDA_TRY(cudaStreamDestroy(stream)); + } + + protected: + std::shared_ptr allocator; + BatchedICInputs params; + T* res_d; + std::vector res_h; + cudaStream_t stream = 0; +}; + +// Test parameters (op, n_batches, m, n, p, q, tolerance) +const std::vector> inputsd = { + {1, 5, 52, AIC, 1e-3}, {10, 7, 100, AICc, 1e-3}, {67, 2, 350, BIC, 1e-3}}; + +// Test parameters (op, n_batches, m, n, p, q, tolerance) +const std::vector> inputsf = { + {1, 5, 52, AIC, 1e-3}, {10, 7, 100, AICc, 1e-3}, {67, 2, 350, BIC, 1e-3}}; + +using BatchedICTestD = BatchedICTest; +using BatchedICTestF = BatchedICTest; +TEST_P(BatchedICTestD, Result) +{ + ASSERT_TRUE(devArrMatchHost( + res_h.data(), res_d, params.batch_size, raft::CompareApprox(params.tolerance), stream)); +} +TEST_P(BatchedICTestF, Result) +{ + ASSERT_TRUE(devArrMatchHost( + res_h.data(), res_d, params.batch_size, raft::CompareApprox(params.tolerance), stream)); +} + +INSTANTIATE_TEST_CASE_P(BatchedICTests, BatchedICTestD, ::testing::ValuesIn(inputsd)); +INSTANTIATE_TEST_CASE_P(BatchedICTests, BatchedICTestF, ::testing::ValuesIn(inputsf)); + +} // namespace stats +} // namespace raft From e7d2d672e969eed7b8386f52832bd9968fd874d9 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 16 Feb 2022 15:15:12 -0500 Subject: [PATCH 04/12] Test update --- cpp/test/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 7dcacdf7e1..ecc981ef1a 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -71,6 +71,7 @@ add_executable(test_raft test/linalg/unary_op.cu test/matrix/math.cu test/matrix/matrix.cu + test/matrix/colWiseSort.cu test/matrix/linewise_op.cu test/mr/host/buffer.cpp test/mr/device/buffer.cpp @@ -119,6 +120,7 @@ add_executable(test_raft test/stats/entropy.cu test/stats/histogram.cu test/stats/homogeneity_score.cu + test/stats/information_criterion.cu test/stats/kl_divergence.cu test/stats/mean.cu test/stats/meanvar.cu From 8c3c284810f2231828d9fc2372f19a41f93435e6 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 16 Feb 2022 16:00:06 -0500 Subject: [PATCH 05/12] Fixing tests. --- cpp/include/raft/matrix/col_wise_sort.hpp | 2 +- cpp/include/raft/stats/contingency_matrix.hpp | 18 +++++++++--------- .../raft/stats/detail/contingencyMatrix.cuh | 1 + .../raft/stats/information_criterion.hpp | 2 +- cpp/test/CMakeLists.txt | 2 +- cpp/test/matrix/columnSort.cu | 5 +++-- cpp/test/stats/contingencyMatrix.cu | 2 ++ cpp/test/stats/information_criterion.cu | 2 ++ 8 files changed, 20 insertions(+), 14 deletions(-) diff --git a/cpp/include/raft/matrix/col_wise_sort.hpp b/cpp/include/raft/matrix/col_wise_sort.hpp index 5e519141c9..1d7fd23523 100644 --- a/cpp/include/raft/matrix/col_wise_sort.hpp +++ b/cpp/include/raft/matrix/col_wise_sort.hpp @@ -45,7 +45,7 @@ void sort_cols_per_row(const InType* in, cudaStream_t stream, InType* sortedKeys = nullptr) { - detail::sortColumnsPerRow( + detail::sortColumnsPerRow( in, out, n_rows, n_columns, bAllocWorkspace, workspacePtr, workspaceSize, stream, sortedKeys); } }; // end namespace matrix diff --git a/cpp/include/raft/stats/contingency_matrix.hpp b/cpp/include/raft/stats/contingency_matrix.hpp index 153cb088ad..7783bb9f42 100644 --- a/cpp/include/raft/stats/contingency_matrix.hpp +++ b/cpp/include/raft/stats/contingency_matrix.hpp @@ -86,15 +86,15 @@ void contingencyMatrix(const T* groundTruth, T minLabel = std::numeric_limits::max(), T maxLabel = std::numeric_limits::max()) { - detail::contingencyMatrix(groundTruth, - predictedLabel, - nSamples, - outMat, - stream, - workspace, - workspaceSize, - minLabel, - maxLabel); + detail::contingencyMatrix(groundTruth, + predictedLabel, + nSamples, + outMat, + stream, + workspace, + workspaceSize, + minLabel, + maxLabel); } }; // namespace stats diff --git a/cpp/include/raft/stats/detail/contingencyMatrix.cuh b/cpp/include/raft/stats/detail/contingencyMatrix.cuh index 61a1cd0851..6318e241bf 100644 --- a/cpp/include/raft/stats/detail/contingencyMatrix.cuh +++ b/cpp/include/raft/stats/detail/contingencyMatrix.cuh @@ -29,6 +29,7 @@ namespace raft { namespace stats { namespace detail { + typedef enum { IMPL_NONE, SMEM_ATOMICS, diff --git a/cpp/include/raft/stats/information_criterion.hpp b/cpp/include/raft/stats/information_criterion.hpp index 02881d6677..87c87cfee5 100644 --- a/cpp/include/raft/stats/information_criterion.hpp +++ b/cpp/include/raft/stats/information_criterion.hpp @@ -57,7 +57,7 @@ void information_criterion_batched(ScalarT* d_ic, cudaStream_t stream) { batched::detail::information_criterion( - d_ic, d_loglikelihood, ic_type, n_params, batch_size, n_params, stream); + d_ic, d_loglikelihood, ic_type, n_params, batch_size, n_samples, stream); } } // namespace stats diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index ecc981ef1a..82937c0ba3 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -71,7 +71,7 @@ add_executable(test_raft test/linalg/unary_op.cu test/matrix/math.cu test/matrix/matrix.cu - test/matrix/colWiseSort.cu + test/matrix/columnSort.cu test/matrix/linewise_op.cu test/mr/host/buffer.cpp test/mr/device/buffer.cpp diff --git a/cpp/test/matrix/columnSort.cu b/cpp/test/matrix/columnSort.cu index 22ce27bca9..d0b27bb4a4 100644 --- a/cpp/test/matrix/columnSort.cu +++ b/cpp/test/matrix/columnSort.cu @@ -105,7 +105,7 @@ class ColumnSort : public ::testing::TestWithParam> { bool needWorkspace = false; size_t workspaceSize = 0; // Remove this branch once the implementation of descending sort is fixed. - sortColumnsPerRow(keyIn.data(), + sort_cols_per_row(keyIn.data(), valueOut.data(), params.n_row, params.n_col, @@ -116,7 +116,7 @@ class ColumnSort : public ::testing::TestWithParam> { keySorted.data()); if (needWorkspace) { workspacePtr.resize(workspaceSize, stream); - sortColumnsPerRow(keyIn.data(), + sort_cols_per_row(keyIn.data(), valueOut.data(), params.n_row, params.n_col, @@ -126,6 +126,7 @@ class ColumnSort : public ::testing::TestWithParam> { stream, keySorted.data()); } + RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); RAFT_CUDA_TRY(cudaStreamDestroy(stream)); } diff --git a/cpp/test/stats/contingencyMatrix.cu b/cpp/test/stats/contingencyMatrix.cu index 21ed9c5df0..fbae9f5224 100644 --- a/cpp/test/stats/contingencyMatrix.cu +++ b/cpp/test/stats/contingencyMatrix.cu @@ -132,6 +132,8 @@ class ContingencyMatrixTest : public ::testing::TestWithParam> { params.batch_size, params.n_samples); + RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); + allocator->deallocate(loglike_d, sizeof(T) * params.batch_size, stream); } From 1b0abdd8edd29803f204200ac51f3f7ac7cf894f Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 16 Feb 2022 17:33:33 -0500 Subject: [PATCH 06/12] Removing cuml from includes --- cpp/include/raft/spatial/knn/detail/ann_quantized_faiss.cuh | 2 -- cpp/include/raft/stats/detail/batched/silhouette_score.cuh | 2 -- 2 files changed, 4 deletions(-) diff --git a/cpp/include/raft/spatial/knn/detail/ann_quantized_faiss.cuh b/cpp/include/raft/spatial/knn/detail/ann_quantized_faiss.cuh index 153b6b1d8a..4d9bfd82ad 100644 --- a/cpp/include/raft/spatial/knn/detail/ann_quantized_faiss.cuh +++ b/cpp/include/raft/spatial/knn/detail/ann_quantized_faiss.cuh @@ -45,8 +45,6 @@ #include -#include - #include #include diff --git a/cpp/include/raft/stats/detail/batched/silhouette_score.cuh b/cpp/include/raft/stats/detail/batched/silhouette_score.cuh index bc717f3235..d709c7472a 100644 --- a/cpp/include/raft/stats/detail/batched/silhouette_score.cuh +++ b/cpp/include/raft/stats/detail/batched/silhouette_score.cuh @@ -17,8 +17,6 @@ #pragma once #include "../silhouette_score.cuh" -#include - #include #include #include From ebe971c5c2fbcd211009144e54f81b94031c27ca Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Fri, 18 Feb 2022 14:22:30 -0500 Subject: [PATCH 07/12] COmpiling if necessary --- cpp/test/stats/silhouette_score.cu | 6 +++++- cpp/test/stats/trustworthiness.cu | 4 ++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/cpp/test/stats/silhouette_score.cu b/cpp/test/stats/silhouette_score.cu index 43f1b0ca00..6efb3a4f78 100644 --- a/cpp/test/stats/silhouette_score.cu +++ b/cpp/test/stats/silhouette_score.cu @@ -19,7 +19,11 @@ #include #include #include -#include + +#if defined RAFT_DISTANCE_COMPILED && defined RAFT_NN_COMPILED +#include +#endif + #include #include #include diff --git a/cpp/test/stats/trustworthiness.cu b/cpp/test/stats/trustworthiness.cu index 096b93d943..ebbd52a332 100644 --- a/cpp/test/stats/trustworthiness.cu +++ b/cpp/test/stats/trustworthiness.cu @@ -19,7 +19,11 @@ #include #include #include + +#if defined RAFT_DISTANCE_COMPILED && defined RAFT_NN_COMPILED #include +#endif + #include #include From 768fe879aff021fbefc6089a11e1f32d9aabedbb Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Fri, 18 Feb 2022 14:25:37 -0500 Subject: [PATCH 08/12] raft-config is idempotent no matter RAFT_COMPILE_LIBRARIES value This allows us to correctly handle raft with and without compiled libraries. --- cpp/CMakeLists.txt | 12 +++++++++--- cpp/cmake/modules/config.cmake.in | 5 ++++- cpp/cmake/thirdparty/get_faiss.cmake | 4 ++-- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index ea0ef2c2f1..7d0ca15985 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -267,7 +267,7 @@ install(TARGETS raft DESTINATION ${lib_dir} EXPORT raft-exports) install(TARGETS raft_distance - DESTINATION ${lib_dir} + DESTINATION $lib_dir} EXPORT raft-distance-exports) install(TARGETS raft_nn DESTINATION ${lib_dir} @@ -276,13 +276,13 @@ install(TARGETS raft_nn if(TARGET raft_distance_lib) install(TARGETS raft_distance_lib DESTINATION ${lib_dir} - EXPORT raft-distance-exports) + EXPORT raft-distance-lib-exports) endif() if(TARGET raft_nn_lib) install(TARGETS raft_nn_lib DESTINATION ${lib_dir} - EXPORT raft-nn-exports) + EXPORT raft-nn-lib-exports) endif() @@ -364,6 +364,12 @@ raft_export(BUILD raft include("${rapids-cmake-dir}/export/write_dependencies.cmake") set(raft_components distance nn) +if(TARGET raft_distance_lib) + list(APPEND raft_components distance-lib) +endif() +if(TARGET raft_nn_lib) + list(APPEND raft_components nn-lib) +endif() foreach(comp IN LISTS raft_components) install( EXPORT raft-${comp}-exports diff --git a/cpp/cmake/modules/config.cmake.in b/cpp/cmake/modules/config.cmake.in index bfafe3555a..4895a3baac 100644 --- a/cpp/cmake/modules/config.cmake.in +++ b/cpp/cmake/modules/config.cmake.in @@ -40,7 +40,8 @@ foreach(lang IN LISTS rapids_global_languages) endforeach() unset(rapids_global_languages) -set(rapids_allowed_components @RAPIDS_COMPONENTS@) +set(rapids_base_components @RAPIDS_COMPONENTS@) +set(rapids_allowed_components ${rapids_base_components}) if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/@project_name@-dependencies.cmake") include("${CMAKE_CURRENT_LIST_DIR}/@project_name@-dependencies.cmake") @@ -50,6 +51,7 @@ foreach(comp IN LISTS rapids_allowed_components) # this way if a dependency can't be found we fail if(${comp} IN_LIST @project_name@_FIND_COMPONENTS) include("${CMAKE_CURRENT_LIST_DIR}/@project_name@-${comp}-dependencies.cmake" OPTIONAL) + include("${CMAKE_CURRENT_LIST_DIR}/@project_name@-${comp}-lib-dependencies.cmake" OPTIONAL) endif() endforeach() @@ -58,6 +60,7 @@ include("${CMAKE_CURRENT_LIST_DIR}/@project_name@-targets.cmake" OPTIONAL) foreach(comp IN LISTS rapids_allowed_components) if(${comp} IN_LIST @project_name@_FIND_COMPONENTS) include("${CMAKE_CURRENT_LIST_DIR}/@project_name@-${comp}-targets.cmake" OPTIONAL) + include("${CMAKE_CURRENT_LIST_DIR}/@project_name@-${comp}-lib-targets.cmake" OPTIONAL) set(@project_name@_${comp}_FOUND TRUE) endif() endforeach() diff --git a/cpp/cmake/thirdparty/get_faiss.cmake b/cpp/cmake/thirdparty/get_faiss.cmake index 1079db3294..8c29d2b321 100644 --- a/cpp/cmake/thirdparty/get_faiss.cmake +++ b/cpp/cmake/thirdparty/get_faiss.cmake @@ -59,8 +59,8 @@ function(find_and_configure_faiss) # We generate the faiss-config files when we built faiss locally, so always do `find_dependency` rapids_export_package(BUILD OpenMP raft-nn-exports) # faiss uses openMP but doesn't export a need for it - rapids_export_package(BUILD faiss raft-nn-exports) - rapids_export_package(INSTALL faiss raft-nn-exports) + rapids_export_package(BUILD faiss raft-nn-exports GLOBAL_TARGETS faiss::faiss faiss) + rapids_export_package(INSTALL faiss raft-nn-exports GLOBAL_TARGETS faiss::faiss faiss) # Tell cmake where it can find the generated faiss-config.cmake we wrote. include("${rapids-cmake-dir}/export/find_package_root.cmake") From 4313dc6238004c6ee6241dd048b3e8cdff2db678 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Fri, 18 Feb 2022 14:58:48 -0500 Subject: [PATCH 09/12] Correct typo --- cpp/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 7d0ca15985..118a48525a 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -267,7 +267,7 @@ install(TARGETS raft DESTINATION ${lib_dir} EXPORT raft-exports) install(TARGETS raft_distance - DESTINATION $lib_dir} + DESTINATION ${lib_dir} EXPORT raft-distance-exports) install(TARGETS raft_nn DESTINATION ${lib_dir} From 6275e6c2fb2233283acb9bb7fe76548fc1a05a26 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Fri, 18 Feb 2022 15:00:18 -0500 Subject: [PATCH 10/12] Merging --- cpp/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 7d0ca15985..118a48525a 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -267,7 +267,7 @@ install(TARGETS raft DESTINATION ${lib_dir} EXPORT raft-exports) install(TARGETS raft_distance - DESTINATION $lib_dir} + DESTINATION ${lib_dir} EXPORT raft-distance-exports) install(TARGETS raft_nn DESTINATION ${lib_dir} From 5db9745a03986573968c4cb28838837a42a784bd Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Fri, 18 Feb 2022 15:40:18 -0500 Subject: [PATCH 11/12] fixing doc --- cpp/include/raft/stats/adjusted_rand_index.hpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/cpp/include/raft/stats/adjusted_rand_index.hpp b/cpp/include/raft/stats/adjusted_rand_index.hpp index 7b5d862e2c..f245ae988a 100644 --- a/cpp/include/raft/stats/adjusted_rand_index.hpp +++ b/cpp/include/raft/stats/adjusted_rand_index.hpp @@ -13,13 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -/** - * @file adjusted_rand_index.cuh - * @brief The adjusted Rand index is the corrected-for-chance version of the Rand index. - * Such a correction for chance establishes a baseline by using the expected similarity - * of all pair-wise comparisons between clusterings specified by a random model. - */ - #pragma once #include From 222f53aa62f503ed7e967c627b456936ad06ea20 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Fri, 18 Feb 2022 16:01:49 -0500 Subject: [PATCH 12/12] fixing docs --- cpp/include/raft/matrix/col_wise_sort.hpp | 2 +- cpp/include/raft/stats/adjusted_rand_index.hpp | 7 +++++++ .../raft/stats/information_criterion.hpp | 3 +-- cpp/include/raft/stats/silhouette_score.hpp | 1 + .../raft/stats/trustworthiness_score.hpp | 18 +++++++++--------- 5 files changed, 19 insertions(+), 12 deletions(-) diff --git a/cpp/include/raft/matrix/col_wise_sort.hpp b/cpp/include/raft/matrix/col_wise_sort.hpp index 1d7fd23523..7ace5881bc 100644 --- a/cpp/include/raft/matrix/col_wise_sort.hpp +++ b/cpp/include/raft/matrix/col_wise_sort.hpp @@ -27,7 +27,7 @@ namespace matrix { * @param in: input matrix * @param out: output value(index) matrix * @param n_rows: number rows of input matrix - * @param n_cols: number columns of input matrix + * @param n_columns: number columns of input matrix * @param bAllocWorkspace: check returned value, if true allocate workspace passed in workspaceSize * @param workspacePtr: pointer to workspace memory * @param workspaceSize: Size of workspace to be allocated diff --git a/cpp/include/raft/stats/adjusted_rand_index.hpp b/cpp/include/raft/stats/adjusted_rand_index.hpp index f245ae988a..22d81e5296 100644 --- a/cpp/include/raft/stats/adjusted_rand_index.hpp +++ b/cpp/include/raft/stats/adjusted_rand_index.hpp @@ -13,6 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @file adjusted_rand_index.hpp + * @brief The adjusted Rand index is the corrected-for-chance version of the Rand index. + * Such a correction for chance establishes a baseline by using the expected similarity + * of all pair-wise comparisons between clusterings specified by a random model. + */ + #pragma once #include diff --git a/cpp/include/raft/stats/information_criterion.hpp b/cpp/include/raft/stats/information_criterion.hpp index 87c87cfee5..c367471953 100644 --- a/cpp/include/raft/stats/information_criterion.hpp +++ b/cpp/include/raft/stats/information_criterion.hpp @@ -13,9 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - /** - * @file information_criterion.cuh + * @file information_criterion.hpp * @brief These information criteria are used to evaluate the quality of models * by balancing the quality of the fit and the number of parameters. * diff --git a/cpp/include/raft/stats/silhouette_score.hpp b/cpp/include/raft/stats/silhouette_score.hpp index 35481ec6ad..c0e4afb413 100644 --- a/cpp/include/raft/stats/silhouette_score.hpp +++ b/cpp/include/raft/stats/silhouette_score.hpp @@ -27,6 +27,7 @@ namespace stats { * clusterings * @tparam DataT: type of the data samples * @tparam LabelT: type of the labels + * @param handle: raft handle for managing expensive resources * @param X_in: pointer to the input Data samples array (nRows x nCols) * @param nRows: number of data samples * @param nCols: number of features diff --git a/cpp/include/raft/stats/trustworthiness_score.hpp b/cpp/include/raft/stats/trustworthiness_score.hpp index be72951cda..f3f1bacfd4 100644 --- a/cpp/include/raft/stats/trustworthiness_score.hpp +++ b/cpp/include/raft/stats/trustworthiness_score.hpp @@ -22,15 +22,15 @@ namespace stats { /** * @brief Compute the trustworthiness score - * @param h Raft handle - * @param X[in]: Data in original dimension - * @param X_embedded[in]: Data in target dimension (embedding) - * @param n: Number of samples - * @param m: Number of features in high/original dimension - * @param d: Number of features in low/embedded dimension - * @param n_neighbors Number of neighbors considered by trustworthiness score - * @param batchSize Batch size - * @return Trustworthiness score + * @param[in] h: raft handle + * @param[in] X: Data in original dimension + * @param[in] X_embedded: Data in target dimension (embedding) + * @param[in] n: Number of samples + * @param[in] m: Number of features in high/original dimension + * @param[in] d: Number of features in low/embedded dimension + * @param[in] n_neighbors Number of neighbors considered by trustworthiness score + * @param[in] batchSize Batch size + * @return[out] Trustworthiness score */ template double trustworthiness_score(const raft::handle_t& h,