-
Notifications
You must be signed in to change notification settings - Fork 197
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Hiding implementation details for lap, clustering, spectral, and label (
#477) Also managed to remove the raft host/device buffers in the process Authors: - Corey J. Nolet (https://github.com/cjnolet) Approvers: - Divye Gala (https://github.com/divyegala) URL: #477
- Loading branch information
Showing
46 changed files
with
697 additions
and
784 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
/* | ||
* Copyright (c) 2020, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
#pragma once | ||
|
||
#include <raft/cluster/detail/kmeans.cuh> | ||
|
||
namespace raft { | ||
namespace cluster { | ||
|
||
/** | ||
* @brief Find clusters with k-means algorithm. | ||
* Initial centroids are chosen with k-means++ algorithm. Empty | ||
* clusters are reinitialized by choosing new centroids with | ||
* k-means++ algorithm. | ||
* | ||
* This is the public entry point; it forwards every argument unchanged | ||
* to detail::kmeans and returns that function's result. | ||
* | ||
* @tparam index_type_t the type of data used for indexing. | ||
* @tparam value_type_t the type of data used for weights, distances. | ||
* @param handle the raft handle. | ||
* @param n Number of observation vectors. | ||
* @param d Dimension of observation vectors. | ||
* @param k Number of clusters. | ||
* @param tol Tolerance for convergence. k-means stops when the | ||
* change in residual divided by n is less than tol. | ||
* @param maxiter Maximum number of k-means iterations. | ||
* @param obs (Input, device memory, d*n entries) Observation | ||
* matrix. Matrix is stored column-major and each column is an | ||
* observation vector. Matrix dimensions are d x n. | ||
* Declared __restrict__, so it must not alias codes. | ||
* @param codes (Output, device memory, n entries) Cluster | ||
* assignments. Declared __restrict__, so it must not alias obs. | ||
* @param residual On exit, residual sum of squares (sum of squares | ||
* of distances between observation vectors and centroids). | ||
* @param iters On exit, number of k-means iterations. | ||
* @param seed Random seed to be used (defaults to 123456). | ||
* @return error flag, passed through from detail::kmeans. The meaning of | ||
* individual values is not visible in this header -- TODO confirm against | ||
* the detail implementation. | ||
*/ | ||
template <typename index_type_t, typename value_type_t> | ||
int kmeans(handle_t const& handle, | ||
index_type_t n, | ||
index_type_t d, | ||
index_type_t k, | ||
value_type_t tol, | ||
index_type_t maxiter, | ||
const value_type_t* __restrict__ obs, | ||
index_type_t* __restrict__ codes, | ||
value_type_t& residual, | ||
index_type_t& iters, | ||
unsigned long long seed = 123456) | ||
{ | ||
// Pure pass-through: template arguments are spelled out explicitly rather than deduced. | ||
return detail::kmeans<index_type_t, value_type_t>( | ||
handle, n, d, k, tol, maxiter, obs, codes, residual, iters, seed); | ||
} | ||
} // namespace cluster | ||
} // namespace raft |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
/* | ||
* Copyright (c) 2019-2021, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#pragma once | ||
|
||
#include <raft/label/detail/classlabels.cuh> | ||
|
||
namespace raft { | ||
namespace label { | ||
|
||
/** | ||
* Get unique class labels. | ||
* | ||
* The y array is assumed to store class labels. The unique values are selected | ||
* from this array. | ||
* | ||
* This wrapper forwards all arguments to detail::getUniquelabels. | ||
* | ||
* @tparam value_t numeric type of the arrays with class labels | ||
* @param [inout] unique device vector that receives the unique labels. | ||
* The original note describes it as unallocated on entry; its size on | ||
* exit is not stated in this header -- TODO confirm against | ||
* detail::getUniquelabels. | ||
* @param [in] y device array of labels, size [n] | ||
* @param [in] n number of labels | ||
* @param [in] stream cuda stream | ||
* @returns the int returned by detail::getUniquelabels; presumably the | ||
* number of unique labels found -- TODO confirm. | ||
*/ | ||
template <typename value_t> | ||
int getUniquelabels(rmm::device_uvector<value_t>& unique, value_t* y, size_t n, cudaStream_t stream) | ||
{ | ||
// Pure pass-through to the detail implementation. | ||
return detail::getUniquelabels<value_t>(unique, y, n, stream); | ||
} | ||
|
||
/** | ||
* Assign one versus rest labels. | ||
* | ||
* The output labels will have values +/-1: | ||
* y_out = (y == y_unique[idx]) ? +1 : -1; | ||
* | ||
* The output type currently is set to value_t, but for SVM in principle we are | ||
* free to choose other type for y_out (it should represent +/-1, and it is used | ||
* in floating point arithmetic). | ||
* | ||
* This wrapper forwards all arguments to detail::getOvrlabels. | ||
* | ||
* @tparam value_t numeric type of the input and output label arrays | ||
* @param [in] y device array of input labels, size [n] | ||
* @param [in] n number of labels | ||
* @param [in] y_unique device array of unique labels, size [n_classes] | ||
* @param [in] n_classes number of unique labels | ||
* @param [out] y_out device array of output labels (presumably size [n], | ||
* one entry per input label -- TODO confirm) | ||
* @param [in] idx index of unique label that should be labeled as 1 | ||
* @param [in] stream cuda stream | ||
*/ | ||
template <typename value_t> | ||
void getOvrlabels( | ||
value_t* y, int n, value_t* y_unique, int n_classes, value_t* y_out, int idx, cudaStream_t stream) | ||
{ | ||
// Pure pass-through to the detail implementation. | ||
detail::getOvrlabels<value_t>(y, n, y_unique, n_classes, y_out, idx, stream); | ||
} | ||
/** | ||
* Maps an input array containing a series of numbers into a new array | ||
* where numbers have been mapped to a monotonically increasing set | ||
* of labels. This can be useful in machine learning algorithms, for instance, | ||
* where a given set of labels is not taken from a monotonically increasing | ||
* set. This can happen if they are filtered or if only a subset of the | ||
* total labels are used in a dataset. This is also useful in graph algorithms | ||
* where a set of vertices need to be labeled in a monotonically increasing | ||
* order. | ||
* | ||
* This overload takes an explicit filter and forwards all arguments to | ||
* detail::make_monotonic. | ||
* | ||
* @tparam Type the numeric type of the input and output arrays | ||
* @tparam Lambda the type of the filter function, which determines | ||
* which items in the array to map. | ||
* @param[out] out the output monotonic array | ||
* @param[in] in input label array | ||
* @param[in] N number of elements in the input array | ||
* @param[in] stream cuda stream to use | ||
* @param[in] filter_op a function for specifying which values | ||
* should have monotonically increasing labels applied to them. | ||
* It has no default in this overload; callers that do not need a | ||
* filter can use the make_monotonic overload that omits it. | ||
* @param[in] zero_based force monotonic set to start at 0? (defaults to false) | ||
*/ | ||
template <typename Type, typename Lambda> | ||
void make_monotonic( | ||
Type* out, Type* in, size_t N, cudaStream_t stream, Lambda filter_op, bool zero_based = false) | ||
{ | ||
// Pure pass-through; filter_op is handed to the detail implementation by value. | ||
detail::make_monotonic<Type, Lambda>(out, in, N, stream, filter_op, zero_based); | ||
} | ||
|
||
/** | ||
* Maps an input array containing a series of numbers into a new array | ||
* where numbers have been mapped to a monotonically increasing set | ||
* of labels. This can be useful in machine learning algorithms, for instance, | ||
* where a given set of labels is not taken from a monotonically increasing | ||
* set. This can happen if they are filtered or if only a subset of the | ||
* total labels are used in a dataset. This is also useful in graph algorithms | ||
* where a set of vertices need to be labeled in a monotonically increasing | ||
* order. | ||
* | ||
* This overload takes no filter function and forwards all arguments to | ||
* detail::make_monotonic. | ||
* | ||
* @tparam Type the numeric type of the input and output arrays | ||
* @param[out] out output label array with labels assigned monotonically | ||
* @param[in] in input label array | ||
* @param[in] N number of elements in the input array | ||
* @param[in] stream cuda stream to use | ||
* @param[in] zero_based force monotonic label set to start at 0? (defaults to false) | ||
*/ | ||
template <typename Type> | ||
void make_monotonic(Type* out, Type* in, size_t N, cudaStream_t stream, bool zero_based = false) | ||
{ | ||
// Pure pass-through to the detail overload that takes no filter. | ||
detail::make_monotonic<Type>(out, in, N, stream, zero_based); | ||
} | ||
}; // namespace label | ||
}; // end namespace raft |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
/* | ||
* Copyright (c) 2020-2021, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#pragma once | ||
|
||
#include <raft/label/detail/merge_labels.cuh> | ||
|
||
namespace raft { | ||
namespace label { | ||
|
||
/** | ||
* @brief Merge two labellings in-place, according to a core mask | ||
* | ||
* A labelling is a representation of disjoint sets (groups) where points that | ||
* belong to the same group have the same label. It is assumed that group | ||
* labels take values between 1 and N. Labels relate to points, i.e. a label i+1 | ||
* means that you belong to the same group as the point i. | ||
* The special value MAX_LABEL is used to mark points that are not labelled | ||
* (MAX_LABEL is not defined in this header). | ||
* | ||
* The two label arrays A and B induce two sets of groups over points 0..N-1. | ||
* If a point is labelled i in A and j in B and the mask is true for this | ||
* point, then i and j are equivalent labels and their groups are merged by | ||
* relabeling the elements of both groups to have the same label. The new label | ||
* is the smaller one from the original labels. | ||
* It is required that if the mask is true for a point, this point is labelled | ||
* (i.e. its label is different than the special value MAX_LABEL). | ||
* | ||
* One use case is finding connected components: the two input label arrays can | ||
* represent the connected components of graphs G_A and G_B, and the output | ||
* would be the connected components labels of G_A \union G_B. | ||
* | ||
* This wrapper forwards all arguments to detail::merge_labels. | ||
* | ||
* @tparam value_idx integer type of the labels and of N (defaults to int) | ||
* @tparam TPB_X compile-time integer forwarded to the detail implementation | ||
* (defaults to 256); presumably the number of threads per block -- TODO confirm | ||
* @param[inout] labels_a First input, and output label array (in-place) | ||
* @param[in] labels_b Second input label array | ||
* @param[in] mask Core point mask | ||
* @param[out] R label equivalence map | ||
* @param[in] m Working flag | ||
* @param[in] N Number of points in the dataset | ||
* @param[in] stream CUDA stream | ||
*/ | ||
template <typename value_idx = int, int TPB_X = 256> | ||
void merge_labels(value_idx* labels_a, | ||
const value_idx* labels_b, | ||
const bool* mask, | ||
value_idx* R, | ||
bool* m, | ||
value_idx N, | ||
cudaStream_t stream) | ||
{ | ||
// Pure pass-through; both template arguments are forwarded explicitly. | ||
detail::merge_labels<value_idx, TPB_X>(labels_a, labels_b, mask, R, m, N, stream); | ||
} | ||
|
||
}; // namespace label | ||
}; // namespace raft |
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.