Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hiding implementation details for lap, clustering, spectral, and label #477

Merged
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,12 @@
#include <raft/device_atomics.cuh>
#include <raft/handle.hpp>
#include <raft/linalg/cublas_wrappers.h>
#include <raft/spectral/matrix_wrappers.hpp>
#include <raft/spectral/warn_dbg.hpp>
#include <raft/spectral/detail/matrix_wrappers.hpp>
#include <raft/spectral/detail/warn_dbg.hpp>

namespace {

using namespace raft;
using namespace raft::linalg;
namespace raft {
namespace cluster {
namespace detail {
// =========================================================
// Useful grid settings
// =========================================================
Expand Down Expand Up @@ -657,20 +656,20 @@ static int updateCentroids(handle_t const& handle,
thrust::device_ptr<index_type_t> rows(work_int + d * n);

// Take transpose of observation matrix
RAFT_CUBLAS_TRY(cublasgeam(cublas_h,
CUBLAS_OP_T,
CUBLAS_OP_N,
n,
d,
&one,
obs,
d,
&zero,
(value_type_t*)NULL,
n,
thrust::raw_pointer_cast(obs_copy),
n,
stream));
RAFT_CUBLAS_TRY(linalg::cublasgeam(cublas_h,
CUBLAS_OP_T,
CUBLAS_OP_N,
n,
d,
&one,
obs,
d,
&zero,
(value_type_t*)NULL,
n,
thrust::raw_pointer_cast(obs_copy),
n,
stream));

// Cluster assigned to each observation matrix entry
thrust::sequence(thrust_exec_policy, rows, rows + d * n);
Expand Down Expand Up @@ -727,10 +726,6 @@ static int updateCentroids(handle_t const& handle,
return 0;
}

} // namespace

namespace raft {

// =========================================================
// k-means algorithm
// =========================================================
Expand Down Expand Up @@ -983,4 +978,6 @@ int kmeans(handle_t const& handle,
seed);
}

} // namespace detail
} // namespace cluster
} // namespace raft
65 changes: 65 additions & 0 deletions cpp/include/raft/cluster/kmeans.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <raft/cluster/detail/kmeans.hpp>

namespace raft {
namespace cluster {

/**
* @brief Find clusters with k-means algorithm.
* Initial centroids are chosen with k-means++ algorithm. Empty
* clusters are reinitialized by choosing new centroids with
* k-means++ algorithm.
* @tparam index_type_t the type of data used for indexing.
* @tparam value_type_t the type of data used for weights, distances.
* @param handle the raft handle.
* @param n Number of observation vectors.
* @param d Dimension of observation vectors.
* @param k Number of clusters.
* @param tol Tolerance for convergence. k-means stops when the
* change in residual divided by n is less than tol.
* @param maxiter Maximum number of k-means iterations.
* @param obs (Input, device memory, d*n entries) Observation
* matrix. Matrix is stored column-major and each column is an
* observation vector. Matrix dimensions are d x n.
* @param codes (Output, device memory, n entries) Cluster
* assignments.
* @param residual On exit, residual sum of squares (sum of squares
* of distances between observation vectors and centroids).
* @param iters on exit, number of k-means iterations.
* @param seed random seed to be used.
* @return error flag
*/
template <typename index_type_t, typename value_type_t>
int kmeans(handle_t const& handle,
           index_type_t n,
           index_type_t d,
           index_type_t k,
           value_type_t tol,
           index_type_t maxiter,
           const value_type_t* __restrict__ obs,
           index_type_t* __restrict__ codes,
           value_type_t& residual,
           index_type_t& iters,
           unsigned long long seed = 123456)
{
  // Public entry point: every argument is handed, unchanged and in the same
  // order, to the implementation in the detail namespace.
  int const status = detail::kmeans<index_type_t, value_type_t>(
    handle, n, d, k, tol, maxiter, obs, codes, residual, iters, seed);

  // The implementation's error flag is returned to the caller as-is.
  return status;
}
} // namespace cluster
} // namespace raft
1 change: 0 additions & 1 deletion cpp/include/raft/comms/helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

#include <raft/comms/std_comms.hpp>
#include <raft/handle.hpp>
#include <raft/mr/device/buffer.hpp>

#include <iostream>
#include <nccl.h>
Expand Down
2 changes: 0 additions & 2 deletions cpp/include/raft/comms/std_comms.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@
#include <raft/comms/comms.hpp>
#include <raft/comms/detail/std_comms.hpp>

#include <raft/mr/device/buffer.hpp>

#include <iostream>
#include <nccl.h>
#include <ucp/api/ucp.h>
Expand Down
117 changes: 117 additions & 0 deletions cpp/include/raft/label/classlabels.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <raft/label/detail/classlabels.cuh>

namespace raft {
namespace label {

/**
* Get unique class labels.
*
* The y array is assumed to store class labels. The unique values are selected
* from this array.
*
* \tparam value_t numeric type of the arrays with class labels
* \param [in] y device array of labels, size [n]
* \param [in] n number of labels
* \param [out] unique device array of unique labels, unallocated on entry,
* on exit it has size [n_unique]
* \param [out] n_unique number of unique labels
* \param [in] stream cuda stream
*/
template <typename value_t>
int getUniquelabels(rmm::device_uvector<value_t>& unique, value_t* y, size_t n, cudaStream_t stream)
{
  // Public wrapper: delegate to the detail implementation with the same
  // arguments and hand its integer result back to the caller.
  int const result = detail::getUniquelabels<value_t>(unique, y, n, stream);
  return result;
}

/**
* Assign one versus rest labels.
*
* The output labels will have values +/-1:
* y_out = (y == y_unique[idx]) ? +1 : -1;
*
* The output type currently is set to value_t, but for SVM in principle we are
* free to choose other type for y_out (it should represent +/-1, and it is used
* in floating point arithmetics).
*
* \param [in] y device array of input labels, size [n]
* \param [in] n number of labels
* \param [in] y_unique device array of unique labels, size [n_classes]
* \param [in] n_classes number of unique labels
* \param [out] y_out device array of output labels
* \param [in] idx index of unique label that should be labeled as 1
* \param [in] stream cuda stream
*/
// Public wrapper around detail::getOvrlabels. It holds no logic of its own:
// all seven arguments are forwarded unchanged and in the same order.
template <typename value_t>
void getOvrlabels(
value_t* y, int n, value_t* y_unique, int n_classes, value_t* y_out, int idx, cudaStream_t stream)
{
// value_t is passed explicitly to the detail implementation rather than deduced.
detail::getOvrlabels<value_t>(y, n, y_unique, n_classes, y_out, idx, stream);
}
/**
* Maps an input array containing a series of numbers into a new array
* where numbers have been mapped to a monotonically increasing set
* of labels. This can be useful in machine learning algorithms, for instance,
* where a given set of labels is not taken from a monotonically increasing
* set. This can happen if they are filtered or if only a subset of the
* total labels are used in a dataset. This is also useful in graph algorithms
* where a set of vertices need to be labeled in a monotonically increasing
* order.
* @tparam Type the numeric type of the input and output arrays
* @tparam Lambda the type of an optional filter function, which determines
* which items in the array to map.
* @param out the output monotonic array
* @param in input label array
* @param N number of elements in the input array
* @param stream cuda stream to use
* @param filter_op an optional function for specifying which values
* should have monotonically increasing labels applied to them.
*/
// Public wrapper (filter overload) around detail::make_monotonic. All
// arguments, including the caller-supplied filter_op and the zero_based flag
// (default false), are forwarded unchanged.
template <typename Type, typename Lambda>
void make_monotonic(
Type* out, Type* in, size_t N, cudaStream_t stream, Lambda filter_op, bool zero_based = false)
{
// Both template arguments are passed explicitly so the two-parameter
// (Type, Lambda) detail overload is the one invoked.
detail::make_monotonic<Type, Lambda>(out, in, N, stream, filter_op, zero_based);
}

/**
* Maps an input array containing a series of numbers into a new array
* where numbers have been mapped to a monotonically increasing set
* of labels. This can be useful in machine learning algorithms, for instance,
* where a given set of labels is not taken from a monotonically increasing
* set. This can happen if they are filtered or if only a subset of the
* total labels are used in a dataset. This is also useful in graph algorithms
* where a set of vertices need to be labeled in a monotonically increasing
* order.
* @tparam Type the numeric type of the input and output arrays
* @note This overload takes no filter function; the underlying implementation
* is invoked with a filter that always returns false.
* @param out output label array with labels assigned monotonically
* @param in input label array
* @param N number of elements in the input array
* @param stream cuda stream to use
*/
// Public wrapper (no-filter overload) around detail::make_monotonic. It
// forwards out, in, N, stream and zero_based (default false) unchanged.
template <typename Type>
void make_monotonic(Type* out, Type* in, size_t N, cudaStream_t stream, bool zero_based = false)
{
// Only Type is specified, selecting the detail overload without a filter argument.
detail::make_monotonic<Type>(out, in, N, stream, zero_based);
}
}; // namespace label
}; // end namespace raft
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

namespace raft {
namespace label {
namespace detail {

/**
* Get unique class labels.
Expand Down Expand Up @@ -194,5 +195,7 @@ void make_monotonic(Type* out, Type* in, size_t N, cudaStream_t stream, bool zer
make_monotonic<Type>(
out, in, N, stream, [] __device__(Type val) { return false; }, zero_based);
}

}; // namespace detail
}; // namespace label
}; // end namespace raft
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

namespace raft {
namespace label {
namespace detail {

/** Note: this is one possible implementation where we represent the label
* equivalence graph implicitly using labels_a, labels_b and mask.
Expand Down Expand Up @@ -153,5 +154,6 @@ void merge_labels(value_idx* labels_a,
RAFT_CUDA_TRY(cudaPeekAtLastError());
}

} // namespace detail
}; // namespace label
}; // namespace raft
66 changes: 66 additions & 0 deletions cpp/include/raft/label/merge_labels.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <raft/label/detail/merge_labels.cuh>

namespace raft {
namespace label {

/**
* @brief Merge two labellings in-place, according to a core mask
*
* A labelling is a representation of disjoint sets (groups) where points that
* belong to the same group have the same label. It is assumed that group
* labels take values between 1 and N. labels relate to points, i.e a label i+1
* means that you belong to the same group as the point i.
* The special value MAX_LABEL is used to mark points that are not labelled.
*
* The two label arrays A and B induce two sets of groups over points 0..N-1.
* If a point is labelled i in A and j in B and the mask is true for this
* point, then i and j are equivalent labels and their groups are merged by
* relabeling the elements of both groups to have the same label. The new label
* is the smaller one from the original labels.
* It is required that if the mask is true for a point, this point is labelled
* (i.e its label is different than the special value MAX_LABEL).
*
* One use case is finding connected components: the two input label arrays can
* represent the connected components of graphs G_A and G_B, and the output
* would be the connected components labels of G_A \union G_B.
*
* @param[inout] labels_a First input, and output label array (in-place)
* @param[in] labels_b Second input label array
* @param[in] mask Core point mask
* @param[out] R label equivalence map
* @param[in] m Working flag
* @param[in] N Number of points in the dataset
* @param[in] stream CUDA stream
*/
// Public wrapper around detail::merge_labels. The template defaults
// (value_idx = int, TPB_X = 256) are declared here and passed on explicitly;
// all runtime arguments are forwarded unchanged and in the same order.
template <typename value_idx = int, int TPB_X = 256>
void merge_labels(value_idx* labels_a,
const value_idx* labels_b,
const bool* mask,
value_idx* R,
bool* m,
value_idx N,
cudaStream_t stream)
{
// No work is done at this level; the detail implementation performs the merge.
detail::merge_labels<value_idx, TPB_X>(labels_a, labels_b, mask, R, m, N, stream);
}

}; // namespace label
}; // namespace raft
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

#include <raft/cudart_utils.h>
#include <raft/handle.hpp>
#include <raft/lap/lap_kernels.cuh>
#include <raft/lap/detail/lap_kernels.cuh>
#include <rmm/device_scalar.hpp>
#include <rmm/device_uvector.hpp>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@

#include <raft/cudart_utils.h>
#include <raft/handle.hpp>
#include <raft/mr/device/buffer.hpp>

#include <thrust/for_each.h>

Expand Down
Loading