Skip to content

Commit

Permalink
Hiding implementation details for lap, clustering, spectral, and label (#477)
Browse files Browse the repository at this point in the history

Also managed to remove the raft host/device buffers in the process

Authors:
  - Corey J. Nolet (https://github.com/cjnolet)

Approvers:
  - Divye Gala (https://github.com/divyegala)

URL: #477
  • Loading branch information
cjnolet authored Feb 9, 2022
1 parent 29718bd commit 2ebf89c
Show file tree
Hide file tree
Showing 46 changed files with 697 additions and 784 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,12 @@
#include <raft/device_atomics.cuh>
#include <raft/handle.hpp>
#include <raft/linalg/detail/cublas_wrappers.hpp>
#include <raft/spectral/matrix_wrappers.hpp>
#include <raft/spectral/warn_dbg.hpp>
#include <raft/spectral/detail/matrix_wrappers.cuh>
#include <raft/spectral/detail/warn_dbg.hpp>

namespace {

using namespace raft;
using namespace raft::linalg;
namespace raft {
namespace cluster {
namespace detail {
// =========================================================
// Useful grid settings
// =========================================================
Expand Down Expand Up @@ -728,10 +727,6 @@ static int updateCentroids(handle_t const& handle,
return 0;
}

} // namespace

namespace raft {

// =========================================================
// k-means algorithm
// =========================================================
Expand Down Expand Up @@ -986,4 +981,6 @@ int kmeans(handle_t const& handle,
seed);
}

} // namespace detail
} // namespace cluster
} // namespace raft
65 changes: 65 additions & 0 deletions cpp/include/raft/cluster/kmeans.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <raft/cluster/detail/kmeans.cuh>

namespace raft {
namespace cluster {

/**
* @brief Find clusters with k-means algorithm.
* Initial centroids are chosen with k-means++ algorithm. Empty
* clusters are reinitialized by choosing new centroids with
* k-means++ algorithm.
*
* This is the public entry point: it forwards every argument, unchanged,
* to detail::kmeans and returns whatever that call returns.
*
* @tparam index_type_t the type of data used for indexing.
* @tparam value_type_t the type of data used for weights, distances.
* @param handle the raft handle.
* @param n Number of observation vectors.
* @param d Dimension of observation vectors.
* @param k Number of clusters.
* @param tol Tolerance for convergence. k-means stops when the
* change in residual divided by n is less than tol.
* @param maxiter Maximum number of k-means iterations.
* @param obs (Input, device memory, d*n entries) Observation
* matrix. Matrix is stored column-major and each column is an
* observation vector. Matrix dimensions are d x n.
* @param codes (Output, device memory, n entries) Cluster
* assignments.
* @param residual On exit, residual sum of squares (sum of squares
* of distances between observation vectors and centroids).
* @param iters On exit, number of k-means iterations.
* @param seed Random seed to be used (defaults to 123456).
* @return Error flag produced by detail::kmeans.
* NOTE(review): presumably 0 means success -- confirm against the
* detail implementation.
*/
template <typename index_type_t, typename value_type_t>
int kmeans(handle_t const& handle,
index_type_t n,
index_type_t d,
index_type_t k,
value_type_t tol,
index_type_t maxiter,
const value_type_t* __restrict__ obs,
index_type_t* __restrict__ codes,
value_type_t& residual,
index_type_t& iters,
unsigned long long seed = 123456)
{
// Delegate to the hidden implementation; template arguments are spelled out
// explicitly rather than deduced.
return detail::kmeans<index_type_t, value_type_t>(
handle, n, d, k, tol, maxiter, obs, codes, residual, iters, seed);
}
} // namespace cluster
} // namespace raft
1 change: 0 additions & 1 deletion cpp/include/raft/comms/helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

#include <raft/comms/std_comms.hpp>
#include <raft/handle.hpp>
#include <raft/mr/device/buffer.hpp>

#include <iostream>
#include <nccl.h>
Expand Down
2 changes: 0 additions & 2 deletions cpp/include/raft/comms/std_comms.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@
#include <raft/comms/comms.hpp>
#include <raft/comms/detail/std_comms.hpp>

#include <raft/mr/device/buffer.hpp>

#include <iostream>
#include <nccl.h>
#include <ucp/api/ucp.h>
Expand Down
117 changes: 117 additions & 0 deletions cpp/include/raft/label/classlabels.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <raft/label/detail/classlabels.cuh>

namespace raft {
namespace label {

/**
* Get unique class labels.
*
* The y array is assumed to store class labels. The unique values are selected
* from this array.
*
* Public wrapper: forwards all arguments to detail::getUniquelabels and
* returns its result.
*
* @tparam value_t numeric type of the arrays with class labels
* @param [inout] unique device vector that receives the unique labels.
* NOTE(review): earlier documentation described this output as
* "unallocated on entry" and sized on exit; presumably the vector is
* resized by the implementation -- confirm in detail::getUniquelabels.
* @param [in] y device array of labels, size [n]
* @param [in] n number of labels
* @param [in] stream cuda stream
* @returns the int returned by detail::getUniquelabels.
* NOTE(review): presumably the number of unique labels -- confirm.
*/
template <typename value_t>
int getUniquelabels(rmm::device_uvector<value_t>& unique, value_t* y, size_t n, cudaStream_t stream)
{
// Delegate to the hidden implementation in the detail namespace.
return detail::getUniquelabels<value_t>(unique, y, n, stream);
}

/**
* Assign one versus rest labels.
*
* The output labels will have values +/-1:
* y_out = (y == y_unique[idx]) ? +1 : -1;
*
* The output type currently is set to value_t, but for SVM in principle we are
* free to choose another type for y_out (it should represent +/-1, and it is
* used in floating point arithmetic).
*
* Public wrapper: forwards all arguments to detail::getOvrlabels.
*
* @tparam value_t numeric type of the input, unique and output label arrays
* @param [in] y device array of input labels, size [n]
* @param [in] n number of labels
* @param [in] y_unique device array of unique labels, size [n_classes]
* @param [in] n_classes number of unique labels
* @param [out] y_out device array of output labels
* (presumably size [n], one entry per input label -- confirm)
* @param [in] idx index of unique label that should be labeled as 1
* @param [in] stream cuda stream
*/
template <typename value_t>
void getOvrlabels(
value_t* y, int n, value_t* y_unique, int n_classes, value_t* y_out, int idx, cudaStream_t stream)
{
// Delegate to the hidden implementation in the detail namespace.
detail::getOvrlabels<value_t>(y, n, y_unique, n_classes, y_out, idx, stream);
}
/**
* Maps an input array containing a series of numbers into a new array
* where numbers have been mapped to a monotonically increasing set
* of labels. This can be useful in machine learning algorithms, for instance,
* where a given set of labels is not taken from a monotonically increasing
* set. This can happen if they are filtered or if only a subset of the
* total labels are used in a dataset. This is also useful in graph algorithms
* where a set of vertices need to be labeled in a monotonically increasing
* order.
*
* Public wrapper: forwards all arguments to the filter-taking overload of
* detail::make_monotonic.
*
* @tparam Type the numeric type of the input and output arrays
* @tparam Lambda the type of the filter function, which determines
* which items in the array to map.
* @param[out] out the output monotonic array
* @param[in] in input label array
* @param[in] N number of elements in the input array
* @param[in] stream cuda stream to use
* @param[in] filter_op a function for specifying which values
* should have monotonically increasing labels applied to them.
* It is required by this overload (there is no default).
* NOTE(review): the detail-level overload without a filter passes a
* lambda that always returns false, which suggests the predicate may
* mark values to be skipped rather than values to be mapped --
* confirm the polarity against the detail implementation.
* @param[in] zero_based force monotonic set to start at 0? (defaults to false)
*/
template <typename Type, typename Lambda>
void make_monotonic(
Type* out, Type* in, size_t N, cudaStream_t stream, Lambda filter_op, bool zero_based = false)
{
// Delegate to the hidden implementation in the detail namespace.
detail::make_monotonic<Type, Lambda>(out, in, N, stream, filter_op, zero_based);
}

/**
* Maps an input array containing a series of numbers into a new array
* where numbers have been mapped to a monotonically increasing set
* of labels. This can be useful in machine learning algorithms, for instance,
* where a given set of labels is not taken from a monotonically increasing
* set. This can happen if they are filtered or if only a subset of the
* total labels are used in a dataset. This is also useful in graph algorithms
* where a set of vertices need to be labeled in a monotonically increasing
* order.
*
* Public wrapper without a filter argument: forwards all arguments to the
* matching overload of detail::make_monotonic.
*
* @tparam Type the numeric type of the input and output arrays
* @param[out] out output label array with labels assigned monotonically
* @param[in] in input label array
* @param[in] N number of elements in the input array
* @param[in] stream cuda stream to use
* @param[in] zero_based force monotonic label set to start at 0? (defaults to false)
*/
template <typename Type>
void make_monotonic(Type* out, Type* in, size_t N, cudaStream_t stream, bool zero_based = false)
{
// Delegate to the hidden implementation in the detail namespace.
detail::make_monotonic<Type>(out, in, N, stream, zero_based);
}
}; // namespace label
}; // end namespace raft
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

namespace raft {
namespace label {
namespace detail {

/**
* Get unique class labels.
Expand Down Expand Up @@ -194,5 +195,7 @@ void make_monotonic(Type* out, Type* in, size_t N, cudaStream_t stream, bool zer
make_monotonic<Type>(
out, in, N, stream, [] __device__(Type val) { return false; }, zero_based);
}

}; // namespace detail
}; // namespace label
}; // end namespace raft
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

namespace raft {
namespace label {
namespace detail {

/** Note: this is one possible implementation where we represent the label
* equivalence graph implicitly using labels_a, labels_b and mask.
Expand Down Expand Up @@ -153,5 +154,6 @@ void merge_labels(value_idx* labels_a,
RAFT_CUDA_TRY(cudaPeekAtLastError());
}

} // namespace detail
}; // namespace label
}; // namespace raft
66 changes: 66 additions & 0 deletions cpp/include/raft/label/merge_labels.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <raft/label/detail/merge_labels.cuh>

namespace raft {
namespace label {

/**
* @brief Merge two labellings in-place, according to a core mask
*
* A labelling is a representation of disjoint sets (groups) where points that
* belong to the same group have the same label. It is assumed that group
* labels take values between 1 and N. Labels relate to points, i.e. a label i+1
* means that you belong to the same group as the point i.
* The special value MAX_LABEL is used to mark points that are not labelled.
*
* The two label arrays A and B induce two sets of groups over points 0..N-1.
* If a point is labelled i in A and j in B and the mask is true for this
* point, then i and j are equivalent labels and their groups are merged by
* relabeling the elements of both groups to have the same label. The new label
* is the smaller one from the original labels.
* It is required that if the mask is true for a point, this point is labelled
* (i.e. its label is different than the special value MAX_LABEL).
*
* One use case is finding connected components: the two input label arrays can
* represent the connected components of graphs G_A and G_B, and the output
* would be the connected components labels of G_A \union G_B.
*
* Public wrapper: forwards all arguments to detail::merge_labels.
*
* @tparam value_idx integer type of the labels and of N (defaults to int)
* @tparam TPB_X compile-time integer forwarded to detail::merge_labels
* (defaults to 256).
* NOTE(review): presumably the number of threads per block -- confirm.
* @param[inout] labels_a First input, and output label array (in-place)
* @param[in] labels_b Second input label array
* @param[in] mask Core point mask
* @param[out] R label equivalence map
* @param[in] m Working flag
* @param[in] N Number of points in the dataset
* @param[in] stream CUDA stream
*/
template <typename value_idx = int, int TPB_X = 256>
void merge_labels(value_idx* labels_a,
const value_idx* labels_b,
const bool* mask,
value_idx* R,
bool* m,
value_idx N,
cudaStream_t stream)
{
// Delegate to the hidden implementation in the detail namespace.
detail::merge_labels<value_idx, TPB_X>(labels_a, labels_b, mask, R, m, N, stream);
}

}; // namespace label
}; // namespace raft
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

#include <raft/cudart_utils.h>
#include <raft/handle.hpp>
#include <raft/lap/lap_kernels.cuh>
#include <raft/lap/detail/lap_kernels.cuh>
#include <rmm/device_scalar.hpp>
#include <rmm/device_uvector.hpp>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@

#include <raft/cudart_utils.h>
#include <raft/handle.hpp>
#include <raft/mr/device/buffer.hpp>

#include <thrust/for_each.h>

Expand Down
4 changes: 2 additions & 2 deletions cpp/include/raft/lap/lap.cuh → cpp/include/raft/lap/lap.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@
#include <raft/handle.hpp>
#include <rmm/device_uvector.hpp>

#include "d_structs.h"
#include "lap_functions.cuh"
#include "detail/d_structs.h"
#include "detail/lap_functions.cuh"

namespace raft {
namespace lap {
Expand Down
6 changes: 3 additions & 3 deletions cpp/include/raft/linalg/detail/lanczos.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@
#include "cublas_wrappers.hpp"
#include <raft/cudart_utils.h>
#include <raft/handle.hpp>
#include <raft/spectral/lapack.hpp>
#include <raft/spectral/matrix_wrappers.hpp>
#include <raft/spectral/warn_dbg.hpp>
#include <raft/spectral/detail/lapack.hpp>
#include <raft/spectral/detail/matrix_wrappers.cuh>
#include <raft/spectral/detail/warn_dbg.hpp>

namespace raft {

Expand Down
Loading

0 comments on commit 2ebf89c

Please sign in to comment.