rapidsai · rapids-bot · Feb 9, 2022 · Feb 1, 2022 · Feb 1, 2022 · Feb 1, 2022
diff --git a/cpp/include/raft/spectral/kmeans.hpp → cpp/include/raft/cluster/detail/kmeans.hpp b/cpp/include/raft/spectral/kmeans.hpp → cpp/include/raft/cluster/detail/kmeans.hpp
@@ -32,13 +32,12 @@
 #include <raft/device_atomics.cuh>
 #include <raft/handle.hpp>
 #include <raft/linalg/cublas_wrappers.h>
-#include <raft/spectral/matrix_wrappers.hpp>
-#include <raft/spectral/warn_dbg.hpp>
+#include <raft/spectral/detail/matrix_wrappers.hpp>
+#include <raft/spectral/detail/warn_dbg.hpp>
 
-namespace {
-
-using namespace raft;
-using namespace raft::linalg;
+namespace raft {
+namespace cluster {
+namespace detail {
 // =========================================================
 // Useful grid settings
 // =========================================================
@@ -657,20 +656,20 @@ static int updateCentroids(handle_t const& handle,
   thrust::device_ptr<index_type_t> rows(work_int + d * n);
 
   // Take transpose of observation matrix
-  RAFT_CUBLAS_TRY(cublasgeam(cublas_h,
-                             CUBLAS_OP_T,
-                             CUBLAS_OP_N,
-                             n,
-                             d,
-                             &one,
-                             obs,
-                             d,
-                             &zero,
-                             (value_type_t*)NULL,
-                             n,
-                             thrust::raw_pointer_cast(obs_copy),
-                             n,
-                             stream));
+  RAFT_CUBLAS_TRY(linalg::cublasgeam(cublas_h,
+                                     CUBLAS_OP_T,
+                                     CUBLAS_OP_N,
+                                     n,
+                                     d,
+                                     &one,
+                                     obs,
+                                     d,
+                                     &zero,
+                                     (value_type_t*)NULL,
+                                     n,
+                                     thrust::raw_pointer_cast(obs_copy),
+                                     n,
+                                     stream));
 
   // Cluster assigned to each observation matrix entry
   thrust::sequence(thrust_exec_policy, rows, rows + d * n);
@@ -727,10 +726,6 @@ static int updateCentroids(handle_t const& handle,
   return 0;
 }
 
-}  // namespace
-
-namespace raft {
-
 // =========================================================
 // k-means algorithm
 // =========================================================
@@ -983,4 +978,6 @@ int kmeans(handle_t const& handle,
                                             seed);
 }
 
+}  // namespace detail
+}  // namespace cluster
 }  // namespace raft
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <raft/cluster/detail/kmeans.hpp>
+
+namespace raft {
+namespace cluster {
+
+/**
+ *  @brief Find clusters with k-means algorithm.
+ *    Initial centroids are chosen with k-means++ algorithm. Empty
+ *    clusters are reinitialized by choosing new centroids with
+ *    k-means++ algorithm.
+ *  @tparam index_type_t the type of data used for indexing.
+ *  @tparam value_type_t the type of data used for weights, distances.
+ *  @param handle the raft handle.
+ *  @param n Number of observation vectors.
+ *  @param d Dimension of observation vectors.
+ *  @param k Number of clusters.
+ *  @param tol Tolerance for convergence. k-means stops when the
+ *    change in residual divided by n is less than tol.
+ *  @param maxiter Maximum number of k-means iterations.
+ *  @param obs (Input, device memory, d*n entries) Observation
+ *    matrix. Matrix is stored column-major and each column is an
+ *    observation vector. Matrix dimensions are d x n.
+ *  @param codes (Output, device memory, n entries) Cluster
+ *    assignments.
+ *  @param residual On exit, residual sum of squares (sum of squares
+ *    of distances between observation vectors and centroids).
+ *  @param iters on exit, number of k-means iterations.
+ *  @param seed random seed to be used.
+ *  @return error flag
+ */
+template <typename index_type_t, typename value_type_t>
+int kmeans(handle_t const& handle,
+           index_type_t n,
+           index_type_t d,
+           index_type_t k,
+           value_type_t tol,
+           index_type_t maxiter,
+           const value_type_t* __restrict__ obs,
+           index_type_t* __restrict__ codes,
+           value_type_t& residual,
+           index_type_t& iters,
+           unsigned long long seed = 123456)
+{
+  return detail::kmeans<index_type_t, value_type_t>(
+    handle, n, d, k, tol, maxiter, obs, codes, residual, iters, seed);
+}
+}  // namespace cluster
+}  // namespace raft
@@ -18,7 +18,6 @@
 
 #include <raft/comms/std_comms.hpp>
 #include <raft/handle.hpp>
-#include <raft/mr/device/buffer.hpp>
 
 #include <iostream>
 #include <nccl.h>

@@ -21,8 +21,6 @@
 #include <raft/comms/comms.hpp>
 #include <raft/comms/detail/std_comms.hpp>
 
-#include <raft/mr/device/buffer.hpp>
-
 #include <iostream>
 #include <nccl.h>
 #include <ucp/api/ucp.h>

@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <raft/label/detail/classlabels.cuh>
+
+namespace raft {
+namespace label {
+
+/**
+ * Get unique class labels.
+ *
+ * The y array is assumed to store class labels. The unique values are selected
+ * from this array.
+ *
+ * \tparam value_t numeric type of the arrays with class labels
+ * \param [in] y device array of labels, size [n]
+ * \param [in] n number of labels
+ * \param [out] unique device array of unique labels, unallocated on entry,
+ *   on exit it has size [n_unique]
+ * \param [out] n_unique number of unique labels
+ * \param [in] stream cuda stream
+ */
+template <typename value_t>
+int getUniquelabels(rmm::device_uvector<value_t>& unique, value_t* y, size_t n, cudaStream_t stream)
+{
+  return detail::getUniquelabels<value_t>(unique, y, n, stream);
+}
+
+/**
+ * Assign one versus rest labels.
+ *
+ * The output labels will have values +/-1:
+ * y_out = (y == y_unique[idx]) ? +1 : -1;
+ *
+ * The output type currently is set to value_t, but for SVM in principle we are
+ * free to choose other type for y_out (it should represent +/-1, and it is used
+ * in floating point arithmetics).
+ *
+ * \param [in] y device array if input labels, size [n]
+ * \param [in] n number of labels
+ * \param [in] y_unique device array of unique labels, size [n_classes]
+ * \param [in] n_classes number of unique labels
+ * \param [out] y_out device array of output labels
+ * \param [in] idx index of unique label that should be labeled as 1
+ * \param [in] stream cuda stream
+ */
+template <typename value_t>
+void getOvrlabels(
+  value_t* y, int n, value_t* y_unique, int n_classes, value_t* y_out, int idx, cudaStream_t stream)
+{
+  detail::getOvrlabels<value_t>(y, n, y_unique, n_classes, y_out, idx, stream);
+}
+/**
+ * Maps an input array containing a series of numbers into a new array
+ * where numbers have been mapped to a monotonically increasing set
+ * of labels. This can be useful in machine learning algorithms, for instance,
+ * where a given set of labels is not taken from a monotonically increasing
+ * set. This can happen if they are filtered or if only a subset of the
+ * total labels are used in a dataset. This is also useful in graph algorithms
+ * where a set of vertices need to be labeled in a monotonically increasing
+ * order.
+ * @tparam Type the numeric type of the input and output arrays
+ * @tparam Lambda the type of an optional filter function, which determines
+ * which items in the array to map.
+ * @param out the output monotonic array
+ * @param in input label array
+ * @param N number of elements in the input array
+ * @param stream cuda stream to use
+ * @param filter_op an optional function for specifying which values
+ * should have monotonically increasing labels applied to them.
+ */
+template <typename Type, typename Lambda>
+void make_monotonic(
+  Type* out, Type* in, size_t N, cudaStream_t stream, Lambda filter_op, bool zero_based = false)
+{
+  detail::make_monotonic<Type, Lambda>(out, in, N, stream, filter_op, zero_based);
+}
+
+/**
+ * Maps an input array containing a series of numbers into a new array
+ * where numbers have been mapped to a monotonically increasing set
+ * of labels. This can be useful in machine learning algorithms, for instance,
+ * where a given set of labels is not taken from a monotonically increasing
+ * set. This can happen if they are filtered or if only a subset of the
+ * total labels are used in a dataset. This is also useful in graph algorithms
+ * where a set of vertices need to be labeled in a monotonically increasing
+ * order.
+ * @tparam Type the numeric type of the input and output arrays
+ * @tparam Lambda the type of an optional filter function, which determines
+ * which items in the array to map.
+ * @param out output label array with labels assigned monotonically
+ * @param in input label array
+ * @param N number of elements in the input array
+ * @param stream cuda stream to use
+ */
+template <typename Type>
+void make_monotonic(Type* out, Type* in, size_t N, cudaStream_t stream, bool zero_based = false)
+{
+  detail::make_monotonic<Type>(out, in, N, stream, zero_based);
+}
+};  // namespace label
+};  // end namespace raft
@@ -26,6 +26,7 @@
 
 namespace raft {
 namespace label {
+namespace detail {
 
 /**
  * Get unique class labels.
@@ -194,5 +195,7 @@ void make_monotonic(Type* out, Type* in, size_t N, cudaStream_t stream, bool zer
   make_monotonic<Type>(
     out, in, N, stream, [] __device__(Type val) { return false; }, zero_based);
 }
+
+};  // namespace detail
 };  // namespace label
 };  // end namespace raft
@@ -25,6 +25,7 @@
 
 namespace raft {
 namespace label {
+namespace detail {
 
 /** Note: this is one possible implementation where we represent the label
  *  equivalence graph implicitly using labels_a, labels_b and mask.
@@ -153,5 +154,6 @@ void merge_labels(value_idx* labels_a,
   RAFT_CUDA_TRY(cudaPeekAtLastError());
 }
 
+}  // namespace detail
 };  // namespace label
 };  // namespace raft
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <raft/label/detail/merge_labels.cuh>
+
+namespace raft {
+namespace label {
+
+/**
+ * @brief Merge two labellings in-place, according to a core mask
+ *
+ * A labelling is a representation of disjoint sets (groups) where points that
+ * belong to the same group have the same label. It is assumed that group
+ * labels take values between 1 and N. labels relate to points, i.e a label i+1
+ * means that you belong to the same group as the point i.
+ * The special value MAX_LABEL is used to mark points that are not labelled.
+ *
+ * The two label arrays A and B induce two sets of groups over points 0..N-1.
+ * If a point is labelled i in A and j in B and the mask is true for this
+ * point, then i and j are equivalent labels and their groups are merged by
+ * relabeling the elements of both groups to have the same label. The new label
+ * is the smaller one from the original labels.
+ * It is required that if the mask is true for a point, this point is labelled
+ * (i.e its label is different than the special value MAX_LABEL).
+ *
+ * One use case is finding connected components: the two input label arrays can
+ * represent the connected components of graphs G_A and G_B, and the output
+ * would be the connected components labels of G_A \union G_B.
+ *
+ * @param[inout] labels_a    First input, and output label array (in-place)
+ * @param[in]    labels_b    Second input label array
+ * @param[in]    mask        Core point mask
+ * @param[out]   R           label equivalence map
+ * @param[in]    m           Working flag
+ * @param[in]    N           Number of points in the dataset
+ * @param[in]    stream      CUDA stream
+ */
+template <typename value_idx = int, int TPB_X = 256>
+void merge_labels(value_idx* labels_a,
+                  const value_idx* labels_b,
+                  const bool* mask,
+                  value_idx* R,
+                  bool* m,
+                  value_idx N,
+                  cudaStream_t stream)
+{
+  detail::merge_labels<value_idx, TPB_X>(labels_a, labels_b, mask, R, m, N, stream);
+}
+
+};  // namespace label
+};  // namespace raft
@@ -28,7 +28,7 @@
 
 #include <raft/cudart_utils.h>
 #include <raft/handle.hpp>
-#include <raft/lap/lap_kernels.cuh>
+#include <raft/lap/detail/lap_kernels.cuh>
 #include <rmm/device_scalar.hpp>
 #include <rmm/device_uvector.hpp>
 

@@ -28,7 +28,6 @@
 
 #include <raft/cudart_utils.h>
 #include <raft/handle.hpp>
-#include <raft/mr/device/buffer.hpp>
 
 #include <thrust/for_each.h>