From 37f627c78d8b8ec10cef9a7b7a1207b7cafc621f Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Thu, 10 Feb 2022 19:20:45 -0500 Subject: [PATCH] Move `random` package of cuML to RAFT (#449) This includes: - [ ] make_arima - [x] make_blobs - [x] make_regression - [x] multi_variable_gaussian - [x] permute Authors: - Divye Gala (https://github.com/divyegala) - Corey J. Nolet (https://github.com/cjnolet) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/449 --- .../raft/random/detail/curand_wrappers.hpp | 57 ++++ cpp/include/raft/random/detail/make_blobs.cuh | 179 ++++++++++ .../raft/random/detail/make_regression.cuh | 285 ++++++++++++++++ .../random/detail/multi_variable_gaussian.cuh | 290 ++++++++++++++++ cpp/include/raft/random/detail/permute.cuh | 162 +++++++++ cpp/include/raft/random/make_blobs.hpp | 91 +++++ cpp/include/raft/random/make_regression.hpp | 100 ++++++ .../raft/random/multi_variable_gaussian.hpp | 59 ++++ cpp/include/raft/random/permute.hpp | 58 ++++ cpp/test/CMakeLists.txt | 4 + cpp/test/random/make_blobs.cu | 263 +++++++++++++++ cpp/test/random/make_regression.cu | 163 +++++++++ cpp/test/random/multi_variable_gaussian.cu | 315 ++++++++++++++++++ cpp/test/random/permute.cu | 237 +++++++++++++ cpp/test/sparse/filter.cu | 2 +- cpp/test/sparse/norm.cu | 2 +- cpp/test/sparse/reduce.cu | 2 +- 17 files changed, 2266 insertions(+), 3 deletions(-) create mode 100644 cpp/include/raft/random/detail/curand_wrappers.hpp create mode 100644 cpp/include/raft/random/detail/make_blobs.cuh create mode 100644 cpp/include/raft/random/detail/make_regression.cuh create mode 100644 cpp/include/raft/random/detail/multi_variable_gaussian.cuh create mode 100644 cpp/include/raft/random/detail/permute.cuh create mode 100644 cpp/include/raft/random/make_blobs.hpp create mode 100644 cpp/include/raft/random/make_regression.hpp create mode 100644 cpp/include/raft/random/multi_variable_gaussian.hpp create mode 100644 cpp/include/raft/random/permute.hpp create mode 100644 cpp/test/random/make_blobs.cu create mode 100644 cpp/test/random/make_regression.cu create mode 100644 cpp/test/random/multi_variable_gaussian.cu create mode 100644 cpp/test/random/permute.cu diff --git a/cpp/include/raft/random/detail/curand_wrappers.hpp b/cpp/include/raft/random/detail/curand_wrappers.hpp new file mode 100644 index 0000000000..969d739cc1 --- /dev/null +++ b/cpp/include/raft/random/detail/curand_wrappers.hpp @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace raft::random { +namespace detail { + +// @todo: We probably want to scrape through and replace any consumers of +// these wrappers with our RNG +/** check for curand runtime API errors and assert accordingly */ +#define CURAND_CHECK(call) \ + do { \ + curandStatus_t status = call; \ + ASSERT(status == CURAND_STATUS_SUCCESS, "FAIL: curand-call='%s'. Reason:%d\n", #call, status); \ + } while (0) + +/** + * @defgroup normal curand normal random number generation operations + * @{ + */ +template +curandStatus_t curandGenerateNormal( + curandGenerator_t generator, T* outputPtr, size_t n, T mean, T stddev); + +template <> +inline curandStatus_t curandGenerateNormal( + curandGenerator_t generator, float* outputPtr, size_t n, float mean, float stddev) +{ + return curandGenerateNormal(generator, outputPtr, n, mean, stddev); +} + +template <> +inline curandStatus_t curandGenerateNormal( + curandGenerator_t generator, double* outputPtr, size_t n, double mean, double stddev) +{ + return curandGenerateNormalDouble(generator, outputPtr, n, mean, stddev); +} +/** @} */ + +}; // end namespace detail +}; // end namespace raft::random \ No newline at end of file diff --git a/cpp/include/raft/random/detail/make_blobs.cuh b/cpp/include/raft/random/detail/make_blobs.cuh new file mode 100644 index 0000000000..528d20a284 --- /dev/null +++ b/cpp/include/raft/random/detail/make_blobs.cuh @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace raft::random { +namespace detail { + +namespace { + +// generate the labels first and shuffle them instead of shuffling the dataset +template +void generate_labels(IdxT* labels, + IdxT n_rows, + IdxT n_clusters, + bool shuffle, + raft::random::Rng& r, + cudaStream_t stream) +{ + IdxT a, b; + r.affine_transform_params(n_clusters, a, b); + auto op = [=] __device__(IdxT * ptr, IdxT idx) { + if (shuffle) { idx = IdxT((a * int64_t(idx)) + b); } + idx %= n_clusters; + // in the unlikely case of n_clusters > n_rows, make sure that the writes + // do not go out-of-bounds + if (idx < n_rows) { *ptr = idx; } + }; + raft::linalg::writeOnlyUnaryOp(labels, n_rows, op, stream); +} + +template +DI void get_mu_sigma(DataT& mu, + DataT& sigma, + IdxT idx, + const IdxT* labels, + bool row_major, + const DataT* centers, + const DataT* cluster_std, + DataT cluster_std_scalar, + IdxT n_rows, + IdxT n_cols, + IdxT n_clusters) +{ + IdxT cid, fid; + if (row_major) { + cid = idx / n_cols; + fid = idx % n_cols; + } else { + cid = idx % n_rows; + fid = idx / n_rows; + } + IdxT center_id; + if (cid < n_rows) { + center_id = labels[cid]; + } else { + center_id = 0; + } + + if (fid >= n_cols) { fid = 0; } + + if (row_major) { + center_id = center_id * n_cols + fid; + } else { + center_id += fid * n_clusters; + } + sigma = cluster_std == nullptr ? cluster_std_scalar : cluster_std[cid]; + mu = centers[center_id]; +} + +template +void generate_data(DataT* out, + const IdxT* labels, + IdxT n_rows, + IdxT n_cols, + IdxT n_clusters, + cudaStream_t stream, + bool row_major, + const DataT* centers, + const DataT* cluster_std, + const DataT cluster_std_scalar, + raft::random::Rng& rng) +{ + auto op = [=] __device__(DataT & val1, DataT & val2, IdxT idx1, IdxT idx2) { + DataT mu1, sigma1, mu2, sigma2; + get_mu_sigma(mu1, + sigma1, + idx1, + labels, + row_major, + centers, + cluster_std, + cluster_std_scalar, + n_rows, + n_cols, + n_clusters); + get_mu_sigma(mu2, + sigma2, + idx2, + labels, + row_major, + centers, + cluster_std, + cluster_std_scalar, + n_rows, + n_cols, + n_clusters); + raft::random::box_muller_transform(val1, val2, sigma1, mu1, sigma2, mu2); + }; + rng.custom_distribution2(out, n_rows * n_cols, op, stream); +} + +} // namespace + +template +void make_blobs_caller(DataT* out, + IdxT* labels, + IdxT n_rows, + IdxT n_cols, + IdxT n_clusters, + cudaStream_t stream, + bool row_major = true, + const DataT* centers = nullptr, + const DataT* cluster_std = nullptr, + const DataT cluster_std_scalar = (DataT)1.0, + bool shuffle = true, + DataT center_box_min = (DataT)-10.0, + DataT center_box_max = (DataT)10.0, + uint64_t seed = 0ULL, + raft::random::GeneratorType type = raft::random::GenPhilox) +{ + raft::random::Rng r(seed, type); + // use the right centers buffer for data generation + rmm::device_uvector rand_centers(0, stream); + const DataT* _centers; + if (centers == nullptr) { + rand_centers.resize(n_clusters * n_cols, stream); + r.uniform(rand_centers.data(), n_clusters * n_cols, center_box_min, center_box_max, stream); + _centers = rand_centers.data(); + } else { + _centers = centers; + } + generate_labels(labels, n_rows, n_clusters, shuffle, r, stream); + generate_data(out, + labels, + n_rows, + n_cols, + n_clusters, + stream, + row_major, + _centers, + cluster_std, + cluster_std_scalar, + r); +} + +} // end namespace detail +} // end namespace raft::random \ No newline at end of file diff --git a/cpp/include/raft/random/detail/make_regression.cuh b/cpp/include/raft/random/detail/make_regression.cuh new file mode 100644 index 0000000000..eb8eaf565e --- /dev/null +++ b/cpp/include/raft/random/detail/make_regression.cuh @@ -0,0 +1,285 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Adapted from scikit-learn + * https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/datasets/_samples_generator.py + */ + +#pragma once + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace raft::random { +namespace detail { + +/* Internal auxiliary function to help build the singular profile */ +template +static __global__ void _singular_profile_kernel(DataT* out, IdxT n, DataT tail_strength, IdxT rank) +{ + IdxT tid = threadIdx.x + blockIdx.x * blockDim.x; + if (tid < n) { + DataT sval = static_cast(tid) / rank; + DataT low_rank = ((DataT)1.0 - tail_strength) * raft::myExp(-sval * sval); + DataT tail = tail_strength * raft::myExp((DataT)-0.1 * sval); + out[tid] = low_rank + tail; + } +} + +/* Internal auxiliary function to generate a low-rank matrix */ +template +static void _make_low_rank_matrix(const raft::handle_t& handle, + DataT* out, + IdxT n_rows, + IdxT n_cols, + IdxT effective_rank, + DataT tail_strength, + raft::random::Rng& r, + cudaStream_t stream) +{ + cusolverDnHandle_t cusolver_handle = handle.get_cusolver_dn_handle(); + cublasHandle_t cublas_handle = handle.get_cublas_handle(); + + IdxT n = std::min(n_rows, n_cols); + + // Generate random (ortho normal) vectors with QR decomposition + rmm::device_uvector rd_mat_0(n_rows * n, stream); + rmm::device_uvector rd_mat_1(n_cols * n, stream); + r.normal(rd_mat_0.data(), n_rows * n, (DataT)0.0, (DataT)1.0, stream); + r.normal(rd_mat_1.data(), n_cols * n, (DataT)0.0, (DataT)1.0, stream); + rmm::device_uvector q0(n_rows * n, stream); + rmm::device_uvector q1(n_cols * n, stream); + raft::linalg::qrGetQ(handle, rd_mat_0.data(), q0.data(), n_rows, n, stream); + raft::linalg::qrGetQ(handle, rd_mat_1.data(), q1.data(), n_cols, n, stream); + + // Build the singular profile by assembling signal and noise components + rmm::device_uvector singular_vec(n, stream); + _singular_profile_kernel<<(n, 256), 256, 0, stream>>>( + singular_vec.data(), n, tail_strength, effective_rank); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + rmm::device_uvector singular_mat(n * n, stream); + RAFT_CUDA_TRY(cudaMemsetAsync(singular_mat.data(), 0, n * n * sizeof(DataT), stream)); + raft::matrix::initializeDiagonalMatrix(singular_vec.data(), singular_mat.data(), n, n, stream); + + // Generate the column-major matrix + rmm::device_uvector temp_q0s(n_rows * n, stream); + rmm::device_uvector temp_out(n_rows * n_cols, stream); + DataT alpha = 1.0, beta = 0.0; + raft::linalg::detail::cublasgemm(cublas_handle, + CUBLAS_OP_N, + CUBLAS_OP_N, + n_rows, + n, + n, + &alpha, + q0.data(), + n_rows, + singular_mat.data(), + n, + &beta, + temp_q0s.data(), + n_rows, + stream); + raft::linalg::detail::cublasgemm(cublas_handle, + CUBLAS_OP_N, + CUBLAS_OP_T, + n_rows, + n_cols, + n, + &alpha, + temp_q0s.data(), + n_rows, + q1.data(), + n_cols, + &beta, + temp_out.data(), + n_rows, + stream); + + // Transpose from column-major to row-major + raft::linalg::transpose(handle, temp_out.data(), out, n_rows, n_cols, stream); +} + +/* Internal auxiliary function to permute rows in the given matrix according + * to a given permutation vector */ +template +static __global__ void _gather2d_kernel( + DataT* out, const DataT* in, const IdxT* perms, IdxT n_rows, IdxT n_cols) +{ + IdxT tid = blockIdx.x * blockDim.x + threadIdx.x; + + if (tid < n_rows) { + const DataT* row_in = in + n_cols * perms[tid]; + DataT* row_out = out + n_cols * tid; + + for (IdxT i = 0; i < n_cols; i++) { + row_out[i] = row_in[i]; + } + } +} + +template +void make_regression_caller(const raft::handle_t& handle, + DataT* out, + DataT* values, + IdxT n_rows, + IdxT n_cols, + IdxT n_informative, + cudaStream_t stream, + DataT* coef = nullptr, + IdxT n_targets = (IdxT)1, + DataT bias = (DataT)0.0, + IdxT effective_rank = (IdxT)-1, + DataT tail_strength = (DataT)0.5, + DataT noise = (DataT)0.0, + bool shuffle = true, + uint64_t seed = 0ULL, + raft::random::GeneratorType type = raft::random::GenPhilox) +{ + n_informative = std::min(n_informative, n_cols); + + cusolverDnHandle_t cusolver_handle = handle.get_cusolver_dn_handle(); + cublasHandle_t cublas_handle = handle.get_cublas_handle(); + + cublasSetPointerMode(cublas_handle, CUBLAS_POINTER_MODE_HOST); + raft::random::Rng r(seed, type); + + if (effective_rank < 0) { + // Randomly generate a well conditioned input set + r.normal(out, n_rows * n_cols, (DataT)0.0, (DataT)1.0, stream); + } else { + // Randomly generate a low rank, fat tail input set + _make_low_rank_matrix(handle, out, n_rows, n_cols, effective_rank, tail_strength, r, stream); + } + + // Use the right output buffer for the values + rmm::device_uvector tmp_values(0, stream); + DataT* _values; + if (shuffle) { + tmp_values.resize(n_rows * n_targets, stream); + _values = tmp_values.data(); + } else { + _values = values; + } + // Create a column-major matrix of output values only if it has more + // than 1 column + rmm::device_uvector values_col(0, stream); + DataT* _values_col; + if (n_targets > 1) { + values_col.resize(n_rows * n_targets, stream); + _values_col = values_col.data(); + } else { + _values_col = _values; + } + + // Use the right buffer for the coefficients + rmm::device_uvector tmp_coef(0, stream); + DataT* _coef; + if (coef != nullptr && !shuffle) { + _coef = coef; + } else { + tmp_coef.resize(n_cols * n_targets, stream); + _coef = tmp_coef.data(); + } + + // Generate a ground truth model with only n_informative features + r.uniform(_coef, n_informative * n_targets, (DataT)1.0, (DataT)100.0, stream); + if (coef && n_informative != n_cols) { + RAFT_CUDA_TRY(cudaMemsetAsync(_coef + n_informative * n_targets, + 0, + (n_cols - n_informative) * n_targets * sizeof(DataT), + stream)); + } + + // Compute the output values + DataT alpha = (DataT)1.0, beta = (DataT)0.0; + RAFT_CUBLAS_TRY(raft::linalg::detail::cublasgemm(cublas_handle, + CUBLAS_OP_T, + CUBLAS_OP_T, + n_rows, + n_targets, + n_informative, + &alpha, + out, + n_cols, + _coef, + n_targets, + &beta, + _values_col, + n_rows, + stream)); + + // Transpose the values from column-major to row-major if needed + if (n_targets > 1) { + raft::linalg::transpose(handle, _values_col, _values, n_rows, n_targets, stream); + } + + if (bias != 0.0) { + // Add bias + raft::linalg::addScalar(_values, _values, bias, n_rows * n_targets, stream); + } + + rmm::device_uvector white_noise(0, stream); + if (noise != 0.0) { + // Add white noise + white_noise.resize(n_rows * n_targets, stream); + r.normal(white_noise.data(), n_rows * n_targets, (DataT)0.0, noise, stream); + raft::linalg::add(_values, _values, white_noise.data(), n_rows * n_targets, stream); + } + + if (shuffle) { + rmm::device_uvector tmp_out(n_rows * n_cols, stream); + rmm::device_uvector perms_samples(n_rows, stream); + rmm::device_uvector perms_features(n_cols, stream); + + constexpr IdxT Nthreads = 256; + + // Shuffle the samples from out to tmp_out + raft::random::permute( + perms_samples.data(), tmp_out.data(), out, n_cols, n_rows, true, stream); + IdxT nblks_rows = raft::ceildiv(n_rows, Nthreads); + _gather2d_kernel<<>>( + values, _values, perms_samples.data(), n_rows, n_targets); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + + // Shuffle the features from tmp_out to out + raft::random::permute( + perms_features.data(), out, tmp_out.data(), n_rows, n_cols, false, stream); + + // Shuffle the coefficients accordingly + if (coef != nullptr) { + IdxT nblks_cols = raft::ceildiv(n_cols, Nthreads); + _gather2d_kernel<<>>( + coef, _coef, perms_features.data(), n_cols, n_targets); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + } + } +} + +} // namespace detail +} // namespace raft::random \ No newline at end of file diff --git a/cpp/include/raft/random/detail/multi_variable_gaussian.cuh b/cpp/include/raft/random/detail/multi_variable_gaussian.cuh new file mode 100644 index 0000000000..bf79b3cb71 --- /dev/null +++ b/cpp/include/raft/random/detail/multi_variable_gaussian.cuh @@ -0,0 +1,290 @@ +/* + * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once +#include "curand_wrappers.hpp" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// mvg.cuh takes in matrices that are colomn major (as in fortan) +#define IDX2C(i, j, ld) (j * ld + i) + +namespace raft::random { +namespace detail { + +enum Filler : unsigned char { + LOWER, // = 0 + UPPER // = 1 +}; // used in memseting upper/lower matrix + +/** + * @brief Reset values within the epsilon absolute range to zero + * @tparam T the data type + * @param eig the array + * @param epsilon the range + * @param size length of the array + * @param stream cuda stream + */ +template +void epsilonToZero(T* eig, T epsilon, int size, cudaStream_t stream) +{ + raft::linalg::unaryOp( + eig, + eig, + size, + [epsilon] __device__(T in) { return (in < epsilon && in > -epsilon) ? T(0.0) : in; }, + stream); +} + +/** + * @brief Broadcast addition of vector onto a matrix + * @tparam the data type + * @param out the output matrix + * @param in_m the input matrix + * @param in_v the input vector + * @param scalar scalar multiplier + * @param rows number of rows in the input matrix + * @param cols number of cols in the input matrix + * @param stream cuda stream + */ +template +void matVecAdd( + T* out, const T* in_m, const T* in_v, T scalar, int rows, int cols, cudaStream_t stream) +{ + raft::linalg::matrixVectorOp( + out, + in_m, + in_v, + cols, + rows, + true, + true, + [=] __device__(T mat, T vec) { return mat + scalar * vec; }, + stream); +} + +// helper kernels +template +__global__ void combined_dot_product(int rows, int cols, const T* W, T* matrix, int* check) +{ + int m_i = threadIdx.x + blockDim.x * blockIdx.x; + int Wi = m_i / cols; + if (m_i < cols * rows) { + if (W[Wi] >= 0.0) + matrix[m_i] = pow(W[Wi], 0.5) * (matrix[m_i]); + else + check[0] = Wi; // reports Wi'th eigen values is negative. + } +} + +template // if uplo = 0, lower part of dim x dim matrix set to +// value +__global__ void fill_uplo(int dim, Filler uplo, T value, T* A) +{ + int j = threadIdx.x + blockDim.x * blockIdx.x; + int i = threadIdx.y + blockDim.y * blockIdx.y; + if (i < dim && j < dim) { + // making off-diagonals == value + if (i < j) { + if (uplo == 1) A[IDX2C(i, j, dim)] = value; + } else if (i > j) { + if (uplo == 0) A[IDX2C(i, j, dim)] = value; + } + } +} + +template +class multi_variable_gaussian_impl { + public: + enum Decomposer : unsigned char { chol_decomp, jacobi, qr }; + + private: + // adjustable stuff + const int dim; + const int nPoints = 1; + const double tol = 1.e-7; + const T epsilon = 1.e-12; + const int max_sweeps = 100; + cublasFillMode_t uplo = CUBLAS_FILL_MODE_LOWER; + const Decomposer method; + + // not so much + T *P = 0, *X = 0, *x = 0, *workspace_decomp = 0, *eig = 0; + int *info, Lwork, info_h; + syevjInfo_t syevj_params = NULL; + curandGenerator_t gen; + const raft::handle_t& handle; + cusolverEigMode_t jobz = CUSOLVER_EIG_MODE_VECTOR; + bool deinitilized = false; + + public: // functions + multi_variable_gaussian_impl() = delete; + multi_variable_gaussian_impl(const raft::handle_t& handle, const int dim, Decomposer method) + : handle(handle), dim(dim), method(method) + { + auto cusolverHandle = handle.get_cusolver_dn_handle(); + + CURAND_CHECK(curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT)); + CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(gen, 28)); // SEED + if (method == chol_decomp) { + RAFT_CUSOLVER_TRY(raft::linalg::detail::cusolverDnpotrf_bufferSize( + cusolverHandle, uplo, dim, P, dim, &Lwork)); + } else if (method == jacobi) { // jacobi init + RAFT_CUSOLVER_TRY(cusolverDnCreateSyevjInfo(&syevj_params)); + RAFT_CUSOLVER_TRY(cusolverDnXsyevjSetTolerance(syevj_params, tol)); + RAFT_CUSOLVER_TRY(cusolverDnXsyevjSetMaxSweeps(syevj_params, max_sweeps)); + RAFT_CUSOLVER_TRY(raft::linalg::detail::cusolverDnsyevj_bufferSize( + cusolverHandle, jobz, uplo, dim, P, dim, eig, &Lwork, syevj_params)); + } else { // method == qr + RAFT_CUSOLVER_TRY(raft::linalg::detail::cusolverDnsyevd_bufferSize( + cusolverHandle, jobz, uplo, dim, P, dim, eig, &Lwork)); + } + } + + std::size_t get_workspace_size() + { + // malloc workspace_decomp + std::size_t granuality = 256, offset = 0; + workspace_decomp = (T*)offset; + offset += raft::alignTo(sizeof(T) * Lwork, granuality); + eig = (T*)offset; + offset += raft::alignTo(sizeof(T) * dim, granuality); + info = (int*)offset; + offset += raft::alignTo(sizeof(int), granuality); + return offset; + } + + void set_workspace(T* workarea) + { + workspace_decomp = (T*)((std::size_t)workspace_decomp + (std::size_t)workarea); + eig = (T*)((std::size_t)eig + (std::size_t)workarea); + info = (int*)((std::size_t)info + (std::size_t)workarea); + } + + void give_gaussian(const int nPoints, T* P, T* X, const T* x = 0) + { + auto cusolverHandle = handle.get_cusolver_dn_handle(); + auto cublasHandle = handle.get_cublas_handle(); + auto cudaStream = handle.get_stream(); + if (method == chol_decomp) { + // lower part will contains chol_decomp + RAFT_CUSOLVER_TRY(raft::linalg::detail::cusolverDnpotrf( + cusolverHandle, uplo, dim, P, dim, workspace_decomp, Lwork, info, cudaStream)); + } else if (method == jacobi) { + RAFT_CUSOLVER_TRY( + raft::linalg::detail::cusolverDnsyevj(cusolverHandle, + jobz, + uplo, + dim, + P, + dim, + eig, + workspace_decomp, + Lwork, + info, + syevj_params, + cudaStream)); // vectors stored as cols. & col major + } else { // qr + RAFT_CUSOLVER_TRY(raft::linalg::detail::cusolverDnsyevd( + cusolverHandle, jobz, uplo, dim, P, dim, eig, workspace_decomp, Lwork, info, cudaStream)); + } + raft::update_host(&info_h, info, 1, cudaStream); + RAFT_CUDA_TRY(cudaStreamSynchronize(cudaStream)); + ASSERT(info_h == 0, "mvg: error in syevj/syevd/potrf, info=%d | expected=0", info_h); + T mean = 0.0, stddv = 1.0; + // generate nxN gaussian nums in X + CURAND_CHECK( + detail::curandGenerateNormal(gen, X, (nPoints * dim) + (nPoints * dim) % 2, mean, stddv)); + T alfa = 1.0, beta = 0.0; + if (method == chol_decomp) { + // upper part (0) being filled with 0.0 + dim3 block(32, 32); + dim3 grid(raft::ceildiv(dim, (int)block.x), raft::ceildiv(dim, (int)block.y)); + fill_uplo<<>>(dim, UPPER, (T)0.0, P); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + + // P is lower triangular chol decomp mtrx + RAFT_CUBLAS_TRY(raft::linalg::detail::cublasgemm(cublasHandle, + CUBLAS_OP_N, + CUBLAS_OP_N, + dim, + nPoints, + dim, + &alfa, + P, + dim, + X, + dim, + &beta, + X, + dim, + cudaStream)); + } else { + epsilonToZero(eig, epsilon, dim, cudaStream); + dim3 block(64); + dim3 grid(raft::ceildiv(dim, (int)block.x)); + RAFT_CUDA_TRY(cudaMemsetAsync(info, 0, sizeof(int), cudaStream)); + grid.x = raft::ceildiv(dim * dim, (int)block.x); + combined_dot_product<<>>(dim, dim, eig, P, info); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + + // checking if any eigen vals were negative + raft::update_host(&info_h, info, 1, cudaStream); + RAFT_CUDA_TRY(cudaStreamSynchronize(cudaStream)); + ASSERT(info_h == 0, "mvg: Cov matrix has %dth Eigenval negative", info_h); + + // Got Q = eigvect*eigvals.sqrt in P, Q*X in X below + RAFT_CUBLAS_TRY(raft::linalg::detail::cublasgemm(cublasHandle, + CUBLAS_OP_N, + CUBLAS_OP_N, + dim, + nPoints, + dim, + &alfa, + P, + dim, + X, + dim, + &beta, + X, + dim, + cudaStream)); + } + // working to make mean not 0 + // since we are working with column-major, nPoints and dim are swapped + if (x != NULL) matVecAdd(X, X, x, T(1.0), nPoints, dim, cudaStream); + } + + void deinit() + { + if (deinitilized) return; + CURAND_CHECK(curandDestroyGenerator(gen)); + RAFT_CUSOLVER_TRY(cusolverDnDestroySyevjInfo(syevj_params)); + deinitilized = true; + } + + ~multi_variable_gaussian_impl() { deinit(); } +}; // end of multi_variable_gaussian_impl + +}; // end of namespace detail +}; // end of namespace raft::random \ No newline at end of file diff --git a/cpp/include/raft/random/detail/permute.cuh b/cpp/include/raft/random/detail/permute.cuh new file mode 100644 index 0000000000..28eaf9136c --- /dev/null +++ b/cpp/include/raft/random/detail/permute.cuh @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace raft::random { +namespace detail { + +template +__global__ void permuteKernel( + IntType* perms, Type* out, const Type* in, IdxType a, IdxType b, IdxType N, IdxType D) +{ + namespace cg = cooperative_groups; + const int WARP_SIZE = 32; + + int tid = threadIdx.x + blockIdx.x * blockDim.x; + + // having shuffled input indices and coalesced output indices appears + // to be preferrable to the reverse, especially for column major + IntType inIdx = ((a * int64_t(tid)) + b) % N; + IntType outIdx = tid; + + if (perms != nullptr && tid < N) { perms[outIdx] = inIdx; } + + if (out == nullptr || in == nullptr) { return; } + + if (rowMajor) { + cg::thread_block_tile warp = cg::tiled_partition(cg::this_thread_block()); + + __shared__ IntType inIdxShm[TPB]; + __shared__ IntType outIdxShm[TPB]; + inIdxShm[threadIdx.x] = inIdx; + outIdxShm[threadIdx.x] = outIdx; + warp.sync(); + + int warpID = threadIdx.x / WARP_SIZE; + int laneID = threadIdx.x % WARP_SIZE; + for (int i = warpID * WARP_SIZE; i < warpID * WARP_SIZE + WARP_SIZE; ++i) { + if (outIdxShm[i] < N) { +#pragma unroll + for (int j = laneID; j < D; j += WARP_SIZE) { + out[outIdxShm[i] * D + j] = in[inIdxShm[i] * D + j]; + } + } + } + } else { +#pragma unroll + for (int j = 0; j < D; ++j) { + if (tid < N) { out[outIdx + j * N] = in[inIdx + j * N]; } + } + } +} + +// This is wrapped in a type to allow for partial template specialization +template +struct permute_impl_t { + static void permuteImpl(IntType* perms, + Type* out, + const Type* in, + IdxType N, + IdxType D, + int nblks, + IdxType a, + IdxType b, + cudaStream_t stream) + { + // determine vector type and set new pointers + typedef typename raft::IOType::Type VType; + VType* vout = reinterpret_cast(out); + const VType* vin = reinterpret_cast(in); + + // check if we can execute at this vector length + if (D % VLen == 0 && raft::is_aligned(vout, sizeof(VType)) && + raft::is_aligned(vin, sizeof(VType))) { + permuteKernel + <<>>(perms, vout, vin, a, b, N, D / VLen); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + } else { // otherwise try the next lower vector length + permute_impl_t::permuteImpl( + perms, out, in, N, D, nblks, a, b, stream); + } + } +}; + +// at vector length 1 we just execute a scalar version to break the recursion +template +struct permute_impl_t { + static void permuteImpl(IntType* perms, + Type* out, + const Type* in, + IdxType N, + IdxType D, + int nblks, + IdxType a, + IdxType b, + cudaStream_t stream) + { + permuteKernel + <<>>(perms, out, in, a, b, N, D); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + } +}; + +template +void permute(IntType* perms, + Type* out, + const Type* in, + IntType D, + IntType N, + bool rowMajor, + cudaStream_t stream) +{ + auto nblks = raft::ceildiv(N, (IntType)TPB); + + // always keep 'a' to be coprime to N + IdxType a = rand() % N; + while (raft::gcd(a, N) != 1) + a = (a + 1) % N; + IdxType b = rand() % N; + + if (rowMajor) { + permute_impl_t 0) ? 16 / sizeof(Type) : 1>::permuteImpl(perms, + out, + in, + N, + D, + nblks, + a, + b, + stream); + } else { + permute_impl_t::permuteImpl( + perms, out, in, N, D, nblks, a, b, stream); + } +} + +}; // end namespace detail +}; // end namespace raft::random \ No newline at end of file diff --git a/cpp/include/raft/random/make_blobs.hpp b/cpp/include/raft/random/make_blobs.hpp new file mode 100644 index 0000000000..afdabfe55b --- /dev/null +++ b/cpp/include/raft/random/make_blobs.hpp @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "detail/make_blobs.cuh" + +namespace raft::random { + +/** + * @brief GPU-equivalent of sklearn.datasets.make_blobs + * + * @tparam DataT output data type + * @tparam IdxT indexing arithmetic type + * + * @param[out] out generated data [on device] + * [dim = n_rows x n_cols] + * @param[out] labels labels for the generated data [on device] + * [len = n_rows] + * @param[in] n_rows number of rows in the generated data + * @param[in] n_cols number of columns in the generated data + * @param[in] n_clusters number of clusters (or classes) to generate + * @param[in] stream cuda stream to schedule the work on + * @param[in] row_major whether input `centers` and output `out` + * buffers are to be stored in row or column + * major layout + * @param[in] centers centers of each of the cluster, pass a nullptr + * if you need this also to be generated randomly + * [on device] [dim = n_clusters x n_cols] + * @param[in] cluster_std standard deviation of each cluster center, + * pass a nullptr if this is to be read from the + * `cluster_std_scalar`. [on device] + * [len = n_clusters] + * @param[in] cluster_std_scalar if 'cluster_std' is nullptr, then use this as + * the std-dev across all dimensions. + * @param[in] shuffle shuffle the generated dataset and labels + * @param[in] center_box_min min value of box from which to pick cluster + * centers. Useful only if 'centers' is nullptr + * @param[in] center_box_max max value of box from which to pick cluster + * centers. Useful only if 'centers' is nullptr + * @param[in] seed seed for the RNG + * @param[in] type RNG type + */ +template +void make_blobs(DataT* out, + IdxT* labels, + IdxT n_rows, + IdxT n_cols, + IdxT n_clusters, + cudaStream_t stream, + bool row_major = true, + const DataT* centers = nullptr, + const DataT* cluster_std = nullptr, + const DataT cluster_std_scalar = (DataT)1.0, + bool shuffle = true, + DataT center_box_min = (DataT)-10.0, + DataT center_box_max = (DataT)10.0, + uint64_t seed = 0ULL, + GeneratorType type = GenPhilox) +{ + detail::make_blobs_caller(out, + labels, + n_rows, + n_cols, + n_clusters, + stream, + row_major, + centers, + cluster_std, + cluster_std_scalar, + shuffle, + center_box_min, + center_box_max, + seed, + type); +} + +} // end namespace raft::random \ No newline at end of file diff --git a/cpp/include/raft/random/make_regression.hpp b/cpp/include/raft/random/make_regression.hpp new file mode 100644 index 0000000000..d6fceff466 --- /dev/null +++ b/cpp/include/raft/random/make_regression.hpp @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Adapted from scikit-learn + * https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/datasets/_samples_generator.py + */ + +#pragma once + +#include + +#include "detail/make_regression.cuh" + +namespace raft::random { + +/** + * @brief GPU-equivalent of sklearn.datasets.make_regression as documented at: + * https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_regression.html + * + * @tparam DataT Scalar type + * @tparam IdxT Index type + * + * @param[in] handle RAFT handle + * @param[out] out Row-major (samples, features) matrix to store + * the problem data + * @param[out] values Row-major (samples, targets) matrix to store + * the values for the regression problem + * @param[in] n_rows Number of samples + * @param[in] n_cols Number of features + * @param[in] n_informative Number of informative features (non-zero + * coefficients) + * @param[in] stream CUDA stream + * @param[out] coef Row-major (features, targets) matrix to store + * the coefficients used to generate the values + * for the regression problem. If nullptr is + * given, nothing will be written + * @param[in] n_targets Number of targets (generated values per sample) + * @param[in] bias A scalar that will be added to the values + * @param[in] effective_rank The approximate rank of the data matrix (used + * to create correlations in the data). -1 is the + * code to use well-conditioned data + * @param[in] tail_strength The relative importance of the fat noisy tail + * of the singular values profile if + * effective_rank is not -1 + * @param[in] noise Standard deviation of the gaussian noise + * applied to the output + * @param[in] shuffle Shuffle the samples and the features + * @param[in] seed Seed for the random number generator + * @param[in] type Random generator type + */ +template +void make_regression(const raft::handle_t& handle, + DataT* out, + DataT* values, + IdxT n_rows, + IdxT n_cols, + IdxT n_informative, + cudaStream_t stream, + DataT* coef = nullptr, + IdxT n_targets = (IdxT)1, + DataT bias = (DataT)0.0, + IdxT effective_rank = (IdxT)-1, + DataT tail_strength = (DataT)0.5, + DataT noise = (DataT)0.0, + bool shuffle = true, + uint64_t seed = 0ULL, + GeneratorType type = GenPhilox) +{ + detail::make_regression_caller(handle, + out, + values, + n_rows, + n_cols, + n_informative, + stream, + coef, + n_targets, + bias, + effective_rank, + tail_strength, + noise, + shuffle, + seed, + type); +} + +} // namespace raft::random \ No newline at end of file diff --git a/cpp/include/raft/random/multi_variable_gaussian.hpp b/cpp/include/raft/random/multi_variable_gaussian.hpp new file mode 100644 index 0000000000..c2af52322a --- /dev/null +++ b/cpp/include/raft/random/multi_variable_gaussian.hpp @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "detail/multi_variable_gaussian.cuh" + +namespace raft::random { + +template +class multi_variable_gaussian : public detail::multi_variable_gaussian_impl { + public: + // using Decomposer = typename detail::multi_variable_gaussian_impl::Decomposer; + // using detail::multi_variable_gaussian_impl::Decomposer::chol_decomp; + // using detail::multi_variable_gaussian_impl::Decomposer::jacobi; + // using detail::multi_variable_gaussian_impl::Decomposer::qr; + + multi_variable_gaussian() = delete; + multi_variable_gaussian(const raft::handle_t& handle, + const int dim, + typename detail::multi_variable_gaussian_impl::Decomposer method) + : detail::multi_variable_gaussian_impl{handle, dim, method} + { + } + + std::size_t get_workspace_size() + { + return detail::multi_variable_gaussian_impl::get_workspace_size(); + } + + void set_workspace(T* workarea) + { + detail::multi_variable_gaussian_impl::set_workspace(workarea); + } + + void give_gaussian(const int nPoints, T* P, T* X, const T* x = 0) + { + detail::multi_variable_gaussian_impl::give_gaussian(nPoints, P, X, x); + } + + void deinit() { detail::multi_variable_gaussian_impl::deinit(); } + + ~multi_variable_gaussian() { deinit(); } +}; // end of multi_variable_gaussian + +}; // end of namespace raft::random \ No newline at end of file diff --git a/cpp/include/raft/random/permute.hpp b/cpp/include/raft/random/permute.hpp new file mode 100644 index 0000000000..32ed3779e4 --- /dev/null +++ b/cpp/include/raft/random/permute.hpp @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "detail/permute.cuh" + +namespace raft::random { + +/** + * @brief Generate permutations of the input array. Pretty useful primitive for + * shuffling the input datasets in ML algos. See note at the end for some of its + * limitations! + * @tparam Type Data type of the array to be shuffled + * @tparam IntType Integer type used for ther perms array + * @tparam IdxType Integer type used for addressing indices + * @tparam TPB threads per block + * @param perms the output permutation indices. Typically useful only when + * one wants to refer back. If you don't need this, pass a nullptr + * @param out the output shuffled array. Pass nullptr if you don't want this to + * be written. For eg: when you only want the perms array to be filled. + * @param in input array (in-place is not supported due to race conditions!) + * @param D number of columns of the input array + * @param N length of the input array (or number of rows) + * @param rowMajor whether the input/output matrices are row or col major + * @param stream cuda stream where to launch the work + * + * @note This is NOT a uniform permutation generator! In fact, it only generates + * very small percentage of permutations. If your application really requires a + * high quality permutation generator, it is recommended that you pick + * Knuth Shuffle. + */ +template +void permute(IntType* perms, + Type* out, + const Type* in, + IntType D, + IntType N, + bool rowMajor, + cudaStream_t stream) +{ + detail::permute(perms, out, in, D, N, rowMajor, stream); +} + +}; // end namespace raft::random \ No newline at end of file diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 75b415814a..654ab73f84 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -74,6 +74,10 @@ add_executable(test_raft test/mr/host/buffer.cpp test/mr/device/buffer.cpp test/mst.cu + test/random/make_blobs.cu + test/random/make_regression.cu + test/random/multi_variable_gaussian.cu + test/random/permute.cu test/random/rng.cu test/random/rng_int.cu test/random/sample_without_replacement.cu diff --git a/cpp/test/random/make_blobs.cu b/cpp/test/random/make_blobs.cu new file mode 100644 index 0000000000..8c7e440d0e --- /dev/null +++ b/cpp/test/random/make_blobs.cu @@ -0,0 +1,263 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../test_utils.h" +#include +#include +#include +#include +#include + +namespace raft::random { + +template +__global__ void meanKernel(T* out, + int* lens, + const T* data, + const int* labels, + int nrows, + int ncols, + int nclusters, + bool row_major) +{ + int tid = threadIdx.x + blockIdx.x * blockDim.x; + int rowid = row_major ? tid / ncols : tid % nrows; + int colid = row_major ? tid % ncols : tid / nrows; + if (rowid < nrows && colid < ncols) { + T val = data[tid]; + int label = labels[rowid]; + int idx = row_major ? label * ncols + colid : colid * nclusters + label; + raft::myAtomicAdd(out + idx * 2, val); + raft::myAtomicAdd(out + idx * 2 + 1, val * val); + if (colid == 0) { raft::myAtomicAdd(lens + label, 1); } + } +} + +template +__global__ void compute_mean_var( + T* out, const T* stats, int* lens, int nrows, int ncols, bool row_major) +{ + int tid = threadIdx.x + blockIdx.x * blockDim.x; + int rowid = row_major ? tid / ncols : tid % nrows; + int colid = row_major ? tid % ncols : tid / nrows; + int stride = nrows * ncols; + if (rowid < nrows && colid < ncols) { + int len = lens[rowid]; + auto mean = stats[tid * 2] / len; + out[tid] = mean; + out[tid + stride] = (stats[tid * 2 + 1] / len) - (mean * mean); + } +} + +template +struct MakeBlobsInputs { + T tolerance; + int rows, cols, n_clusters; + T std; + bool row_major, shuffle; + raft::random::GeneratorType gtype; + uint64_t seed; +}; + +template +class MakeBlobsTest : public ::testing::TestWithParam> { + public: + MakeBlobsTest() + : params(::testing::TestWithParam>::GetParam()), + stream(handle.get_stream()), + mu_vec(params.cols * params.n_clusters, stream), + mean_var(2 * params.n_clusters * params.cols, stream) + { + } + + protected: + void SetUp() override + { + // Tests are configured with their expected test-values sigma. For example, + // 4 x sigma indicates the test shouldn't fail 99.9% of the time. + num_sigma = 50; + auto len = params.rows * params.cols; + raft::random::Rng r(params.seed, params.gtype); + + rmm::device_uvector data(len, stream); + rmm::device_uvector labels(params.rows, stream); + rmm::device_uvector stats(2 * params.n_clusters * params.cols, stream); + rmm::device_uvector lens(params.n_clusters, stream); + + RAFT_CUDA_TRY(cudaMemsetAsync(stats.data(), 0, stats.size() * sizeof(T), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(mean_var.data(), 0, mean_var.size() * sizeof(T), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(lens.data(), 0, lens.size() * sizeof(int), stream)); + + r.uniform(mu_vec.data(), params.cols * params.n_clusters, T(-10.0), T(10.0), stream); + T* sigma_vec = nullptr; + make_blobs(data.data(), + labels.data(), + params.rows, + params.cols, + params.n_clusters, + stream, + params.row_major, + mu_vec.data(), + sigma_vec, + params.std, + params.shuffle, + T(-10.0), + T(10.0), + params.seed, + params.gtype); + static const int threads = 128; + meanKernel<<>>(stats.data(), + lens.data(), + data.data(), + labels.data(), + params.rows, + params.cols, + params.n_clusters, + params.row_major); + int len1 = params.n_clusters * params.cols; + compute_mean_var<<>>( + mean_var.data(), stats.data(), lens.data(), params.n_clusters, params.cols, params.row_major); + } + + void check() + { + int len = params.n_clusters * params.cols; + auto compare = raft::CompareApprox(num_sigma * params.tolerance); + ASSERT_TRUE(raft::devArrMatch(mu_vec.data(), mean_var.data(), len, compare, stream)); + ASSERT_TRUE(raft::devArrMatch(params.std, mean_var.data() + len, len, compare, stream)); + } + + protected: + MakeBlobsInputs params; + raft::handle_t handle; + cudaStream_t stream = 0; + + rmm::device_uvector mu_vec, mean_var; + int num_sigma; +}; + +typedef MakeBlobsTest MakeBlobsTestF; +const std::vector> inputsf_t = { + {0.0055, 1024, 32, 3, 1.f, true, false, raft::random::GenPhilox, 1234ULL}, + {0.011, 1024, 8, 3, 1.f, true, false, raft::random::GenPhilox, 1234ULL}, + {0.0055, 1024, 32, 3, 1.f, true, false, raft::random::GenTaps, 1234ULL}, + {0.011, 1024, 8, 3, 1.f, true, false, raft::random::GenTaps, 1234ULL}, + {0.0055, 1024, 32, 3, 1.f, true, false, raft::random::GenKiss99, 1234ULL}, + {0.011, 1024, 8, 3, 1.f, true, false, raft::random::GenKiss99, 1234ULL}, + {0.0055, 1024, 32, 3, 1.f, false, false, raft::random::GenPhilox, 1234ULL}, + {0.011, 1024, 8, 3, 1.f, false, false, raft::random::GenPhilox, 1234ULL}, + {0.0055, 1024, 32, 3, 1.f, false, false, raft::random::GenTaps, 1234ULL}, + {0.011, 1024, 8, 3, 1.f, false, false, raft::random::GenTaps, 1234ULL}, + {0.0055, 1024, 32, 3, 1.f, false, false, raft::random::GenKiss99, 1234ULL}, + {0.011, 1024, 8, 3, 1.f, false, false, raft::random::GenKiss99, 1234ULL}, + {0.0055, 1024, 32, 3, 1.f, true, true, raft::random::GenPhilox, 1234ULL}, + {0.011, 1024, 8, 3, 1.f, true, true, raft::random::GenPhilox, 1234ULL}, + {0.0055, 1024, 32, 3, 1.f, true, true, raft::random::GenTaps, 1234ULL}, + {0.011, 1024, 8, 3, 1.f, true, true, raft::random::GenTaps, 1234ULL}, + {0.0055, 1024, 32, 3, 1.f, true, true, raft::random::GenKiss99, 1234ULL}, + {0.011, 1024, 8, 3, 1.f, true, true, raft::random::GenKiss99, 1234ULL}, + {0.0055, 1024, 32, 3, 1.f, false, true, raft::random::GenPhilox, 1234ULL}, + {0.011, 1024, 8, 3, 1.f, false, true, raft::random::GenPhilox, 1234ULL}, + {0.0055, 1024, 32, 3, 1.f, false, true, raft::random::GenTaps, 1234ULL}, + {0.011, 1024, 8, 3, 1.f, false, true, raft::random::GenTaps, 1234ULL}, + {0.0055, 1024, 32, 3, 1.f, false, true, raft::random::GenKiss99, 1234ULL}, + {0.011, 1024, 8, 3, 1.f, false, true, raft::random::GenKiss99, 1234ULL}, + + {0.0055, 5003, 32, 5, 1.f, true, false, raft::random::GenPhilox, 1234ULL}, + {0.011, 5003, 8, 5, 1.f, true, false, raft::random::GenPhilox, 1234ULL}, + {0.0055, 5003, 32, 5, 1.f, true, false, raft::random::GenTaps, 1234ULL}, + {0.011, 5003, 8, 5, 1.f, true, false, raft::random::GenTaps, 1234ULL}, + {0.0055, 5003, 32, 5, 1.f, true, false, raft::random::GenKiss99, 1234ULL}, + {0.011, 5003, 8, 5, 1.f, true, false, raft::random::GenKiss99, 1234ULL}, + {0.0055, 5003, 32, 5, 1.f, false, false, raft::random::GenPhilox, 1234ULL}, + {0.011, 5003, 8, 5, 1.f, false, false, raft::random::GenPhilox, 1234ULL}, + {0.0055, 5003, 32, 5, 1.f, false, false, raft::random::GenTaps, 1234ULL}, + {0.011, 5003, 8, 5, 1.f, false, false, raft::random::GenTaps, 1234ULL}, + {0.0055, 5003, 32, 5, 1.f, false, false, raft::random::GenKiss99, 1234ULL}, + {0.011, 5003, 8, 5, 1.f, false, false, raft::random::GenKiss99, 1234ULL}, + {0.0055, 5003, 32, 5, 1.f, true, true, raft::random::GenPhilox, 1234ULL}, + {0.011, 5003, 8, 5, 1.f, true, true, raft::random::GenPhilox, 1234ULL}, + {0.0055, 5003, 32, 5, 1.f, true, true, raft::random::GenTaps, 1234ULL}, + {0.011, 5003, 8, 5, 1.f, true, true, raft::random::GenTaps, 1234ULL}, + {0.0055, 5003, 32, 5, 1.f, true, true, raft::random::GenKiss99, 1234ULL}, + {0.011, 5003, 8, 5, 1.f, true, true, raft::random::GenKiss99, 1234ULL}, + {0.0055, 5003, 32, 5, 1.f, false, true, raft::random::GenPhilox, 1234ULL}, + {0.011, 5003, 8, 5, 1.f, false, true, raft::random::GenPhilox, 1234ULL}, + {0.0055, 5003, 32, 5, 1.f, false, true, raft::random::GenTaps, 1234ULL}, + {0.011, 5003, 8, 5, 1.f, false, true, raft::random::GenTaps, 1234ULL}, + {0.0055, 5003, 32, 5, 1.f, false, true, raft::random::GenKiss99, 1234ULL}, + {0.011, 5003, 8, 5, 1.f, false, true, raft::random::GenKiss99, 1234ULL}, +}; + +TEST_P(MakeBlobsTestF, Result) { check(); } +INSTANTIATE_TEST_CASE_P(MakeBlobsTests, MakeBlobsTestF, ::testing::ValuesIn(inputsf_t)); + +typedef MakeBlobsTest MakeBlobsTestD; +const std::vector> inputsd_t = { + {0.0055, 1024, 32, 3, 1.0, true, false, raft::random::GenPhilox, 1234ULL}, + {0.011, 1024, 8, 3, 1.0, true, false, raft::random::GenPhilox, 1234ULL}, + {0.0055, 1024, 32, 3, 1.0, true, false, raft::random::GenTaps, 1234ULL}, + {0.011, 1024, 8, 3, 1.0, true, false, raft::random::GenTaps, 1234ULL}, + {0.0055, 1024, 32, 3, 1.0, true, false, raft::random::GenKiss99, 1234ULL}, + {0.011, 1024, 8, 3, 1.0, true, false, raft::random::GenKiss99, 1234ULL}, + {0.0055, 1024, 32, 3, 1.0, false, false, raft::random::GenPhilox, 1234ULL}, + {0.011, 1024, 8, 3, 1.0, false, false, raft::random::GenPhilox, 1234ULL}, + {0.0055, 1024, 32, 3, 1.0, false, false, raft::random::GenTaps, 1234ULL}, + {0.011, 1024, 8, 3, 1.0, false, false, raft::random::GenTaps, 1234ULL}, + {0.0055, 1024, 32, 3, 1.0, false, false, raft::random::GenKiss99, 1234ULL}, + {0.011, 1024, 8, 3, 1.0, false, false, raft::random::GenKiss99, 1234ULL}, + {0.0055, 1024, 32, 3, 1.0, true, true, raft::random::GenPhilox, 1234ULL}, + {0.011, 1024, 8, 3, 1.0, true, true, raft::random::GenPhilox, 1234ULL}, + {0.0055, 1024, 32, 3, 1.0, true, true, raft::random::GenTaps, 1234ULL}, + {0.011, 1024, 8, 3, 1.0, true, true, raft::random::GenTaps, 1234ULL}, + {0.0055, 1024, 32, 3, 1.0, true, true, raft::random::GenKiss99, 1234ULL}, + {0.011, 1024, 8, 3, 1.0, true, true, raft::random::GenKiss99, 1234ULL}, + {0.0055, 1024, 32, 3, 1.0, false, true, raft::random::GenPhilox, 1234ULL}, + {0.011, 1024, 8, 3, 1.0, false, true, raft::random::GenPhilox, 1234ULL}, + {0.0055, 1024, 32, 3, 1.0, false, true, raft::random::GenTaps, 1234ULL}, + {0.011, 1024, 8, 3, 1.0, false, true, raft::random::GenTaps, 1234ULL}, + {0.0055, 1024, 32, 3, 1.0, false, true, raft::random::GenKiss99, 1234ULL}, + {0.011, 1024, 8, 3, 1.0, false, true, raft::random::GenKiss99, 1234ULL}, + + {0.0055, 5003, 32, 5, 1.0, true, false, raft::random::GenPhilox, 1234ULL}, + {0.011, 5003, 8, 5, 1.0, true, false, raft::random::GenPhilox, 1234ULL}, + {0.0055, 5003, 32, 5, 1.0, true, false, raft::random::GenTaps, 1234ULL}, + {0.011, 5003, 8, 5, 1.0, true, false, raft::random::GenTaps, 1234ULL}, + {0.0055, 5003, 32, 5, 1.0, true, false, raft::random::GenKiss99, 1234ULL}, + {0.011, 5003, 8, 5, 1.0, true, false, raft::random::GenKiss99, 1234ULL}, + {0.0055, 5003, 32, 5, 1.0, false, false, raft::random::GenPhilox, 1234ULL}, + {0.011, 5003, 8, 5, 1.0, false, false, raft::random::GenPhilox, 1234ULL}, + {0.0055, 5003, 32, 5, 1.0, false, false, raft::random::GenTaps, 1234ULL}, + {0.011, 5003, 8, 5, 1.0, false, false, raft::random::GenTaps, 1234ULL}, + {0.0055, 5003, 32, 5, 1.0, false, false, raft::random::GenKiss99, 1234ULL}, + {0.011, 5003, 8, 5, 1.0, false, false, raft::random::GenKiss99, 1234ULL}, + {0.0055, 5003, 32, 5, 1.0, true, true, raft::random::GenPhilox, 1234ULL}, + {0.011, 5003, 8, 5, 1.0, true, true, raft::random::GenPhilox, 1234ULL}, + {0.0055, 5003, 32, 5, 1.0, true, true, raft::random::GenTaps, 1234ULL}, + {0.011, 5003, 8, 5, 1.0, true, true, raft::random::GenTaps, 1234ULL}, + {0.0055, 5003, 32, 5, 1.0, true, true, raft::random::GenKiss99, 1234ULL}, + {0.011, 5003, 8, 5, 1.0, true, true, raft::random::GenKiss99, 1234ULL}, + {0.0055, 5003, 32, 5, 1.0, false, true, raft::random::GenPhilox, 1234ULL}, + {0.011, 5003, 8, 5, 1.0, false, true, raft::random::GenPhilox, 1234ULL}, + {0.0055, 5003, 32, 5, 1.0, false, true, raft::random::GenTaps, 1234ULL}, + {0.011, 5003, 8, 5, 1.0, false, true, raft::random::GenTaps, 1234ULL}, + {0.0055, 5003, 32, 5, 1.0, false, true, raft::random::GenKiss99, 1234ULL}, + {0.011, 5003, 8, 5, 1.0, false, true, raft::random::GenKiss99, 1234ULL}, +}; +TEST_P(MakeBlobsTestD, Result) { check(); } +INSTANTIATE_TEST_CASE_P(MakeBlobsTests, MakeBlobsTestD, ::testing::ValuesIn(inputsd_t)); + +} // end namespace raft::random \ No newline at end of file diff --git a/cpp/test/random/make_regression.cu b/cpp/test/random/make_regression.cu new file mode 100644 index 0000000000..01c3008cd3 --- /dev/null +++ b/cpp/test/random/make_regression.cu @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include "../test_utils.h" +#include +#include +#include +#include +#include +#include + +namespace raft::random { + +template +struct MakeRegressionInputs { + T tolerance; + int n_samples, n_features, n_informative, n_targets, effective_rank; + T bias; + bool shuffle; + raft::random::GeneratorType gtype; + uint64_t seed; +}; + +template +class MakeRegressionTest : public ::testing::TestWithParam> { + public: + MakeRegressionTest() + : params(::testing::TestWithParam>::GetParam()), + stream(handle.get_stream()), + values_ret(params.n_samples * params.n_targets, stream), + values_prod(params.n_samples * params.n_targets, stream) + { + } + + protected: + void SetUp() override + { + // Noise must be zero to compare the actual and expected values + T noise = (T)0.0, tail_strength = (T)0.5; + + rmm::device_uvector data(params.n_samples * params.n_features, stream); + rmm::device_uvector values_cm(params.n_samples * params.n_targets, stream); + rmm::device_uvector coef(params.n_features * params.n_targets, stream); + + // Create the regression problem + make_regression(handle, + data.data(), + values_ret.data(), + params.n_samples, + params.n_features, + params.n_informative, + stream, + coef.data(), + params.n_targets, + params.bias, + params.effective_rank, + tail_strength, + noise, + params.shuffle, + params.seed, + params.gtype); + + // Calculate the values from the data and coefficients (column-major) + T alpha = (T)1.0, beta = (T)0.0; + RAFT_CUBLAS_TRY(raft::linalg::detail::cublasgemm(handle.get_cublas_handle(), + CUBLAS_OP_T, + CUBLAS_OP_T, + params.n_samples, + params.n_targets, + params.n_features, + &alpha, + data.data(), + params.n_features, + coef.data(), + params.n_targets, + &beta, + values_cm.data(), + params.n_samples, + stream)); + + // Transpose the values to row-major + raft::linalg::transpose( + handle, values_cm.data(), values_prod.data(), params.n_samples, params.n_targets, stream); + + // Add the bias + raft::linalg::addScalar(values_prod.data(), + values_prod.data(), + params.bias, + params.n_samples * params.n_targets, + stream); + + // Count the number of zeroes in the coefficients + thrust::device_ptr __coef = thrust::device_pointer_cast(coef.data()); + zero_count = thrust::count(__coef, __coef + params.n_features * params.n_targets, (T)0.0); + } + + protected: + raft::handle_t handle; + cudaStream_t stream = 0; + + MakeRegressionInputs params; + rmm::device_uvector values_ret, values_prod; + int zero_count; +}; + +typedef MakeRegressionTest MakeRegressionTestF; +const std::vector> inputsf_t = { + {0.01f, 256, 32, 16, 1, -1, 0.f, true, raft::random::GenPhilox, 1234ULL}, + {0.01f, 1000, 100, 47, 4, 65, 4.2f, true, raft::random::GenPhilox, 1234ULL}, + {0.01f, 20000, 500, 450, 13, -1, -3.f, false, raft::random::GenPhilox, 1234ULL}}; + +TEST_P(MakeRegressionTestF, Result) +{ + ASSERT_TRUE(match(params.n_targets * (params.n_features - params.n_informative), + zero_count, + raft::Compare())); + ASSERT_TRUE(devArrMatch(values_ret.data(), + values_prod.data(), + params.n_samples, + params.n_targets, + raft::CompareApprox(params.tolerance), + stream)); +} +INSTANTIATE_TEST_CASE_P(MakeRegressionTests, MakeRegressionTestF, ::testing::ValuesIn(inputsf_t)); + +typedef MakeRegressionTest MakeRegressionTestD; +const std::vector> inputsd_t = { + {0.01, 256, 32, 16, 1, -1, 0.0, true, raft::random::GenPhilox, 1234ULL}, + {0.01, 1000, 100, 47, 4, 65, 4.2, true, raft::random::GenPhilox, 1234ULL}, + {0.01, 20000, 500, 450, 13, -1, -3.0, false, raft::random::GenPhilox, 1234ULL}}; + +TEST_P(MakeRegressionTestD, Result) +{ + ASSERT_TRUE(match(params.n_targets * (params.n_features - params.n_informative), + zero_count, + raft::Compare())); + ASSERT_TRUE(devArrMatch(values_ret.data(), + values_prod.data(), + params.n_samples, + params.n_targets, + raft::CompareApprox(params.tolerance), + stream)); +} +INSTANTIATE_TEST_CASE_P(MakeRegressionTests, MakeRegressionTestD, ::testing::ValuesIn(inputsd_t)); + +} // end namespace raft::random \ No newline at end of file diff --git a/cpp/test/random/multi_variable_gaussian.cu b/cpp/test/random/multi_variable_gaussian.cu new file mode 100644 index 0000000000..daafdbc754 --- /dev/null +++ b/cpp/test/random/multi_variable_gaussian.cu @@ -0,0 +1,315 @@ +/* + * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../test_utils.h" +#include +#include +#include +#include +#include +#include +#include + +// mvg.h takes in matrices that are colomn major (as in fortan) +#define IDX2C(i, j, ld) (j * ld + i) + +namespace raft::random { + +// helper kernels +/// @todo Duplicate called vctwiseAccumulate in utils.h (Kalman Filters, +// i think that is much better to use., more general) +template +__global__ void En_KF_accumulate(const int nPoints, const int dim, const T* X, T* x) +{ + int idx = threadIdx.x + blockDim.x * blockIdx.x; + int col = idx % dim; + int row = idx / dim; + if (col < dim && row < nPoints) raft::myAtomicAdd(x + col, X[idx]); +} + +template +__global__ void En_KF_normalize(const int divider, const int dim, T* x) +{ + int xi = threadIdx.x + blockDim.x * blockIdx.x; + if (xi < dim) x[xi] = x[xi] / divider; +} + +template +__global__ void En_KF_dif(const int nPoints, const int dim, const T* X, const T* x, T* X_diff) +{ + int idx = threadIdx.x + blockDim.x * blockIdx.x; + int col = idx % dim; + int row = idx / dim; + if (col < dim && row < nPoints) X_diff[idx] = X[idx] - x[col]; +} + +// for specialising tests +enum Correlation : unsigned char { + CORRELATED, // = 0 + UNCORRELATED +}; + +template +struct MVGInputs { + T tolerance; + typename multi_variable_gaussian::Decomposer method; + Correlation corr; + int dim, nPoints; + unsigned long long int seed; +}; + +template +::std::ostream& operator<<(::std::ostream& os, const MVGInputs& dims) +{ + return os; +} + +template +class MVGTest : public ::testing::TestWithParam> { + protected: + MVGTest() + : workspace_d(0, handle.get_stream()), + P_d(0, handle.get_stream()), + x_d(0, handle.get_stream()), + X_d(0, handle.get_stream()), + Rand_cov(0, handle.get_stream()), + Rand_mean(0, handle.get_stream()) + { + } + + void SetUp() override + { + // getting params + params = ::testing::TestWithParam>::GetParam(); + dim = params.dim; + nPoints = params.nPoints; + method = params.method; + corr = params.corr; + tolerance = params.tolerance; + + auto cublasH = handle.get_cublas_handle(); + auto cusolverH = handle.get_cusolver_dn_handle(); + auto stream = handle.get_stream(); + + // preparing to store stuff + P.resize(dim * dim); + x.resize(dim); + X.resize(dim * nPoints); + P_d.resize(dim * dim, stream); + X_d.resize(nPoints * dim, stream); + x_d.resize(dim, stream); + Rand_cov.resize(dim * dim, stream); + Rand_mean.resize(dim, stream); + + // generating random mean and cov. + srand(params.seed); + for (int j = 0; j < dim; j++) + x.data()[j] = rand() % 100 + 5.0f; + + // for random Cov. martix + std::default_random_engine generator(params.seed); + std::uniform_real_distribution distribution(0.0, 1.0); + + // P (developing a +ve definite symm matrix) + for (int j = 0; j < dim; j++) { + for (int i = 0; i < j + 1; i++) { + T k = distribution(generator); + if (corr == UNCORRELATED) k = 0.0; + P.data()[IDX2C(i, j, dim)] = k; + P.data()[IDX2C(j, i, dim)] = k; + if (i == j) P.data()[IDX2C(i, j, dim)] += dim; + } + } + + // porting inputs to gpu + raft::update_device(P_d.data(), P.data(), dim * dim, stream); + raft::update_device(x_d.data(), x.data(), dim, stream); + + // initilizing the mvg + mvg = new multi_variable_gaussian(handle, dim, method); + std::size_t o = mvg->get_workspace_size(); + + // give the workspace area to mvg + workspace_d.resize(o, stream); + mvg->set_workspace(workspace_d.data()); + + // get gaussians in X_d | P_d is destroyed. + mvg->give_gaussian(nPoints, P_d.data(), X_d.data(), x_d.data()); + + // saving the mean of the randoms in Rand_mean + //@todo can be swapped with a API that calculates mean + RAFT_CUDA_TRY(cudaMemset(Rand_mean.data(), 0, dim * sizeof(T))); + dim3 block = (64); + dim3 grid = (raft::ceildiv(nPoints * dim, (int)block.x)); + En_KF_accumulate<<>>(nPoints, dim, X_d.data(), Rand_mean.data()); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + grid = (raft::ceildiv(dim, (int)block.x)); + En_KF_normalize<<>>(nPoints, dim, Rand_mean.data()); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + + // storing the error wrt random point mean in X_d + grid = (raft::ceildiv(dim * nPoints, (int)block.x)); + En_KF_dif<<>>(nPoints, dim, X_d.data(), Rand_mean.data(), X_d.data()); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + + // finding the cov matrix, placing in Rand_cov + T alfa = 1.0 / (nPoints - 1), beta = 0.0; + + RAFT_CUBLAS_TRY(raft::linalg::detail::cublasgemm(cublasH, + CUBLAS_OP_N, + CUBLAS_OP_T, + dim, + dim, + nPoints, + &alfa, + X_d.data(), + dim, + X_d.data(), + dim, + &beta, + Rand_cov.data(), + dim, + stream)); + + // restoring cov provided into P_d + raft::update_device(P_d.data(), P.data(), dim * dim, stream); + } + + void TearDown() override + { + // deleting mvg + delete mvg; + } + + protected: + MVGInputs params; + std::vector P, x, X; + rmm::device_uvector workspace_d, P_d, x_d, X_d, Rand_cov, Rand_mean; + int dim, nPoints; + typename multi_variable_gaussian::Decomposer method; + Correlation corr; + multi_variable_gaussian* mvg = NULL; + T tolerance; + raft::handle_t handle; +}; // end of MVGTest class + +///@todo find out the reason that Un-correlated covs are giving problems (in qr) +// Declare your inputs +const std::vector> inputsf = { + {0.3f, + multi_variable_gaussian::Decomposer::chol_decomp, + Correlation::CORRELATED, + 5, + 30000, + 6ULL}, + {0.1f, + multi_variable_gaussian::Decomposer::chol_decomp, + Correlation::UNCORRELATED, + 5, + 30000, + 6ULL}, + {0.25f, + multi_variable_gaussian::Decomposer::jacobi, + Correlation::CORRELATED, + 5, + 30000, + 6ULL}, + {0.1f, + multi_variable_gaussian::Decomposer::jacobi, + Correlation::UNCORRELATED, + 5, + 30000, + 6ULL}, + {0.2f, multi_variable_gaussian::Decomposer::qr, Correlation::CORRELATED, 5, 30000, 6ULL}, + // { 0.2f, multi_variable_gaussian::Decomposer::qr, + // Correlation::UNCORRELATED, 5, 30000, 6ULL} +}; +const std::vector> inputsd = { + {0.25, + multi_variable_gaussian::Decomposer::chol_decomp, + Correlation::CORRELATED, + 10, + 3000000, + 6ULL}, + {0.1, + multi_variable_gaussian::Decomposer::chol_decomp, + Correlation::UNCORRELATED, + 10, + 3000000, + 6ULL}, + {0.25, + multi_variable_gaussian::Decomposer::jacobi, + Correlation::CORRELATED, + 10, + 3000000, + 6ULL}, + {0.1, + multi_variable_gaussian::Decomposer::jacobi, + Correlation::UNCORRELATED, + 10, + 3000000, + 6ULL}, + {0.2, + multi_variable_gaussian::Decomposer::qr, + Correlation::CORRELATED, + 10, + 3000000, + 6ULL}, + // { 0.2, multi_variable_gaussian::Decomposer::qr, + // Correlation::UNCORRELATED, 10, 3000000, 6ULL} +}; + +// make the tests +typedef MVGTest MVGTestF; +typedef MVGTest MVGTestD; +TEST_P(MVGTestF, MeanIsCorrectF) +{ + EXPECT_TRUE(raft::devArrMatch( + x_d.data(), Rand_mean.data(), dim, raft::CompareApprox(tolerance), handle.get_stream())) + << " in MeanIsCorrect"; +} +TEST_P(MVGTestF, CovIsCorrectF) +{ + EXPECT_TRUE(raft::devArrMatch(P_d.data(), + Rand_cov.data(), + dim, + dim, + raft::CompareApprox(tolerance), + handle.get_stream())) + << " in CovIsCorrect"; +} +TEST_P(MVGTestD, MeanIsCorrectD) +{ + EXPECT_TRUE(raft::devArrMatch( + x_d.data(), Rand_mean.data(), dim, raft::CompareApprox(tolerance), handle.get_stream())) + << " in MeanIsCorrect"; +} +TEST_P(MVGTestD, CovIsCorrectD) +{ + EXPECT_TRUE(raft::devArrMatch(P_d.data(), + Rand_cov.data(), + dim, + dim, + raft::CompareApprox(tolerance), + handle.get_stream())) + << " in CovIsCorrect"; +} + +// call the tests +INSTANTIATE_TEST_CASE_P(MVGTests, MVGTestF, ::testing::ValuesIn(inputsf)); +INSTANTIATE_TEST_CASE_P(MVGTests, MVGTestD, ::testing::ValuesIn(inputsd)); + +}; // end of namespace raft::random \ No newline at end of file diff --git a/cpp/test/random/permute.cu b/cpp/test/random/permute.cu new file mode 100644 index 0000000000..294444d409 --- /dev/null +++ b/cpp/test/random/permute.cu @@ -0,0 +1,237 @@ +/* + * Copyright (c) 2018-2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../test_utils.h" +#include +#include +#include +#include +#include +#include + +namespace raft { +namespace random { + +template +struct PermInputs { + int N, D; + bool needPerms, needShuffle, rowMajor; + unsigned long long int seed; +}; + +template +::std::ostream& operator<<(::std::ostream& os, const PermInputs& dims) +{ + return os; +} + +template +class PermTest : public ::testing::TestWithParam> { + protected: + PermTest() : in(0, stream), out(0, stream), outPerms(0, stream) {} + + void SetUp() override + { + RAFT_CUDA_TRY(cudaStreamCreate(&stream)); + params = ::testing::TestWithParam>::GetParam(); + // forcefully set needPerms, since we need it for unit-testing! + if (params.needShuffle) { params.needPerms = true; } + raft::random::Rng r(params.seed); + int N = params.N; + int D = params.D; + int len = N * D; + cudaStream_t stream = 0; + RAFT_CUDA_TRY(cudaStreamCreate(&stream)); + if (params.needPerms) { + outPerms.resize(N, stream); + outPerms_ptr = outPerms.data(); + } + if (params.needShuffle) { + in.resize(len, stream); + out.resize(len, stream); + in_ptr = in.data(); + out_ptr = out.data(); + r.uniform(in_ptr, len, T(-1.0), T(1.0), stream); + } + permute(outPerms_ptr, out_ptr, in_ptr, D, N, params.rowMajor, stream); + RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); + } + + void TearDown() override { RAFT_CUDA_TRY(cudaStreamDestroy(stream)); } + + protected: + PermInputs params; + rmm::device_uvector in, out; + T* in_ptr = nullptr; + T* out_ptr = nullptr; + rmm::device_uvector outPerms; + int* outPerms_ptr = nullptr; + cudaStream_t stream = 0; +}; + +template +::testing::AssertionResult devArrMatchRange( + const T* actual, size_t size, T start, L eq_compare, bool doSort = true, cudaStream_t stream = 0) +{ + std::vector act_h(size); + raft::update_host(&(act_h[0]), actual, size, stream); + RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); + if (doSort) std::sort(act_h.begin(), act_h.end()); + for (size_t i(0); i < size; ++i) { + auto act = act_h[i]; + auto expected = start + i; + if (!eq_compare(expected, act)) { + return ::testing::AssertionFailure() + << "actual=" << act << " != expected=" << expected << " @" << i; + } + } + return ::testing::AssertionSuccess(); +} + +template +::testing::AssertionResult devArrMatchShuffle(const int* perms, + const T* out, + const T* in, + int D, + int N, + bool rowMajor, + L eq_compare, + cudaStream_t stream = 0) +{ + std::vector h_perms(N); + raft::update_host(&(h_perms[0]), perms, N, stream); + std::vector h_out(N * D), h_in(N * D); + raft::update_host(&(h_out[0]), out, N * D, stream); + raft::update_host(&(h_in[0]), in, N * D, stream); + RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); + for (int i = 0; i < N; ++i) { + for (int j = 0; j < D; ++j) { + int outPos = rowMajor ? i * D + j : j * N + i; + int inPos = rowMajor ? h_perms[i] * D + j : j * N + h_perms[i]; + auto act = h_out[outPos]; + auto expected = h_in[inPos]; + if (!eq_compare(expected, act)) { + return ::testing::AssertionFailure() + << "actual=" << act << " != expected=" << expected << " @" << i << ", " << j; + } + } + } + return ::testing::AssertionSuccess(); +} + +const std::vector> inputsf = { + // only generate permutations + {32, 8, true, false, true, 1234ULL}, + {32, 8, true, false, true, 1234567890ULL}, + {1024, 32, true, false, true, 1234ULL}, + {1024, 32, true, false, true, 1234567890ULL}, + {2 * 1024, 32, true, false, true, 1234ULL}, + {2 * 1024, 32, true, false, true, 1234567890ULL}, + {2 * 1024 + 500, 32, true, false, true, 1234ULL}, + {2 * 1024 + 500, 32, true, false, true, 1234567890ULL}, + {100000, 32, true, false, true, 1234ULL}, + {100000, 32, true, false, true, 1234567890ULL}, + {100001, 33, true, false, true, 1234567890ULL}, + // permute and shuffle the data row major + {32, 8, true, true, true, 1234ULL}, + {32, 8, true, true, true, 1234567890ULL}, + {1024, 32, true, true, true, 1234ULL}, + {1024, 32, true, true, true, 1234567890ULL}, + {2 * 1024, 32, true, true, true, 1234ULL}, + {2 * 1024, 32, true, true, true, 1234567890ULL}, + {2 * 1024 + 500, 32, true, true, true, 1234ULL}, + {2 * 1024 + 500, 32, true, true, true, 1234567890ULL}, + {100000, 32, true, true, true, 1234ULL}, + {100000, 32, true, true, true, 1234567890ULL}, + {100001, 31, true, true, true, 1234567890ULL}, + // permute and shuffle the data column major + {32, 8, true, true, false, 1234ULL}, + {32, 8, true, true, false, 1234567890ULL}, + {1024, 32, true, true, false, 1234ULL}, + {1024, 32, true, true, false, 1234567890ULL}, + {2 * 1024, 32, true, true, false, 1234ULL}, + {2 * 1024, 32, true, true, false, 1234567890ULL}, + {2 * 1024 + 500, 32, true, true, false, 1234ULL}, + {2 * 1024 + 500, 32, true, true, false, 1234567890ULL}, + {100000, 32, true, true, false, 1234ULL}, + {100000, 32, true, true, false, 1234567890ULL}, + {100001, 33, true, true, false, 1234567890ULL}}; + +typedef PermTest PermTestF; +TEST_P(PermTestF, Result) +{ + if (params.needPerms) { + ASSERT_TRUE(devArrMatchRange(outPerms_ptr, params.N, 0, raft::Compare())); + } + if (params.needShuffle) { + ASSERT_TRUE(devArrMatchShuffle( + outPerms_ptr, out_ptr, in_ptr, params.D, params.N, params.rowMajor, raft::Compare())); + } +} +INSTANTIATE_TEST_CASE_P(PermTests, PermTestF, ::testing::ValuesIn(inputsf)); + +const std::vector> inputsd = { + // only generate permutations + {32, 8, true, false, true, 1234ULL}, + {32, 8, true, false, true, 1234567890ULL}, + {1024, 32, true, false, true, 1234ULL}, + {1024, 32, true, false, true, 1234567890ULL}, + {2 * 1024, 32, true, false, true, 1234ULL}, + {2 * 1024, 32, true, false, true, 1234567890ULL}, + {2 * 1024 + 500, 32, true, false, true, 1234ULL}, + {2 * 1024 + 500, 32, true, false, true, 1234567890ULL}, + {100000, 32, true, false, true, 1234ULL}, + {100000, 32, true, false, true, 1234567890ULL}, + {100001, 33, true, false, true, 1234567890ULL}, + // permute and shuffle the data row major + {32, 8, true, true, true, 1234ULL}, + {32, 8, true, true, true, 1234567890ULL}, + {1024, 32, true, true, true, 1234ULL}, + {1024, 32, true, true, true, 1234567890ULL}, + {2 * 1024, 32, true, true, true, 1234ULL}, + {2 * 1024, 32, true, true, true, 1234567890ULL}, + {2 * 1024 + 500, 32, true, true, true, 1234ULL}, + {2 * 1024 + 500, 32, true, true, true, 1234567890ULL}, + {100000, 32, true, true, true, 1234ULL}, + {100000, 32, true, true, true, 1234567890ULL}, + {100001, 31, true, true, true, 1234567890ULL}, + // permute and shuffle the data column major + {32, 8, true, true, false, 1234ULL}, + {32, 8, true, true, false, 1234567890ULL}, + {1024, 32, true, true, false, 1234ULL}, + {1024, 32, true, true, false, 1234567890ULL}, + {2 * 1024, 32, true, true, false, 1234ULL}, + {2 * 1024, 32, true, true, false, 1234567890ULL}, + {2 * 1024 + 500, 32, true, true, false, 1234ULL}, + {2 * 1024 + 500, 32, true, true, false, 1234567890ULL}, + {100000, 32, true, true, false, 1234ULL}, + {100000, 32, true, true, false, 1234567890ULL}, + {100001, 33, true, true, false, 1234567890ULL}}; +typedef PermTest PermTestD; +TEST_P(PermTestD, Result) +{ + if (params.needPerms) { + ASSERT_TRUE(devArrMatchRange(outPerms_ptr, params.N, 0, raft::Compare())); + } + if (params.needShuffle) { + ASSERT_TRUE(devArrMatchShuffle( + outPerms_ptr, out_ptr, in_ptr, params.D, params.N, params.rowMajor, raft::Compare())); + } +} +INSTANTIATE_TEST_CASE_P(PermTests, PermTestD, ::testing::ValuesIn(inputsd)); + +} // end namespace random +} // end namespace raft \ No newline at end of file diff --git a/cpp/test/sparse/filter.cu b/cpp/test/sparse/filter.cu index 210eefa850..5a389b8c87 100644 --- a/cpp/test/sparse/filter.cu +++ b/cpp/test/sparse/filter.cu @@ -96,7 +96,7 @@ TEST_P(COORemoveZeros, Result) raft::update_device(out_ref.vals(), out_vals_ref_h, 2, stream); op::coo_remove_zeros(&in, &out, stream); - CUDA_CHECK(cudaStreamSynchronize(stream)); + RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); ASSERT_TRUE(raft::devArrMatch(out_ref.rows(), out.rows(), 2, raft::Compare())); ASSERT_TRUE(raft::devArrMatch(out_ref.cols(), out.cols(), 2, raft::Compare())); diff --git a/cpp/test/sparse/norm.cu b/cpp/test/sparse/norm.cu index 8d61ca06a8..ac5443d43b 100644 --- a/cpp/test/sparse/norm.cu +++ b/cpp/test/sparse/norm.cu @@ -73,7 +73,7 @@ class CSRRowNormalizeTest : public ::testing::TestWithParam(verify.data(), result.data(), nnz, raft::Compare())); diff --git a/cpp/test/sparse/reduce.cu b/cpp/test/sparse/reduce.cu index b550857797..edf7432c49 100644 --- a/cpp/test/sparse/reduce.cu +++ b/cpp/test/sparse/reduce.cu @@ -78,7 +78,7 @@ class SparseReduceTest : public ::testing::TestWithParam( out_rows.data(), out.rows(), out.nnz, raft::Compare())); ASSERT_TRUE(raft::devArrMatch(