From ab65a7c434f41ab992bb3bc81d3dbda65e29a87f Mon Sep 17 00:00:00 2001 From: Mark Hoemmen Date: Wed, 7 Sep 2022 12:09:26 -0700 Subject: [PATCH] mdspan-ify make_regression Add an overload of make_regression that takes mdspan, instead of raw pointers. The overload does not increase generality (e.g., it still requires row-major mdspan). Part of #535. --- cpp/include/raft/random/make_regression.cuh | 90 ++++++++++++++- cpp/test/random/make_regression.cu | 116 ++++++++++++++++++++ 2 files changed, 204 insertions(+), 2 deletions(-) diff --git a/cpp/include/raft/random/make_regression.cuh b/cpp/include/raft/random/make_regression.cuh index 4fbb48fa35..9d5e918adc 100644 --- a/cpp/include/raft/random/make_regression.cuh +++ b/cpp/include/raft/random/make_regression.cuh @@ -24,6 +24,8 @@ #pragma once #include +#include +#include #include "detail/make_regression.cuh" @@ -58,7 +60,7 @@ namespace raft::random { * @param[in] tail_strength The relative importance of the fat noisy tail * of the singular values profile if * effective_rank is not -1 - * @param[in] noise Standard deviation of the gaussian noise + * @param[in] noise Standard deviation of the Gaussian noise * applied to the output * @param[in] shuffle Shuffle the samples and the features * @param[in] seed Seed for the random number generator @@ -100,6 +102,90 @@ void make_regression(const raft::handle_t& handle, type); } +/** + * @brief GPU-equivalent of sklearn.datasets.make_regression as documented at: + * https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_regression.html + * + * @tparam DataT Scalar type + * @tparam IdxT Index type + * + * @param[in] handle RAFT handle + * @param[out] out Row-major (samples, features) matrix to store + * the problem data + * @param[out] values Row-major (samples, targets) matrix to store + * the values for the regression problem + * @param[in] n_informative Number of informative features (non-zero + * coefficients) + * @param[in] stream CUDA stream + * 
@param[out] coef If present, a row-major (features, targets) matrix + * to store the coefficients used to generate the values + * for the regression problem + * @param[in] bias A scalar that will be added to the values + * @param[in] effective_rank The approximate rank of the data matrix (used + * to create correlations in the data). -1 is the + * code to use well-conditioned data + * @param[in] tail_strength The relative importance of the fat noisy tail + * of the singular values profile if + * effective_rank is not -1 + * @param[in] noise Standard deviation of the Gaussian noise + * applied to the output + * @param[in] shuffle Shuffle the samples and the features + * @param[in] seed Seed for the random number generator + * @param[in] type Random generator type + */ +template +void make_regression(const raft::handle_t& handle, + raft::device_matrix_view, + raft::row_major> out, + raft::device_matrix_view, + raft::row_major> values, + IdxT n_informative, + cudaStream_t stream, + std::optional< + raft::device_matrix_view, + raft::row_major>> coef, + DataT bias = DataT{}, + IdxT effective_rank = static_cast(-1), + DataT tail_strength = DataT{0.5}, + DataT noise = DataT{}, + bool shuffle = true, + uint64_t seed = 0ULL, + GeneratorType type = GenPhilox) +{ + const auto n_samples = out.extent(0); + assert(values.extent(0) == n_samples); + const auto n_features = out.extent(1); + const auto n_targets = values.extent(1); + + const bool have_coef = coef.has_value(); + if(have_coef) { + const auto coef_ref = *coef; + assert(coef_ref.extent(0) == n_features); + assert(coef_ref.extent(1) == n_targets); + } + DataT* coef_ptr = have_coef ? 
(*coef).data_handle() : nullptr; + + detail::make_regression_caller(handle, + out.data_handle(), + values.data_handle(), + n_samples, + n_features, + n_informative, + stream, + coef_ptr, + n_targets, + bias, + effective_rank, + tail_strength, + noise, + shuffle, + seed, + type); +} + } // namespace raft::random -#endif \ No newline at end of file +#endif diff --git a/cpp/test/random/make_regression.cu b/cpp/test/random/make_regression.cu index 32c156f34f..691deb1bbe 100644 --- a/cpp/test/random/make_regression.cu +++ b/cpp/test/random/make_regression.cu @@ -78,6 +78,8 @@ class MakeRegressionTest : public ::testing::TestWithParam +class MakeRegressionMdspanTest : public ::testing::TestWithParam> { +public: + MakeRegressionMdspanTest() = default; + +protected: + void SetUp() override + { + // Noise must be zero to compare the actual and expected values + T noise = (T)0.0, tail_strength = (T)0.5; + + rmm::device_uvector data(params.n_samples * params.n_features, stream); + rmm::device_uvector values_cm(params.n_samples * params.n_targets, stream); + rmm::device_uvector coef(params.n_features * params.n_targets, stream); + + using index_type = typename rmm::device_uvector::index_type; + using matrix_view = raft::device_matrix_view, raft::row_major>; + matrix_view out_mat(data.data(), params.n_samples, params.n_features); + matrix_view values_mat(values_ret.data(), params.n_samples, params.n_targets); + matrix_view coef_mat(coef.data(), params.n_features, params.n_targets); + + // Create the regression problem + make_regression(handle, out_mat, values_mat, + params.n_informative, + stream, + coef_mat, + params.bias, + params.effective_rank, + tail_strength, + noise, + params.shuffle, + params.seed, + params.gtype); + + // FIXME (mfh 2022/09/07) This test passes even if I don't call make_regression. 
+
+    // Reference result: recompute values = data * coef + bias independently of
+    // make_regression. `data` is row-major (n_samples x n_features) and `coef`
+    // is row-major (n_features x n_targets); cuBLAS is column-major, so both
+    // operands are passed transposed and the product lands column-major in
+    // values_cm with leading dimension n_samples.
+    // alpha must be 1 (not value-initialized to 0); with alpha == 0 the GEMM
+    // writes zeros and the reference collapses to the bias alone.
+    T alpha = static_cast<T>(1);
+    T beta  = static_cast<T>(0);
+    RAFT_CUBLAS_TRY(raft::linalg::detail::cublasgemm(handle.get_cublas_handle(),
+                                                     CUBLAS_OP_T,
+                                                     CUBLAS_OP_T,
+                                                     params.n_samples,
+                                                     params.n_targets,
+                                                     params.n_features,
+                                                     &alpha,
+                                                     data.data(),
+                                                     params.n_features,
+                                                     coef.data(),
+                                                     params.n_targets,
+                                                     &beta,
+                                                     values_cm.data(),
+                                                     params.n_samples,
+                                                     stream));
+
+    // Transpose the values to row-major
+    raft::linalg::transpose(
+      handle, values_cm.data(), values_prod.data(), params.n_samples, params.n_targets, stream);
+
+    // Add the bias
+    raft::linalg::addScalar(values_prod.data(),
+                            values_prod.data(),
+                            params.bias,
+                            params.n_samples * params.n_targets,
+                            stream);
+
+    // Count the number of zeroes in the coefficients
+    thrust::device_ptr<T> coef_begin = thrust::device_pointer_cast(coef.data());
+    constexpr T ZERO{};
+    zero_count = thrust::count(coef_begin, coef_begin + params.n_features * params.n_targets, ZERO);
+  }
+
+  // Must be protected, not private: each TEST_P body is a member of a class
+  // derived from this fixture and reads these members directly.
+protected:
+  MakeRegressionInputs<T> params{
+    ::testing::TestWithParam<MakeRegressionInputs<T>>::GetParam()};
+  raft::handle_t handle;
+  cudaStream_t stream{handle.get_stream()};
+  // values_ret: output of make_regression; values_prod: reference data * coef + bias.
+  rmm::device_uvector<T> values_ret{params.n_samples * params.n_targets, stream};
+  rmm::device_uvector<T> values_prod{params.n_samples * params.n_targets, stream};
+  // Number of exactly-zero coefficients; -1 until SetUp has run.
+  int zero_count = -1;
+};
+
+// The aliases must name the mdspan fixture. Aliasing MakeRegressionTest here
+// would silently re-run the raw-pointer tests and never exercise the mdspan overload.
+using MakeRegressionMdspanTestF = MakeRegressionMdspanTest<float>;
+
+TEST_P(MakeRegressionMdspanTestF, Result)
+{
+  // Every non-informative feature contributes one zero coefficient per target.
+  ASSERT_TRUE(match(params.n_targets * (params.n_features - params.n_informative),
+                    zero_count,
+                    raft::Compare<int>()));
+  ASSERT_TRUE(devArrMatch(values_ret.data(),
+                          values_prod.data(),
+                          params.n_samples,
+                          params.n_targets,
+                          raft::CompareApprox<float>(params.tolerance),
+                          stream));
+}
+INSTANTIATE_TEST_CASE_P(MakeRegressionMdspanTests, MakeRegressionMdspanTestF, ::testing::ValuesIn(inputsf_t));
+
+using MakeRegressionMdspanTestD = MakeRegressionMdspanTest<double>;
+
+TEST_P(MakeRegressionMdspanTestD, Result)
+{
+  // Every non-informative feature contributes one zero coefficient per target.
+  ASSERT_TRUE(match(params.n_targets * (params.n_features - params.n_informative),
+                    zero_count,
+                    raft::Compare<int>()));
+  ASSERT_TRUE(devArrMatch(values_ret.data(),
+                          values_prod.data(),
+                          params.n_samples,
+                          params.n_targets,
+                          raft::CompareApprox<double>(params.tolerance),
+                          stream));
+}
+INSTANTIATE_TEST_CASE_P(MakeRegressionMdspanTests, MakeRegressionMdspanTestD, ::testing::ValuesIn(inputsd_t));
+
 } // end namespace raft::random