add doxygen
lijinf2 committed Jul 5, 2023
1 parent 9720eb1 commit cd22d55
Showing 6 changed files with 43 additions and 72 deletions.
33 changes: 15 additions & 18 deletions cpp/include/cuml/linear_model/qn_mg.hpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -27,33 +27,30 @@ namespace ML {
namespace GLM {
namespace opg {

/**
 * @brief Performs the multi-node multi-GPU (MNMG) fit operation for logistic regression using quasi-Newton methods
* @param[in] handle: the internal cuml handle object
* @param[in] input_data: vector holding all partitions for that rank
* @param[in] input_desc: PartDescriptor object for the input
* @param[in] labels: labels data
* @param[out] coef: learned coefficients
 * @param[in] pams: model parameters
* @param[in] X_col_major: true if X is stored column-major
* @param[in] n_classes: number of outputs (number of classes or `1` for regression)
* @param[out] f: host pointer holding the final objective value
* @param[out] num_iters: host pointer holding the actual number of iterations taken
*/
void qnFit(raft::handle_t& handle,
std::vector<Matrix::Data<float>*>& input_data,
Matrix::PartDescriptor& input_desc,
std::vector<Matrix::Data<float>*>& labels,
float* coef,
const qn_params& pams,
bool X_col_major,
int n_classes,
float* f,
int* num_iters);
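
For orientation, here is a minimal per-rank calling sketch of the new qnFit entry point. It is illustrative only: the helper name fit_on_rank, the hard-coded n_classes value, and the assumption that the caller (e.g. cuML's Dask layer) has already prepared the handle, partition descriptor, and device buffers are not part of this commit.

    #include <cuml/linear_model/qn_mg.hpp>
    #include <raft/core/handle.hpp>

    #include <vector>

    namespace ML {  // reuse the header's namespace so Matrix::Data resolves as above

    // Sketch: per-rank invocation of the MNMG qnFit. All buffers are assumed to
    // be prepared by the caller; nothing here allocates or validates data.
    void fit_on_rank(raft::handle_t& handle,
                     std::vector<Matrix::Data<float>*>& input_data,
                     Matrix::PartDescriptor& input_desc,
                     std::vector<Matrix::Data<float>*>& labels,
                     float* coef,                 // device buffer for learned coefficients
                     const GLM::qn_params& pams)  // solver settings (loss, penalty, ...)
    {
      float objective = 0.f;  // host scalar: final objective value, written by qnFit
      int num_iters   = 0;    // host scalar: iterations taken, written by qnFit
      GLM::opg::qnFit(handle, input_data, input_desc, labels, coef, pams,
                      /*X_col_major=*/false, /*n_classes=*/2, &objective, &num_iters);
    }

    }  // namespace ML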

/*
void qnFit(const raft::handle_t &handle,
const qn_params& pams,
float* X,
bool X_col_major,
float *y,
int N,
int D,
int C,
float* w0,
float* f,
int* num_iters,
int n_samples,
int rank,
int n_ranks);
*/

}; // namespace opg
}; // namespace GLM
}; // namespace ML
19 changes: 16 additions & 3 deletions cpp/src/glm/qn/glm_base_mg.cuh
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -74,7 +74,21 @@ inline void linearBwdMG(const raft::handle_t& handle,

template <typename T, class GLMObjective>
struct GLMWithDataMG : ML::GLM::detail::GLMWithData<T, GLMObjective> {

/**
 * @brief Aggregates local gradient vectors and loss values from local training data. This
 * class is the multi-node multi-GPU version of GLMWithData.
 *
 * The implementation overrides the existing GLMWithData::operator() function. Its purpose is to
 * aggregate local gradient vectors and loss values from the distributed X and y, where X holds
 * the input vectors and y holds the labels.
 *
 * GLMWithData::operator() currently invokes three functions: linearFwd, getLossAndDz and linearBwd.
 * linearFwd multiplies the local input vectors by the coefficient vector (i.e. coef_), so it requires no communication.
 * getLossAndDz calculates the local loss, so it requires an allreduce to obtain the global loss.
 * linearBwd calculates the local gradient vector, so it requires an allreduce to obtain the global gradient vector.
 * The global loss and the global gradient vector are then used by min_lbfgs to update the coefficients.
 * The update runs independently on every GPU and, when it finishes, all GPUs hold the same coefficient values.
 */
const raft::handle_t* handle_p;
int rank;
int64_t n_samples;
@@ -108,7 +122,6 @@ struct GLMWithDataMG : ML::GLM::detail::GLMWithData<T, GLMObjective> {
raft::update_host(&reg_host, dev_scalar, 1, stream);
// note: avoid syncing here because there's a sync before reg_host is used.


// apply linearFwd, getLossAndDz, linearBwd
ML::GLM::detail::linearFwd(lossFunc->handle, *(this->Z), *(this->X), W); // linear part: forward pass

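To make the communication pattern described above concrete, here is a minimal sketch of the aggregation step, assuming a raft::handle_t whose communicator has been initialized. The function name aggregate and the in-place buffer reuse are illustrative, not this commit's code.

    #include <raft/core/comms.hpp>
    #include <raft/core/handle.hpp>

    #include <cstddef>

    // Sketch: each GPU computes a local loss and a local gradient, then a
    // sum-allreduce turns them into global values so the L-BFGS update can run
    // identically on every rank.
    template <typename T>
    void aggregate(const raft::handle_t& handle,
                   T* dev_loss,           // device scalar: local loss in, global loss out
                   T* dev_grad,           // device vector: local gradient in, global gradient out
                   std::size_t grad_len)  // gradient length, e.g. D + fit_intercept
    {
      cudaStream_t stream = handle.get_stream();
      const auto& comm    = handle.get_comms();
      // In-place allreduce: afterwards every rank holds the sum over all ranks.
      comm.allreduce(dev_loss, dev_loss, 1, raft::comms::op_t::SUM, stream);
      comm.allreduce(dev_grad, dev_grad, grad_len, raft::comms::op_t::SUM, stream);
      comm.sync_stream(stream);  // ensure the reduction has completed before use
    }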
53 changes: 6 additions & 47 deletions cpp/src/glm/qn_mg.cu
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -36,45 +36,6 @@ namespace ML {
namespace GLM {
namespace opg {

/*
void toy(const raft::handle_t &handle,
std::vector<Matrix::Data<float>*>& input_data,
Matrix::PartDescriptor& input_desc,
std::vector<Matrix::Data<float>*>& labels,
float* coef)
{
ASSERT(input_data.size() == 1, "qn_mg.cu currently does not accept more than one input matrix");
ASSERT(labels.size() == input_data.size(), "labels size does not input_data size ");
cudaStream_t stream = raft::resource::get_cuda_stream(handle);
std::cout << "entered toy:input_data.size() " << input_data.size() << std::endl;
auto X = input_data[0];
std::cout << "X.numElements: " << X->numElements() << ", totalSize: " << X->totalSize << std::endl;
std::cout << "X: " << raft::arr2Str(X->ptr, 4, "first X", stream).c_str() << std::endl;
auto y = labels[0];
std::cout << "y.numElements: " << y->numElements() << ", totalSize: " << y->totalSize << std::endl;
std::cout << "first_y.ptr: " << raft::arr2Str(y->ptr, 2, "first_y ", stream).c_str() << std::endl;
int N = input_desc.M;
int D = input_desc.N;
int rank = input_desc.rank;
int n_ranks = input_desc.partsToRanks.size();
size_t n_samples = 0;
for (auto p : input_desc.partsToRanks) {
n_samples += p->size;
}
std::cout << "report: N: " << N << ", D: " << D << ", rank: " << rank << ", n_ranks " << n_ranks << ", n_samples " << n_samples << std::endl;
}
*/

template<typename T>
void qnFit_impl(const raft::handle_t &handle,
const qn_params& pams,
@@ -105,11 +66,6 @@ void qnFit_impl(const raft::handle_t &handle,
auto y_simple = SimpleVec<T>(y, N);
SimpleVec<T> coef_simple(w0, D + pams.fit_intercept);

//std::cout << "rank " << rank << ", N " << N << ", D " << D << ", n_samples " << n_samples << std::endl;
//std::cout << "ranl " << rank << raft::arr2Str(X_simple.data, N * D, "X_simple", stream) << std::endl;
//std::cout << "ranl " << rank << raft::arr2Str(y_simple.data, N, "y_simple", stream) << std::endl;
//std::cout << "ranl " << rank << raft::arr2Str(coef_simple.data, D + pams.fit_intercept, "y_simple", stream) << std::endl;

ML::GLM::detail::LBFGSParam<T> opt_param(pams);

// prepare regularizer regularizer_obj
@@ -147,6 +103,7 @@ void qnFit_impl(raft::handle_t& handle,
T* coef,
const qn_params& pams,
bool X_col_major,
int n_classes,
T* f,
int* num_iters)
{
@@ -169,7 +126,7 @@ void qnFit_impl(raft::handle_t& handle,
data_y->ptr,
input_desc.totalElementsOwnedBy(input_desc.rank),
input_desc.N,
2, // TODO: support multiple classes
n_classes,
coef,
f,
num_iters,
@@ -185,17 +142,19 @@ void qnFit(raft::handle_t& handle,
float* coef,
const qn_params& pams,
bool X_col_major,
int n_classes,
float* f,
int* num_iters)
{
qnFit_impl(
qnFit_impl<float>(
handle,
input_data,
input_desc,
labels,
coef,
pams,
X_col_major,
n_classes,
f,
num_iters
);
2 changes: 1 addition & 1 deletion python/cuml/dask/linear_model/logistic_regression.py
@@ -1,4 +1,4 @@
# Copyright (c) 2019-2023, NVIDIA CORPORATION.
# Copyright (c) 2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
6 changes: 4 additions & 2 deletions python/cuml/linear_model/logistic_regression_mg.pyx
@@ -1,5 +1,5 @@
#
# Copyright (c) 2019-2022, NVIDIA CORPORATION.
# Copyright (c) 2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -79,6 +79,7 @@ cdef extern from "cuml/linear_model/qn_mg.hpp" namespace "ML::GLM::opg" nogil:
float *coef,
const qn_params& pams,
bool X_col_major,
int n_classes,
float *f,
int *num_iters) except +

@@ -178,7 +179,7 @@ class LogisticRegressionMG(MGFitMixin, LogisticRegression):
cdef int num_iters


self._num_classes = 2
self._num_classes = 2 # TODO: calculate _num_classes at runtime
self.prepare_for_fit(self._num_classes)
cdef uintptr_t mat_coef_ptr = self.coef_.ptr

@@ -192,6 +193,7 @@ class LogisticRegressionMG(MGFitMixin, LogisticRegression):
<float*>mat_coef_ptr,
qnpams,
self.is_col_major,
self._num_classes,
<float*> &objective32,
<int*> &num_iters)

2 changes: 1 addition & 1 deletion python/cuml/tests/dask/test_dask_logistic_regression.py
@@ -172,7 +172,7 @@ def imp():
@pytest.mark.mg
@pytest.mark.parametrize("nrows", [1e5])
@pytest.mark.parametrize("ncols", [20])
@pytest.mark.parametrize("n_parts", [2])
@pytest.mark.parametrize("n_parts", [2, 23])
@pytest.mark.parametrize("datatype", [np.float32])
def test_lbfgs(
nrows, ncols, n_parts, datatype, client
