Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[REVIEW] Moving MNMG decomp to cuml #2427

Merged
merged 21 commits into from
Jul 2, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
- PR #2340: Import ARIMA in the root init file and fix the `test_fit_function` test
- PR #2408: Install meta packages for dependencies
- PR #2417: Move doc customization scripts to Jenkins
- PR #2427: Moving MNMG decomposition to cuml
- PR #2433: Add libcumlprims_mg to CMake
- PR #2420: Add and set convert_dtype default to True in estimator fit methods
- PR #2411: Refactor Mixin classes and use in classifier/regressor estimators
Expand Down
8 changes: 4 additions & 4 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ if hasArg --singlegpu; then
SINGLEGPU_PYTHON_FLAG="--singlegpu"
SINGLEGPU_CPP_FLAG=ON
fi
if hasArg mgtests; then
if hasArg cpp-mgtests; then
BUILD_CPP_MG_TESTS=ON
fi
if hasArg --nvtx; then
Expand Down Expand Up @@ -149,7 +149,7 @@ fi

################################################################################
# Configure for building all C++ targets
if completeBuild || hasArg libcuml || hasArg prims || hasArg bench || hasArg prims-bench || hasArg cppdocs; then
if completeBuild || hasArg libcuml || hasArg prims || hasArg bench || hasArg prims-bench || hasArg cppdocs || hasArg cpp-mgtests; then
if (( ${BUILD_ALL_GPU_ARCH} == 0 )); then
GPU_ARCH=""
echo "Building for the architecture of the GPU in the system..."
Expand Down Expand Up @@ -185,7 +185,7 @@ MAKE_TARGETS=
if hasArg libcuml; then
MAKE_TARGETS="${MAKE_TARGETS}cuml++ cuml ml"
fi
if hasArg mgtests; then
if hasArg cpp-mgtests; then
MAKE_TARGETS="${MAKE_TARGETS} ml_mg"
fi
if hasArg prims; then
Expand All @@ -199,7 +199,7 @@ if hasArg prims-bench; then
fi

# If `./build.sh cuml` is called, don't build C/C++ components
if completeBuild || hasArg libcuml || hasArg prims || hasArg bench; then
if completeBuild || hasArg libcuml || hasArg prims || hasArg bench || hasArg cpp-mgtests; then
# If there are no targets specified when calling build.sh, it will
# just call `make -j`. This avoids a lot of extra printing
cd ${LIBCUML_BUILD_DIR}
Expand Down
59 changes: 38 additions & 21 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ option(BUILD_CUML_CPP_LIBRARY "Build libcuml++ shared library" ON)

option(BUILD_CUML_TESTS "Build cuML algorithm tests" ON)

option(BUILD_CUML_MG_TESTS "Build cuML multigpu algorithm tests" ON)
option(BUILD_CUML_MG_TESTS "Build cuML multigpu algorithm tests" OFF)

option(BUILD_PRIMS_TESTS "Build ml-prim tests" ON)

Expand Down Expand Up @@ -156,10 +156,10 @@ if(SINGLEGPU)
set(WITH_UCX OFF)
endif(SINGLEGPU)

if(NOT BUILD_CUML_MPI_COMMS AND NOT SINGLEGPU)
message(STATUS "Detected BUILD_CUML_MPI_COMMS set to OFF. Disabling BUILD_CUML_MG_TESTS")
set(BUILD_CUML_MG_TESTS OFF)
endif(NOT BUILD_CUML_MPI_COMMS AND NOT SINGLEGPU)
if(BUILD_CUML_MG_TESTS AND NOT SINGLEGPU)
message(STATUS "Detected BUILD_CUML_MG_TESTS set to ON. Enabling BUILD_CUML_MPI_COMMS")
set(BUILD_CUML_MPI_COMMS ON)
endif(BUILD_CUML_MG_TESTS AND NOT SINGLEGPU)

##############################################################################
# - Requirements -------------------------------------------------------------
Expand Down Expand Up @@ -295,7 +295,6 @@ include(cmake/Dependencies.cmake)

set(CUML_INCLUDE_DIRECTORIES
${CUML_INCLUDE_DIR}
${CUMLPRIMS_MG_INCLUDE_DIRS}
${CMAKE_CURRENT_SOURCE_DIR}/src
${CMAKE_CURRENT_SOURCE_DIR}/src_prims
${CMAKE_CURRENT_SOURCE_DIR}/test/prims
Expand All @@ -304,16 +303,33 @@ set(CUML_INCLUDE_DIRECTORIES
${CUTLASS_DIR}/src/cutlass
${CUB_DIR}/src/cub
${SPDLOG_DIR}/src/spdlog/include
${RAFT_DIR}/cpp/include)
${RAFT_DIR}/cpp/include
)

set(CUML_LINK_LIBRARIES
set(CUML_PUBLIC_LINK_LIBRARIES
${CUDA_cublas_LIBRARY}
${CUDA_curand_LIBRARY}
${CUDA_cusolver_LIBRARY}
${CUDA_CUDART_LIBRARY}
${CUDA_cusparse_LIBRARY}
${CUDA_nvgraph_LIBRARY}
${CUMLPRIMS_MG_LIBRARIES})
)

set(CUML_PRIVATE_LINK_LIBRARIES
${Protobuf_LIBRARIES}
faisslib
treelite::treelite
treelite::treelite_runtime
)

if(ENABLE_CUMLPRIMS_MG)
list(APPEND CUML_INCLUDE_DIRECTORIES
${CUMLPRIMS_MG_INCLUDE_DIRS})

list(APPEND CUML_PRIVATE_LINK_LIBRARIES
CUMLPRIMS_MG::CUMLPRIMS_MG)

endif(ENABLE_CUMLPRIMS_MG)

##############################################################################
# - build libcuml++ shared library -------------------------------------------
Expand Down Expand Up @@ -359,18 +375,21 @@ if(BUILD_CUML_CPP_LIBRARY)

# mnmg components

# if(NOT SINGLEGPU)
# target_sources(${CUML_CPP_TARGET}
# PRIVATE src/kmeans/kmeans_mg.cu
# )
# endif(NOT SINGLEGPU)
if(NOT SINGLEGPU)
target_sources(${CUML_CPP_TARGET}
PRIVATE
src/pca/pca_mg.cu
src/pca/sign_flip_mg.cu
src/tsvd/tsvd_mg.cu
)
endif(NOT SINGLEGPU)

if(OPENMP_FOUND)
set(CUML_LINK_LIBRARIES ${CUML_LINK_LIBRARIES} ${OpenMP_CXX_LIB_NAMES} Threads::Threads)
set(CUML_PUBLIC_LINK_LIBRARIES ${CUML_PUBLIC_LINK_LIBRARIES} ${OpenMP_CXX_LIB_NAMES} Threads::Threads)
endif(OPENMP_FOUND)

if(NVTX)
set(CUML_LINK_LIBRARIES ${CUML_LINK_LIBRARIES} nvToolsExt)
set(CUML_PUBLIC_LINK_LIBRARIES ${CUML_PUBLIC_LINK_LIBRARIES} nvToolsExt)
link_directories(${CUDA_TOOLKIT_ROOT_DIR}/lib64)
endif(NVTX)

Expand All @@ -379,12 +398,10 @@ if(BUILD_CUML_CPP_LIBRARY)

target_link_libraries(${CUML_CPP_TARGET}
PUBLIC
${CUML_LINK_LIBRARIES}
${CUML_PUBLIC_LINK_LIBRARIES}
PRIVATE
${Protobuf_LIBRARIES}
faisslib
treelite::treelite
treelite::treelite_runtime)
${CUML_PRIVATE_LINK_LIBRARIES}
)
# If we export the libdmlc symbols, they can lead to weird crashes with other
# libraries that use libdmlc. This just hides the symbols internally.
target_link_options(${CUML_CPP_TARGET} PRIVATE "-Wl,--exclude-libs,libdmlc.a")
Expand Down
4 changes: 2 additions & 2 deletions cpp/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@ Current cmake offers the following configuration options:
| --- | --- | --- | --- |
| BUILD_CUML_CPP_LIBRARY | [ON, OFF] | ON | Enable/disable building libcuml++ shared library. Setting this variable to `OFF` sets the variables BUILD_CUML_TESTS, BUILD_CUML_MG_TESTS and BUILD_CUML_EXAMPLES to `OFF` |
| BUILD_CUML_TESTS | [ON, OFF] | ON | Enable/disable building cuML algorithm test executable `ml_test`. |
| BUILD_CUML_MG_TESTS | [ON, OFF] | ON | Enable/disable building cuML algorithm test executable `ml_mg_test`. Requires MPI installed and turning BUILD_CUML_MPI_COMMS to ON. |
| BUILD_CUML_MG_TESTS | [ON, OFF] | OFF | Enable/disable building cuML algorithm test executable `ml_mg_test`. Requires MPI to be installed. When enabled, BUILD_CUML_MPI_COMMS will be automatically set to ON. |
| BUILD_PRIMS_TESTS | [ON, OFF] | ON | Enable/disable building cuML algorithm test executable `prims_test`. |
| BUILD_CUML_STD_COMMS | [ON, OFF] | ON | Enable/disable building cuML NCCL+UCX communicator for running multi-node multi-GPU algorithms. Note that UCX support can also be enabled/disabled (see below). The standard communicator and MPI communicator are not mutually exclusive and can both be installed at the same time. |
| WITH_UCX | [ON, OFF] | OFF | Enable/disable UCX support in the standard cuML communicator. Algorithms requiring point-to-point messaging will not work when this is disabled. This flag is ignored if BUILD_CUML_STD_COMMS is set to OFF. |
| BUILD_CUML_MPI_COMMS | [ON, OFF] | OFF | Enable/disable building cuML MPI+NCCL communicator for running multi-node multi-GPU C++ tests. MPI communicator and STD communicator are not mutually exclusive and can both be installed at the same time. If OFF, it overrides BUILD_CUML_MG_TESTS to be OFF as well. |
| BUILD_CUML_MPI_COMMS | [ON, OFF] | OFF | Enable/disable building cuML MPI+NCCL communicator for running multi-node multi-GPU C++ tests. MPI communicator and STD communicator may both be installed at the same time. If OFF, it overrides BUILD_CUML_MG_TESTS to be OFF as well. |
| SINGLEGPU | [ON, OFF] | OFF | Disable all mnmg components. Disables building of all multi-GPU algorithms and all comms library components. Removes libcumlprims, UCX-py and NCCL dependencies. Overrides values of BUILD_CUML_MG_TESTS, BUILD_CUML_STD_COMMS, WITH_UCX and BUILD_CUML_MPI_COMMS. |
| BUILD_CUML_EXAMPLES | [ON, OFF] | ON | Enable/disable building cuML C++ API usage examples. |
| BUILD_CUML_BENCH | [ON, OFF] | ON | Enable/disable building of the cuML C++ benchmark. |
Expand Down
150 changes: 150 additions & 0 deletions cpp/include/cuml/decomposition/pca_mg.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <opg/matrix/data.hpp>
#include <opg/matrix/part_descriptor.hpp>
#include "pca.hpp"

#include <common/cumlHandle.hpp>

namespace ML {

// Solver choices for the multi-node multi-GPU (MNMG) decomposition algorithms.
enum class mg_solver { COV_EIG_DQ, COV_EIG_JACOBI, QR };

// TSVD parameter struct specialized on the MNMG solver enum.
typedef paramsTSVDTemplate<mg_solver> paramsTSVDMG;

// PCA parameter struct specialized on the MNMG solver enum.
typedef paramsPCATemplate<mg_solver> paramsPCAMG;

namespace PCA {
namespace opg {

/**
 * @brief Performs the MNMG fit operation for PCA.
 * @param[in]  handle: the internal cuml handle object
 * @param[in]  input_data: vector of this rank's partitions of the input data
 * @param[in]  input_desc: MNMG partitioning description of the input
 * @param[out] components: principal components of the input data
 * @param[out] explained_var: explained variance
 * @param[out] explained_var_ratio: explained variance ratio
 * @param[out] singular_vals: singular values of the data
 * @param[out] mu: mean of every column in input
 * @param[out] noise_vars: variance of the noise
 * @param[in]  prms: data structure that includes all the parameters from input size to algorithm
 * @param[in]  verbose: enable verbose logging
 */
void fit(cumlHandle &handle,
         std::vector<MLCommon::Matrix::Data<float> *> &input_data,
         MLCommon::Matrix::PartDescriptor &input_desc, float *components,
         float *explained_var, float *explained_var_ratio, float *singular_vals,
         float *mu, float *noise_vars, paramsPCAMG prms, bool verbose = false);

// double-precision overload of fit() above
void fit(cumlHandle &handle,
         std::vector<MLCommon::Matrix::Data<double> *> &input_data,
         MLCommon::Matrix::PartDescriptor &input_desc, double *components,
         double *explained_var, double *explained_var_ratio,
         double *singular_vals, double *mu, double *noise_vars,
         paramsPCAMG prms, bool verbose = false);

/**
 * @brief Performs the MNMG fit-and-transform operation for PCA.
 * @param[in]  handle: the internal cuml handle object
 * @param[in]  rank_sizes: partition size information for each rank
 * @param[in]  n_parts: number of partitions
 * @param[in]  input: input data
 * @param[out] trans_input: transformed input data
 * @param[out] components: principal components of the input data
 * @param[out] explained_var: explained variance
 * @param[out] explained_var_ratio: explained variance ratio
 * @param[out] singular_vals: singular values of the data
 * @param[out] mu: mean of every column in input
 * @param[out] noise_vars: variance of the noise
 * @param[in]  prms: data structure that includes all the parameters from input size to algorithm
 * @param[in]  verbose: enable verbose logging
 */
void fit_transform(cumlHandle &handle,
                   MLCommon::Matrix::RankSizePair **rank_sizes, size_t n_parts,
                   MLCommon::Matrix::floatData_t **input,
                   MLCommon::Matrix::floatData_t **trans_input,
                   float *components, float *explained_var,
                   float *explained_var_ratio, float *singular_vals, float *mu,
                   float *noise_vars, paramsPCAMG prms, bool verbose);

// double-precision overload of fit_transform() above
void fit_transform(cumlHandle &handle,
                   MLCommon::Matrix::RankSizePair **rank_sizes, size_t n_parts,
                   MLCommon::Matrix::doubleData_t **input,
                   MLCommon::Matrix::doubleData_t **trans_input,
                   double *components, double *explained_var,
                   double *explained_var_ratio, double *singular_vals,
                   double *mu, double *noise_vars, paramsPCAMG prms,
                   bool verbose);

/**
 * @brief Performs the MNMG transform operation for PCA.
 * @param[in]  handle: the internal cuml handle object
 * @param[in]  rank_sizes: partition size information for each rank
 * @param[in]  n_parts: number of partitions
 * @param[in]  input: input data
 * @param[in]  components: principal components of the input data
 * @param[out] trans_input: transformed input data
 * @param[in]  singular_vals: singular values of the data
 * @param[in]  mu: mean of every column in input
 * @param[in]  prms: data structure that includes all the parameters from input size to algorithm
 * @param[in]  verbose: enable verbose logging
 */
void transform(cumlHandle &handle, MLCommon::Matrix::RankSizePair **rank_sizes,
               size_t n_parts, MLCommon::Matrix::Data<float> **input,
               float *components, MLCommon::Matrix::Data<float> **trans_input,
               float *singular_vals, float *mu, paramsPCAMG prms, bool verbose);

// double-precision overload of transform() above
void transform(cumlHandle &handle, MLCommon::Matrix::RankSizePair **rank_sizes,
               size_t n_parts, MLCommon::Matrix::Data<double> **input,
               double *components, MLCommon::Matrix::Data<double> **trans_input,
               double *singular_vals, double *mu, paramsPCAMG prms,
               bool verbose);

/**
 * @brief Performs the MNMG inverse-transform operation for PCA.
 * @param[in]  handle: the internal cuml handle object
 * @param[in]  rank_sizes: partition size information for each rank
 * @param[in]  n_parts: number of partitions
 * @param[in]  trans_input: transformed input data
 * @param[in]  components: principal components of the input data
 * @param[out] input: reconstructed (un-transformed) data
 * @param[in]  singular_vals: singular values of the data
 * @param[in]  mu: mean of every column in input
 * @param[in]  prms: data structure that includes all the parameters from input size to algorithm
 * @param[in]  verbose: enable verbose logging
 */
void inverse_transform(cumlHandle &handle,
                       MLCommon::Matrix::RankSizePair **rank_sizes,
                       size_t n_parts,
                       MLCommon::Matrix::Data<float> **trans_input,
                       float *components, MLCommon::Matrix::Data<float> **input,
                       float *singular_vals, float *mu, paramsPCAMG prms,
                       bool verbose);

// double-precision overload of inverse_transform() above
void inverse_transform(
  cumlHandle &handle, MLCommon::Matrix::RankSizePair **rank_sizes,
  size_t n_parts, MLCommon::Matrix::Data<double> **trans_input,
  double *components, MLCommon::Matrix::Data<double> **input,
  double *singular_vals, double *mu, paramsPCAMG prms, bool verbose);

};  // end namespace opg
};  // end namespace PCA
};  // end namespace ML
50 changes: 50 additions & 0 deletions cpp/include/cuml/decomposition/sign_flip_mg.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <common/cumlHandle.hpp>
#include <opg/matrix/data.hpp>
#include <opg/matrix/part_descriptor.hpp>

namespace ML {
namespace PCA {
namespace opg {

/**
 * @brief Sign flip for PCA and tSVD. Used to stabilize the sign of
 *        column-major eigen vectors.
 * @param[in]     handle: the internal cuml handle object
 * @param[in,out] input_data: this rank's partitions of the input matrix, used
 *                to determine the sign
 * @param[in]     input_desc: MNMG partitioning description of the input
 * @param[in,out] components: components matrix
 * @param[in]     n_components: number of columns of the components matrix
 * @param[in]     streams: cuda streams
 * @param[in]     n_stream: number of streams
 */
void sign_flip(cumlHandle &handle,
               std::vector<MLCommon::Matrix::Data<float> *> &input_data,
               MLCommon::Matrix::PartDescriptor &input_desc, float *components,
               int n_components, cudaStream_t *streams, int n_stream);

// double-precision overload of sign_flip() above
void sign_flip(cumlHandle &handle,
               std::vector<MLCommon::Matrix::Data<double> *> &input_data,
               MLCommon::Matrix::PartDescriptor &input_desc, double *components,
               int n_components, cudaStream_t *streams, int n_stream);

};  // end namespace opg
};  // end namespace PCA
};  // end namespace ML
Loading