Skip to content

Commit

Permalink
Merge pull request #2427 from divyegala/mnmg-decomp
Browse files Browse the repository at this point in the history
[REVIEW] Moving MNMG decomp to cuml
  • Loading branch information
cjnolet authored Jul 2, 2020
2 parents 6874d8b + de6cce2 commit 6223eb8
Show file tree
Hide file tree
Showing 15 changed files with 1,937 additions and 37 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
- PR #2340: Import ARIMA in the root init file and fix the `test_fit_function` test
- PR #2408: Install meta packages for dependencies
- PR #2417: Move doc customization scripts to Jenkins
- PR #2427: Moving MNMG decomposition to cuml
- PR #2433: Add libcumlprims_mg to CMake
- PR #2420: Add and set convert_dtype default to True in estimator fit methods
- PR #2411: Refactor Mixin classes and use in classifier/regressor estimators
Expand Down
8 changes: 4 additions & 4 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ if hasArg --singlegpu; then
SINGLEGPU_PYTHON_FLAG="--singlegpu"
SINGLEGPU_CPP_FLAG=ON
fi
if hasArg mgtests; then
if hasArg cpp-mgtests; then
BUILD_CPP_MG_TESTS=ON
fi
if hasArg --nvtx; then
Expand Down Expand Up @@ -149,7 +149,7 @@ fi

################################################################################
# Configure for building all C++ targets
if completeBuild || hasArg libcuml || hasArg prims || hasArg bench || hasArg prims-bench || hasArg cppdocs; then
if completeBuild || hasArg libcuml || hasArg prims || hasArg bench || hasArg prims-bench || hasArg cppdocs || hasArg cpp-mgtests; then
if (( ${BUILD_ALL_GPU_ARCH} == 0 )); then
GPU_ARCH=""
echo "Building for the architecture of the GPU in the system..."
Expand Down Expand Up @@ -185,7 +185,7 @@ MAKE_TARGETS=
if hasArg libcuml; then
MAKE_TARGETS="${MAKE_TARGETS}cuml++ cuml ml"
fi
if hasArg mgtests; then
if hasArg cpp-mgtests; then
MAKE_TARGETS="${MAKE_TARGETS} ml_mg"
fi
if hasArg prims; then
Expand All @@ -199,7 +199,7 @@ if hasArg prims-bench; then
fi

# If `./build.sh cuml` is called, don't build C/C++ components
if completeBuild || hasArg libcuml || hasArg prims || hasArg bench; then
if completeBuild || hasArg libcuml || hasArg prims || hasArg bench || hasArg cpp-mgtests; then
# If there are no targets specified when calling build.sh, it will
# just call `make -j`. This avoids a lot of extra printing
cd ${LIBCUML_BUILD_DIR}
Expand Down
59 changes: 38 additions & 21 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ option(BUILD_CUML_CPP_LIBRARY "Build libcuml++ shared library" ON)

option(BUILD_CUML_TESTS "Build cuML algorithm tests" ON)

option(BUILD_CUML_MG_TESTS "Build cuML multigpu algorithm tests" ON)
option(BUILD_CUML_MG_TESTS "Build cuML multigpu algorithm tests" OFF)

option(BUILD_PRIMS_TESTS "Build ml-prim tests" ON)

Expand Down Expand Up @@ -156,10 +156,10 @@ if(SINGLEGPU)
set(WITH_UCX OFF)
endif(SINGLEGPU)

if(NOT BUILD_CUML_MPI_COMMS AND NOT SINGLEGPU)
message(STATUS "Detected BUILD_CUML_MPI_COMMS set to OFF. Disabling BUILD_CUML_MG_TESTS")
set(BUILD_CUML_MG_TESTS OFF)
endif(NOT BUILD_CUML_MPI_COMMS AND NOT SINGLEGPU)
if(BUILD_CUML_MG_TESTS AND NOT SINGLEGPU)
message(STATUS "Detected BUILD_CUML_MG_TESTS set to ON. Enabling BUILD_CUML_MPI_COMMS")
set(BUILD_CUML_MPI_COMMS ON)
endif(BUILD_CUML_MG_TESTS AND NOT SINGLEGPU)

##############################################################################
# - Requirements -------------------------------------------------------------
Expand Down Expand Up @@ -295,7 +295,6 @@ include(cmake/Dependencies.cmake)

set(CUML_INCLUDE_DIRECTORIES
${CUML_INCLUDE_DIR}
${CUMLPRIMS_MG_INCLUDE_DIRS}
${CMAKE_CURRENT_SOURCE_DIR}/src
${CMAKE_CURRENT_SOURCE_DIR}/src_prims
${CMAKE_CURRENT_SOURCE_DIR}/test/prims
Expand All @@ -304,16 +303,33 @@ set(CUML_INCLUDE_DIRECTORIES
${CUTLASS_DIR}/src/cutlass
${CUB_DIR}/src/cub
${SPDLOG_DIR}/src/spdlog/include
${RAFT_DIR}/cpp/include)
${RAFT_DIR}/cpp/include
)

set(CUML_LINK_LIBRARIES
set(CUML_PUBLIC_LINK_LIBRARIES
${CUDA_cublas_LIBRARY}
${CUDA_curand_LIBRARY}
${CUDA_cusolver_LIBRARY}
${CUDA_CUDART_LIBRARY}
${CUDA_cusparse_LIBRARY}
${CUDA_nvgraph_LIBRARY}
${CUMLPRIMS_MG_LIBRARIES})
)

set(CUML_PRIVATE_LINK_LIBRARIES
${Protobuf_LIBRARIES}
faisslib
treelite::treelite
treelite::treelite_runtime
)

if(ENABLE_CUMLPRIMS_MG)
list(APPEND CUML_INCLUDE_DIRECTORIES
${CUMLPRIMS_MG_INCLUDE_DIRS})

list(APPEND CUML_PRIVATE_LINK_LIBRARIES
CUMLPRIMS_MG::CUMLPRIMS_MG)

endif(ENABLE_CUMLPRIMS_MG)

##############################################################################
# - build libcuml++ shared library -------------------------------------------
Expand Down Expand Up @@ -360,18 +376,21 @@ if(BUILD_CUML_CPP_LIBRARY)

# mnmg components

# if(NOT SINGLEGPU)
# target_sources(${CUML_CPP_TARGET}
# PRIVATE src/kmeans/kmeans_mg.cu
# )
# endif(NOT SINGLEGPU)
if(NOT SINGLEGPU)
target_sources(${CUML_CPP_TARGET}
PRIVATE
src/pca/pca_mg.cu
src/pca/sign_flip_mg.cu
src/tsvd/tsvd_mg.cu
)
endif(NOT SINGLEGPU)

if(OPENMP_FOUND)
set(CUML_LINK_LIBRARIES ${CUML_LINK_LIBRARIES} ${OpenMP_CXX_LIB_NAMES} Threads::Threads)
set(CUML_PUBLIC_LINK_LIBRARIES ${CUML_PUBLIC_LINK_LIBRARIES} ${OpenMP_CXX_LIB_NAMES} Threads::Threads)
endif(OPENMP_FOUND)

if(NVTX)
set(CUML_LINK_LIBRARIES ${CUML_LINK_LIBRARIES} nvToolsExt)
set(CUML_PUBLIC_LINK_LIBRARIES ${CUML_PUBLIC_LINK_LIBRARIES} nvToolsExt)
link_directories(${CUDA_TOOLKIT_ROOT_DIR}/lib64)
endif(NVTX)

Expand All @@ -380,12 +399,10 @@ if(BUILD_CUML_CPP_LIBRARY)

target_link_libraries(${CUML_CPP_TARGET}
PUBLIC
${CUML_LINK_LIBRARIES}
${CUML_PUBLIC_LINK_LIBRARIES}
PRIVATE
${Protobuf_LIBRARIES}
faisslib
treelite::treelite
treelite::treelite_runtime)
${CUML_PRIVATE_LINK_LIBRARIES}
)
# If we export the libdmlc symbols, they can lead to weird crashes with other
# libraries that use libdmlc. This just hides the symbols internally.
target_link_options(${CUML_CPP_TARGET} PRIVATE "-Wl,--exclude-libs,libdmlc.a")
Expand Down
4 changes: 2 additions & 2 deletions cpp/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@ Current cmake offers the following configuration options:
| --- | --- | --- | --- |
| BUILD_CUML_CPP_LIBRARY | [ON, OFF] | ON | Enable/disable building libcuml++ shared library. Setting this variable to `OFF` sets the variables BUILD_CUML_TESTS, BUILD_CUML_MG_TESTS and BUILD_CUML_EXAMPLES to `OFF` |
| BUILD_CUML_TESTS | [ON, OFF] | ON | Enable/disable building cuML algorithm test executable `ml_test`. |
| BUILD_CUML_MG_TESTS | [ON, OFF] | ON | Enable/disable building cuML algorithm test executable `ml_mg_test`. Requires MPI installed and turning BUILD_CUML_MPI_COMMS to ON. |
| BUILD_CUML_MG_TESTS | [ON, OFF] | OFF | Enable/disable building cuML algorithm test executable `ml_mg_test`. Requires MPI to be installed. When enabled, BUILD_CUML_MPI_COMMS will be automatically set to ON. |
| BUILD_PRIMS_TESTS | [ON, OFF] | ON | Enable/disable building cuML algorithm test executable `prims_test`. |
| BUILD_CUML_STD_COMMS | [ON, OFF] | ON | Enable/disable building cuML NCCL+UCX communicator for running multi-node multi-GPU algorithms. Note that UCX support can also be enabled/disabled (see below). The standard communicator and MPI communicator are not mutually exclusive and can both be installed at the same time. |
| WITH_UCX | [ON, OFF] | OFF | Enable/disable UCX support in the standard cuML communicator. Algorithms requiring point-to-point messaging will not work when this is disabled. This flag is ignored if BUILD_CUML_STD_COMMS is set to OFF. |
| BUILD_CUML_MPI_COMMS | [ON, OFF] | OFF | Enable/disable building cuML MPI+NCCL communicator for running multi-node multi-GPU C++ tests. MPI communicator and STD communicator are not mutually exclusive and can both be installed at the same time. If OFF, it overrides BUILD_CUML_MG_TESTS to be OFF as well. |
| BUILD_CUML_MPI_COMMS | [ON, OFF] | OFF | Enable/disable building cuML MPI+NCCL communicator for running multi-node multi-GPU C++ tests. MPI communicator and STD communicator may both be installed at the same time. If OFF, it overrides BUILD_CUML_MG_TESTS to be OFF as well. |
| SINGLEGPU | [ON, OFF] | OFF | Disable all mnmg components. Disables building of all multi-GPU algorithms and all comms library components. Removes libcumlprims, UCX-py and NCCL dependencies. Overrides values of BUILD_CUML_MG_TESTS, BUILD_CUML_STD_COMMS, WITH_UCX and BUILD_CUML_MPI_COMMS. |
| BUILD_CUML_EXAMPLES | [ON, OFF] | ON | Enable/disable building cuML C++ API usage examples. |
| BUILD_CUML_BENCH | [ON, OFF] | ON | Enable/disable building of the cuML C++ benchmark. |
Expand Down
150 changes: 150 additions & 0 deletions cpp/include/cuml/decomposition/pca_mg.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <opg/matrix/data.hpp>
#include <opg/matrix/part_descriptor.hpp>
#include "pca.hpp"

#include <common/cumlHandle.hpp>

namespace ML {

// Solver choices for the multi-node multi-GPU (MNMG) decomposition algorithms.
enum class mg_solver { COV_EIG_DQ, COV_EIG_JACOBI, QR };

// MNMG TSVD parameters: the single-GPU parameter template specialized on the MNMG solver enum.
typedef paramsTSVDTemplate<mg_solver> paramsTSVDMG;

// MNMG PCA parameters: the single-GPU parameter template specialized on the MNMG solver enum.
typedef paramsPCATemplate<mg_solver> paramsPCAMG;

namespace PCA {
namespace opg {

/**
 * @brief Performs the MNMG fit operation for PCA.
 * @param[in]  handle: the internal cuml handle object
 * @param[in]  input_data: partitions of the input data owned by this rank
 * @param[in]  input_desc: MNMG partition descriptor for the input data
 * @param[out] components: principal components of the input data
 * @param[out] explained_var: explained variance of each component
 * @param[out] explained_var_ratio: explained variance ratio of each component
 * @param[out] singular_vals: singular values of the data
 * @param[out] mu: mean of every column in input
 * @param[out] noise_vars: variance of the noise
 * @param[in]  prms: data structure that includes all the parameters from input size to algorithm
 * @param[in]  verbose: enable verbose logging
 */
void fit(cumlHandle &handle,
std::vector<MLCommon::Matrix::Data<float> *> &input_data,
MLCommon::Matrix::PartDescriptor &input_desc, float *components,
float *explained_var, float *explained_var_ratio, float *singular_vals,
float *mu, float *noise_vars, paramsPCAMG prms, bool verbose = false);

void fit(cumlHandle &handle,
std::vector<MLCommon::Matrix::Data<double> *> &input_data,
MLCommon::Matrix::PartDescriptor &input_desc, double *components,
double *explained_var, double *explained_var_ratio,
double *singular_vals, double *mu, double *noise_vars,
paramsPCAMG prms, bool verbose = false);

/**
 * @brief Performs the MNMG fit and transform operation for PCA.
 * @param[in]  handle: the internal cuml handle object
 * @param[in]  rank_sizes: partition size information for each rank
 * @param[in]  n_parts: number of partitions
 * @param[in]  input: input data partitions
 * @param[out] trans_input: transformed input data partitions
 * @param[out] components: principal components of the input data
 * @param[out] explained_var: explained variance of each component
 * @param[out] explained_var_ratio: explained variance ratio of each component
 * @param[out] singular_vals: singular values of the data
 * @param[out] mu: mean of every column in input
 * @param[out] noise_vars: variance of the noise
 * @param[in]  prms: data structure that includes all the parameters from input size to algorithm
 * @param[in]  verbose: enable verbose logging
 */
void fit_transform(cumlHandle &handle,
MLCommon::Matrix::RankSizePair **rank_sizes, size_t n_parts,
MLCommon::Matrix::floatData_t **input,
MLCommon::Matrix::floatData_t **trans_input,
float *components, float *explained_var,
float *explained_var_ratio, float *singular_vals, float *mu,
float *noise_vars, paramsPCAMG prms, bool verbose);

void fit_transform(cumlHandle &handle,
MLCommon::Matrix::RankSizePair **rank_sizes, size_t n_parts,
MLCommon::Matrix::doubleData_t **input,
MLCommon::Matrix::doubleData_t **trans_input,
double *components, double *explained_var,
double *explained_var_ratio, double *singular_vals,
double *mu, double *noise_vars, paramsPCAMG prms,
bool verbose);

/**
 * @brief Performs the MNMG transform operation for PCA.
 * @param[in]  handle: the internal cuml handle object
 * @param[in]  rank_sizes: partition size information for each rank
 * @param[in]  n_parts: number of partitions
 * @param[in]  input: input data partitions
 * @param[in]  components: principal components of the input data
 * @param[out] trans_input: transformed input data partitions
 * @param[in]  singular_vals: singular values of the data
 * @param[in]  mu: mean of every column in input
 * @param[in]  prms: data structure that includes all the parameters from input size to algorithm
 * @param[in]  verbose: enable verbose logging
 */
void transform(cumlHandle &handle, MLCommon::Matrix::RankSizePair **rank_sizes,
size_t n_parts, MLCommon::Matrix::Data<float> **input,
float *components, MLCommon::Matrix::Data<float> **trans_input,
float *singular_vals, float *mu, paramsPCAMG prms, bool verbose);

void transform(cumlHandle &handle, MLCommon::Matrix::RankSizePair **rank_sizes,
size_t n_parts, MLCommon::Matrix::Data<double> **input,
double *components, MLCommon::Matrix::Data<double> **trans_input,
double *singular_vals, double *mu, paramsPCAMG prms,
bool verbose);

/**
 * @brief Performs the MNMG inverse transform operation for PCA.
 * @param[in]  handle: the internal cuml handle object
 * @param[in]  rank_sizes: partition size information for each rank
 * @param[in]  n_parts: number of partitions
 * @param[in]  trans_input: transformed input data partitions
 * @param[in]  components: principal components of the input data
 * @param[out] input: reconstructed input data partitions
 * @param[in]  singular_vals: singular values of the data
 * @param[in]  mu: mean of every column in input
 * @param[in]  prms: data structure that includes all the parameters from input size to algorithm
 * @param[in]  verbose: enable verbose logging
 */
void inverse_transform(cumlHandle &handle,
MLCommon::Matrix::RankSizePair **rank_sizes,
size_t n_parts,
MLCommon::Matrix::Data<float> **trans_input,
float *components, MLCommon::Matrix::Data<float> **input,
float *singular_vals, float *mu, paramsPCAMG prms,
bool verbose);

void inverse_transform(
cumlHandle &handle, MLCommon::Matrix::RankSizePair **rank_sizes,
size_t n_parts, MLCommon::Matrix::Data<double> **trans_input,
double *components, MLCommon::Matrix::Data<double> **input,
double *singular_vals, double *mu, paramsPCAMG prms, bool verbose);

};  // end namespace opg
};  // end namespace PCA
};  // end namespace ML
50 changes: 50 additions & 0 deletions cpp/include/cuml/decomposition/sign_flip_mg.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <common/cumlHandle.hpp>
#include <opg/matrix/data.hpp>
#include <opg/matrix/part_descriptor.hpp>

namespace ML {
namespace PCA {
namespace opg {

/**
 * @brief Sign flip for PCA and tSVD. Used to stabilize the sign of the
 * column-major eigenvectors so that results are deterministic.
 * @param[in]     handle: the internal cuml handle object
 * @param[in,out] input_data: partitions of the input matrix used to determine the sign
 * @param[in]     input_desc: MNMG partition descriptor for the input data
 * @param[in,out] components: components matrix; signs are flipped in place
 * @param[in]     n_components: number of columns of the components matrix
 * @param[in]     streams: cuda streams
 * @param[in]     n_stream: number of streams
 */
void sign_flip(cumlHandle &handle,
std::vector<MLCommon::Matrix::Data<float> *> &input_data,
MLCommon::Matrix::PartDescriptor &input_desc, float *components,
int n_components, cudaStream_t *streams, int n_stream);

void sign_flip(cumlHandle &handle,
std::vector<MLCommon::Matrix::Data<double> *> &input_data,
MLCommon::Matrix::PartDescriptor &input_desc, double *components,
int n_components, cudaStream_t *streams, int n_stream);

};  // end namespace opg
};  // end namespace PCA
};  // end namespace ML
Loading

0 comments on commit 6223eb8

Please sign in to comment.