From b28c1b1a0e00d5d7984b89a7200265c203030c8a Mon Sep 17 00:00:00 2001 From: dzhwinter Date: Wed, 7 Feb 2018 01:44:21 -0800 Subject: [PATCH 1/2] "add comment" --- cmake/external/nccl.cmake | 19 ++++++++++++++++--- paddle/platform/dynload/nccl.h | 15 +++++++++++++-- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/cmake/external/nccl.cmake b/cmake/external/nccl.cmake index fc43766efafc3..852bd0c37fb95 100644 --- a/cmake/external/nccl.cmake +++ b/cmake/external/nccl.cmake @@ -20,20 +20,32 @@ include(ExternalProject) set(NCCL_SOURCE_DIR ${THIRD_PARTY_PATH}/nccl) -include_directories(${NCCL_SOURCE_DIR}/src/extern_nccl/src) +# https://github.com/PaddlePaddle/Paddle/issues/8195 +# Note: nccl2.1.4 seems to work well on cuda9, but is not compatible with cuda8 +# TODO(dzhwinter): the NCCL DSO is disabled temporarily; this override should be removed, +# along with the commented-out code in nccl.h +set(WITH_DSO OFF) if(WITH_DSO) + # If we use DSO, we use system default nccl.h + set(NCCL_ROOT "/usr" CACHE PATH "NCCL ROOT") + find_path(NCCL_INCLUDE_DIR nccl.h + PATHS ${NCCL_ROOT} ${NCCL_ROOT}/include + $ENV{NCCL_ROOT} $ENV{NCCL_ROOT}/include ${CUDA_TOOLKIT_INCLUDE} + NO_DEFAULT_PATH + ) # If we use DSO, we do not build nccl, just download the dependencies set(NCCL_BUILD_COMMAND "") set(NCCL_INSTALL_COMMAND "") set(NCCL_INSTALL_DIR "") else() # otherwise, we build nccl and link it. 
+ include_directories(${NCCL_SOURCE_DIR}/src/extern_nccl/src) set(NCCL_INSTALL_DIR ${THIRD_PARTY_PATH}/install/nccl) # Note: cuda 8.0 is needed to make nccl # When cuda is not installed on the system directory, need to set CUDA_HOME to your cuda root - set(NCCL_BUILD_COMMAND "make -j 8") - set(NCCL_INSTALL_COMMAND "make install PREFIX=${NCCL_INSTALL_DIR}") + set(NCCL_BUILD_COMMAND make -j 8) + set(NCCL_INSTALL_COMMAND make install PREFIX=${NCCL_INSTALL_DIR}) endif() ExternalProject_Add( @@ -44,6 +56,7 @@ ExternalProject_Add( PREFIX "${NCCL_SOURCE_DIR}" UPDATE_COMMAND "" CONFIGURE_COMMAND "" + BUILD_IN_SOURCE 1 BUILD_COMMAND "${NCCL_BUILD_COMMAND}" INSTALL_COMMAND "${NCCL_INSTALL_COMMAND}" INSTALL_DIR "${NCCL_INSTALL_DIR}" diff --git a/paddle/platform/dynload/nccl.h b/paddle/platform/dynload/nccl.h index 6c776afc97a53..7ecb00ce59c10 100644 --- a/paddle/platform/dynload/nccl.h +++ b/paddle/platform/dynload/nccl.h @@ -27,6 +27,7 @@ namespace dynload { extern std::once_flag nccl_dso_flag; extern void* nccl_dso_handle; +/* #ifdef PADDLE_USE_DSO extern void LoadNCCLDSO(); @@ -42,6 +43,8 @@ extern void LoadNCCLDSO(); }; \ extern DynLoad__##__name __name #else +#endif +*/ #define DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name) \ struct DynLoad__##__name { \ template <typename... Args> \ @@ -50,13 +53,19 @@ extern void LoadNCCLDSO(); } \ }; \ extern DynLoad__##__name __name -#endif +#define DECLARE_DYNAMIC_LOAD_NCCL_WRAP_NO_RETURN(__name) \ + struct DynLoad__##__name { \ + template <typename... Args> \ + void operator()(Args... 
args) { \ + __name(args...); \ + } \ + }; \ + extern DynLoad__##__name __name #define NCCL_RAND_ROUTINE_EACH(__macro) \ __macro(ncclCommInitAll); \ __macro(ncclGetUniqueId); \ __macro(ncclCommInitRank); \ - __macro(ncclCommDestroy); \ __macro(ncclCommCount); \ __macro(ncclCommCuDevice); \ __macro(ncclCommUserRank); \ @@ -70,6 +79,8 @@ extern void LoadNCCLDSO(); NCCL_RAND_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_NCCL_WRAP) +DECLARE_DYNAMIC_LOAD_NCCL_WRAP_NO_RETURN(ncclCommDestroy); + } // namespace dynload } // namespace platform } // namespace paddle From 0f5bf8aba9add8c80f107b60146b231e549e9c23 Mon Sep 17 00:00:00 2001 From: dzhwinter Date: Wed, 7 Feb 2018 01:57:01 -0800 Subject: [PATCH 2/2] "remove nccl2 macro" --- paddle/platform/dynload/nccl.cc | 1 + paddle/platform/dynload/nccl.h | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/paddle/platform/dynload/nccl.cc b/paddle/platform/dynload/nccl.cc index 4cec829a8ad89..865b62324bb70 100644 --- a/paddle/platform/dynload/nccl.cc +++ b/paddle/platform/dynload/nccl.cc @@ -24,6 +24,7 @@ void *nccl_dso_handle; #define DEFINE_WRAP(__name) DynLoad__##__name __name NCCL_RAND_ROUTINE_EACH(DEFINE_WRAP); +DEFINE_WRAP(ncclCommDestroy); void LoadNCCLDSO() { platform::call_once(nccl_dso_flag, diff --git a/paddle/platform/dynload/nccl.h b/paddle/platform/dynload/nccl.h index 7ecb00ce59c10..2539b2644df34 100644 --- a/paddle/platform/dynload/nccl.h +++ b/paddle/platform/dynload/nccl.h @@ -72,8 +72,6 @@ extern void LoadNCCLDSO(); __macro(ncclAllReduce); \ __macro(ncclBcast); \ __macro(ncclAllGather); \ - __macro(ncclGroupStart); \ - __macro(ncclGroupEnd); \ __macro(ncclReduce); \ __macro(ncclGetErrorString);