Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fdy change dyn load #1301

Merged
merged 21 commits into from
Jul 29, 2024
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions .github/workflows/_runs-on-nv-step1.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,37 @@ jobs:
"""
fi

# Disabled job that builds the dynamic-load torch variant (build_impl.sh torch_dyload).
# Enable it once the dynamic torch is ready (built without unique symbols and with the
# diopi-suffixed libraries generated).
Build-torch-dynamic:
# Hard-coded off: the job is skipped until this condition is changed.
if: false
name: Build-torch-dynamic
runs-on: ${{ inputs.runner }}
env:
GETRUNNER: ${{ inputs.runner }}
DEEPLINK_PATH: ${{ inputs.deeplink_path }}
ENV_PATH: ${{ inputs.env_path }}
# Slurm partition passed to srun in the non-"sco" branch of the build step.
SLURM_PAR_V100: "pat_dev"
# Directory under the per-run workspace that receives a fresh copy of `source` for this build.
BUILD_TEST2: "build_test_dyn"
steps:
# Runs through srun directly when the runner name contains "sco"; otherwise the same
# sequence is executed over ssh on SH1424. Both branches first remove top-level workspace
# directories not modified within the last 300 minutes, and delete the build copy again
# (exiting with status 1) when the build fails.
- name: build
run: |
if [[ "${GETRUNNER}" == *sco* ]];then
set -e
cd ${DEEPLINK_PATH}/ && ls -al && find ${DEEPLINK_PATH}/ -maxdepth 1 -mmin +300 -type d |xargs rm -rf
cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${BUILD_TEST2} && cp -R source ${BUILD_TEST2} && cd ${BUILD_TEST2}
srun --job-name=${GITHUB_JOB} bash -c "export USE_COVERAGE=ON && cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/${BUILD_TEST2} \
&& source ${ENV_PATH}/pt2.0_diopi \
&& cd impl && bash scripts/build_impl.sh torch_dyload" || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${BUILD_TEST2} && exit 1 )
else
ssh SH1424 """
set -e
cd ${DEEPLINK_PATH}/ && ls -al && find ${DEEPLINK_PATH}/ -maxdepth 1 -mmin +300 -type d |xargs rm -rf
source ${ENV_PATH}/pt2.0_diopi
cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER} && rm -rf ${BUILD_TEST2} && cp -R source ${BUILD_TEST2} && cd ${BUILD_TEST2}
srun --job-name=${GITHUB_JOB} --partition=${SLURM_PAR_V100} --time=20 bash -c 'cd impl && bash scripts/build_impl.sh torch_dyload' || ( cd ${DEEPLINK_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${BUILD_TEST2} && exit 1 )
"""
fi

Gen-Data:
name: Gen-Data
needs: [Build-Nvidia]
Expand Down
56 changes: 56 additions & 0 deletions impl/cmake/ImplHelper.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# diopi_use_adapter(<cmd_extra_config>)
#
# Sets up the DIOPI adaptor code-generation step.
#
# Arguments:
#   cmd_extra_config - extra command-line options for codegen/gen.py, given as ONE
#                      space-separated string, e.g. "--config_device=muxi --base_device=torch".
#
# Reads from the caller's scope: ADAPTOR_DIR, DIOPI_IMPL_DIR.
# Creates the targets adaptor_gen_dependency and adaptor_code_gen.
# Because this is a macro (no scope of its own), ADAPTOR_TEMPLATE_CODE, ADAPTOR_CSRC_PATH,
# GenArgs and ADAPTER_GEN_FILES are set in the caller's scope, and three adaptor sources
# are appended to the caller's REAL_IMPL_SRC.
macro(diopi_use_adapter cmd_extra_config)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

用 macro 是特意的吗,里面定义了很多变量,容易污染相关脚本;另外也通过参数以外的方式直接引用了一些外部定义的变量名,挺容易出错的。

# Collect the generator's Python scripts (paths relative to the current source dir) and
# attach them to a helper target that adaptor_code_gen depends on.
file(GLOB ADAPTOR_TEMPLATE_CODE RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${ADAPTOR_DIR}/codegen/*.py)
add_custom_target(adaptor_gen_dependency DEPENDS ${ADAPTOR_TEMPLATE_CODE})
set(ADAPTOR_CSRC_PATH "${ADAPTOR_DIR}/csrc")

# Split the option string with UNIX shell rules into a list, so that every option reaches
# gen.py as a separate argument.
separate_arguments(GenArgs UNIX_COMMAND "--diopi_dir=${DIOPI_IMPL_DIR}/../ --output_dir=${ADAPTOR_CSRC_PATH} ${cmd_extra_config}")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

直接用 cmake 内置的 list 就行?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

用 list (set)的话传参的地方得改, 不能把几个参数放到一个变量里一起传了, 还要解析参数,

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这个名字起的有些问题, 我改叫 cmd_extra_config's' 吧

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

用 list (set)的话传参的地方得改, 不能把几个参数放到一个变量里一起传了, 还要解析参数,

其实是可以的,CMake 里面形如 --a=1;--b=2 会被认为是 ; 分割的数组`

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

确实, PS: 杨波要改些东西, 等他改完我把这个改了吧

message(STATUS "diopi_use_adapter GenArgs is:" "${GenArgs}")
# Files declared as byproducts of the generator run.
set(ADAPTER_GEN_FILES ${ADAPTOR_CSRC_PATH}/diopi_adaptor.cpp ${ADAPTOR_CSRC_PATH}/impl_functions.hpp)
# Custom target that runs gen.py with the assembled arguments.
add_custom_target(adaptor_code_gen
COMMAND python3 ${ADAPTOR_DIR}/codegen/gen.py ${GenArgs}
BYPRODUCTS ${ADAPTER_GEN_FILES}
DEPENDS adaptor_gen_dependency
VERBATIM
)
# Append the adaptor sources to the caller's REAL_IMPL_SRC list (caller scope, see header).
list(APPEND REAL_IMPL_SRC ${ADAPTOR_CSRC_PATH}/convert.cpp ${ADAPTOR_CSRC_PATH}/diopi_adaptor.cpp ${ADAPTOR_CSRC_PATH}/composite_ops.cpp)
Copy link
Contributor

@wiryls wiryls Jul 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

要不换成 concat 或者 set?万一 REAL_IMPL_SRC 这个名字被别的地方用了,不知道会构造出什么样的 list。如果在 function 之内的 scope 就没这个顾虑了,主要是它在 macro 之中

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

用 macro 主要是改动少, 我直接把原来外边的代码搬过来了. 不用改啥东西. 你要有空都改了, 可以改成 function.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

我找时间改下吧. macro 确实不太好

endmacro()


# prep_dyn_load(<if_dynload>)
#
# Chooses the name of the "real" implementation target and, in dynamic-load mode, creates
# the thin loader library that is built under the ${DEVICEIMPL} name.
#
# Arguments:
#   if_dynload - boolean constant (e.g. ON/OFF); it is substituted textually into if(),
#                so it must not be empty.
#
# Reads from the caller's scope: DIOPI_IMPL_DIR, DEVICEIMPL.
# Sets in the caller's scope (this is a macro):
#   REAL_IMPL - "diopi_real_impl" in dynamic-load mode, otherwise ${DEVICEIMPL}.
#   DYN_GEN_FILE, DYN_HELP_DIR, DYN_GEN_DEPS, DYN_SRC - only in dynamic-load mode.
# Creates, in dynamic-load mode, the custom target dyn_wrap_gen and the shared library
# ${DEVICEIMPL}.
macro(prep_dyn_load if_dynload)
  if(${if_dynload})
    set(DYN_GEN_FILE ${CMAKE_BINARY_DIR}/src/impl/wrap_function.cpp)
    set(DYN_HELP_DIR ${DIOPI_IMPL_DIR}/scripts/dyn_load_helper)
    file(GLOB DYN_GEN_DEPS ${DYN_HELP_DIR}/dyn_wrap_gen.py)

    # Generate the wrapper source. VERBATIM keeps argument escaping identical on every
    # platform/generator.
    add_custom_target(dyn_wrap_gen ALL
      COMMAND python ${DYN_HELP_DIR}/dyn_wrap_gen.py -o ${DYN_GEN_FILE}
      DEPENDS ${DYN_GEN_DEPS}
      BYPRODUCTS ${DYN_GEN_FILE}
      WORKING_DIRECTORY ${DYN_HELP_DIR}
      VERBATIM)
    set(DYN_SRC ${DYN_GEN_FILE} ${DYN_HELP_DIR}/dyn_helper.cpp)

    set(REAL_IMPL diopi_real_impl)
    add_library(${DEVICEIMPL} SHARED ${DYN_SRC})
    # The loader calls dlopen()/dladdr(); CMAKE_DL_LIBS names the library that provides
    # them on the current platform instead of hard-coding the -ldl flag. The plain
    # (keyword-less) signature is kept so other plain-signature calls on this target
    # elsewhere keep working.
    target_link_libraries(${DEVICEIMPL} ${CMAKE_DL_LIBS})
    target_include_directories(${DEVICEIMPL} PRIVATE ${DYN_HELP_DIR})
    # The wrapper source must exist before the loader library is compiled.
    add_dependencies(${DEVICEIMPL} dyn_wrap_gen)
  else()
    # No dynamic loading: the device implementation itself is the real implementation.
    set(REAL_IMPL ${DEVICEIMPL})
  endif()
endmacro()

# handle_dyn_torch(<if_dynload> <torch_dir>)
#
# In dynamic-load mode, adds the custom target dyn_torch, which runs
# dyn_torch_handler.sh in "patch_diopi" mode on the library output directory, and makes
# ${DEVICEIMPL} depend on it.
#
# Arguments:
#   if_dynload - boolean constant (e.g. ON/OFF); substituted textually into if().
#   torch_dir  - torch installation prefix; "${torch_dir}/lib" is handed to the script as
#                the device-torch library directory.
#
# Reads from the caller's scope: DIOPI_IMPL_DIR, LIBRARY_OUTPUT_PATH, REAL_IMPL,
# DEVICEIMPL.
macro(handle_dyn_torch if_dynload torch_dir)
  if(${if_dynload})
    # VERBATIM keeps argument escaping identical on every platform/generator.
    add_custom_target(dyn_torch
      COMMAND ${DIOPI_IMPL_DIR}/scripts/dyn_load_helper/dyn_torch_handler.sh patch_diopi
              ${LIBRARY_OUTPUT_PATH} ${torch_dir}/lib
      DEPENDS ${REAL_IMPL}
      WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
      VERBATIM
    )
    message(STATUS "handle_dyn_torch with torch: ${torch_dir}")
    add_dependencies(${DEVICEIMPL} dyn_torch)
  endif()
endmacro()

25 changes: 25 additions & 0 deletions impl/cmake/TorchBaseFunc.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@

# diopi_find_torch()
#
# Locates the Torch CMake package of the torch module importable by `python` and applies
# its flags and version to the current directory scope.
#
# Takes no arguments. Because this is a macro, DIOPI_TORCH_CMAKE_PREFIX and the changes to
# CMAKE_PREFIX_PATH and CMAKE_CXX_FLAGS happen in the caller's scope.
macro(diopi_find_torch)
# Ask the python interpreter on PATH where torch keeps its CMake package files.
execute_process(
COMMAND sh -c "python -c 'import torch;print(torch.utils.cmake_prefix_path)'"
wiryls marked this conversation as resolved.
Show resolved Hide resolved
OUTPUT_VARIABLE DIOPI_TORCH_CMAKE_PREFIX
OUTPUT_STRIP_TRAILING_WHITESPACE)
message(STATUS "DIOPI_TORCH_CMAKE_PREFIX:${DIOPI_TORCH_CMAKE_PREFIX}")
# Only extend the search path when the query printed something.
if(DIOPI_TORCH_CMAKE_PREFIX)
list(APPEND CMAKE_PREFIX_PATH ${DIOPI_TORCH_CMAKE_PREFIX})
endif()

# REQUIRED: configuration stops here when Torch cannot be found.
find_package(Torch REQUIRED)
if (Torch_FOUND)
message(STATUS "TORCH_CXX_FLAGS: ${TORCH_CXX_FLAGS}")
message(STATUS "TORCH_LIBRARIES: ${TORCH_LIBRARIES}")

# Append torch's compiler flags and expose the torch version as TORCH_VERSION* compile
# definitions (directory scope).
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
add_definitions(-DTORCH_VERSION_MAJOR=${Torch_VERSION_MAJOR})
wiryls marked this conversation as resolved.
Show resolved Hide resolved
add_definitions(-DTORCH_VERSION_MINOR=${Torch_VERSION_MINOR})
add_definitions(-DTORCH_VERSION_PATCH=${Torch_VERSION_PATCH})
add_definitions(-DTORCH_VERSION=${Torch_VERSION})
message(STATUS "Found Torch Version: ${Torch_VERSION}")
endif()

endmacro()
50 changes: 18 additions & 32 deletions impl/muxi/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,15 @@ project(muxi_impl)
add_compile_definitions(USE_MACA=1)
set(USE_MACA ON)

set(BASE_TORCH_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../torch")
include(${BASE_TORCH_DIR}/cmake/TorchBaseFunc.cmake)
InitFindTorch()
include(../cmake/ImplHelper.cmake)
include(../cmake/TorchBaseFunc.cmake)
diopi_find_torch()

find_package(Torch REQUIRED)
if (Torch_FOUND)
message(STATUS "TORCH_CXX_FLAGS: ${TORCH_CXX_FLAGS}")
message(STATUS "TORCH_LIBRARIES: ${TORCH_LIBRARIES}")

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
add_definitions(-DTORCH_VERSION_MAJOR=${Torch_VERSION_MAJOR})
add_definitions(-DTORCH_VERSION_MINOR=${Torch_VERSION_MINOR})
add_definitions(-DTORCH_VERSION_PATCH=${Torch_VERSION_PATCH})
add_definitions(-DTORCH_VERSION=${Torch_VERSION})
message(STATUS "Found Torch Version: ${Torch_VERSION}")
endif()
# TODO: Report bugs to muxi
# There is a conflict when the muxi runtime is used together with a pip-installed torch_cpu,
# so to use dipu with torch_cpu on muxi, manually compile torch cpu with `export BLAS=OpenBLAS`.

set(BASE_TORCH_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../torch")

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")

Expand Down Expand Up @@ -48,30 +41,23 @@ if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/convert_config.yaml")
endif()

if(USE_ADAPTOR)
# dependency
file(GLOB ADAPTOR_TEMPLATE_CODE RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${ADAPTOR_DIR}/codegen/*.py)
add_custom_target(adaptor_gen_dependency DEPENDS ${ADAPTOR_TEMPLATE_CODE})

set(ADAPTOR_CSRC_PATH "${ADAPTOR_DIR}/csrc")
set(GEN_FILES ${ADAPTOR_CSRC_PATH}/diopi_adaptor.cpp ${ADAPTOR_CSRC_PATH}/impl_functions.hpp)
add_custom_target(adaptor_code_gen
COMMAND python3 ${ADAPTOR_DIR}/codegen/gen.py --diopi_dir=${DIOPI_IMPL_DIR}/../ --output_dir=${ADAPTOR_CSRC_PATH}
--config_device=muxi --base_device=torch
BYPRODUCTS ${GEN_FILES}
DEPENDS adaptor_gen_dependency)
list(APPEND REAL_IMPL_SRC ${ADAPTOR_CSRC_PATH}/convert.cpp ${ADAPTOR_CSRC_PATH}/diopi_adaptor.cpp ${ADAPTOR_CSRC_PATH}/composite_ops.cpp)
diopi_use_adapter("--config_device=muxi --base_device=torch")
endif()

cuda_add_library(${DEVICEIMPL} SHARED ${REAL_IMPL_SRC})
target_link_libraries(${DEVICEIMPL} ${TORCH_LIBRARIES})
prep_dyn_load(${DYLOAD})

cuda_add_library(${REAL_IMPL} SHARED ${REAL_IMPL_SRC})
target_link_libraries(${REAL_IMPL} ${TORCH_LIBRARIES})
add_subdirectory(functions/functions_ext/flash-attention)
target_link_libraries(${DEVICEIMPL} diopi_torch_ext_flash_attn)
target_include_directories(${DEVICEIMPL} PRIVATE ${BASE_TORCH_DIR})
target_link_libraries(${REAL_IMPL} diopi_torch_ext_flash_attn)
target_include_directories(${REAL_IMPL} PRIVATE ${BASE_TORCH_DIR})

if(USE_ADAPTOR)
add_dependencies(${DEVICEIMPL} adaptor_code_gen)
add_dependencies(${REAL_IMPL} adaptor_code_gen)
endif()

handle_dyn_torch(${DYLOAD} ${TORCH_INSTALL_PREFIX})

if (TEST)
add_subdirectory(test)
endif()
6 changes: 6 additions & 0 deletions impl/scripts/build_impl.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,12 @@ case $1 in
-DENABLE_COVERAGE=${USE_COVERAGE}
make_maca -j8
;;
muxi_dyload)
mkdir -p build && cd build
cmake_maca .. -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DIMPL_OPT=muxi -DCMAKE_BUILD_TYPE=Release -DDYLOAD=ON -DTEST=ON \
&& make_maca -j8
mkdir -p ${DIOPI_TEST_PATH}/lib && ln -sf ${CURRENT_DIR}/../lib/libdiopi_real_impl.so ${DIOPI_TEST_PATH}/lib
;;
camb_pytorch)
mkdir -p build && cd build
cmake .. -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DIMPL_OPT=camb_pytorch -DCMAKE_BUILD_TYPE=Release -DTEST=ON \
Expand Down
29 changes: 29 additions & 0 deletions impl/scripts/dyn_load_helper/dyn_helper.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#include <dlfcn.h>

#include <cstdio>
#include <filesystem>
#include <stdexcept>
#include <string>

/**
 * @brief Load the real DIOPI implementation library and return its dlopen() handle.
 *
 * The library is first opened by the given name. If that fails, a second attempt is made
 * with the name appended to the directory of the binary that contains this function
 * (as reported by dladdr()).
 *
 * @param diopiRealName file name (or path) of the library to load.
 * @return the non-null handle returned by dlopen().
 * @throws std::runtime_error if no attempt succeeds; the message starts with
 *         "diopi_init err" and carries the dlerror() text of every failed attempt.
 */
void* dynLoadFile(const char* diopiRealName) {
    namespace fs = std::filesystem;
    constexpr int kDlopenFlags = RTLD_LAZY | RTLD_LOCAL | RTLD_DEEPBIND;

    void* handle = dlopen(diopiRealName, kDlopenFlags);
    if (handle != nullptr) {
        return handle;
    }

    // dlerror() only reports the most recent failure, so keep the first message before
    // trying again. It may also return nullptr, which must never reach a %s conversion.
    const char* rawErr = dlerror();
    std::string errMsg = rawErr != nullptr ? rawErr : "unknown dlopen error";

    Dl_info info;
    if (dladdr(reinterpret_cast<void*>(dynLoadFile), &info) != 0 && info.dli_fname != nullptr) {
        const std::string diopiInLoader = fs::path(info.dli_fname).parent_path().append(diopiRealName).string();
        // Diagnostics belong on stderr so they do not mix with the program's regular output.
        fprintf(stderr,
                "diopi dyload fail, seems LD_LIBRARY_PATH not contains %s, try to load "
                "from loader current dir's %s \n",
                diopiRealName,
                diopiInLoader.c_str());

        handle = dlopen(diopiInLoader.c_str(), kDlopenFlags);
        if (handle != nullptr) {
            return handle;
        }
        rawErr = dlerror();
        if (rawErr != nullptr) {
            errMsg += "; ";
            errMsg += rawErr;
        }
    }

    fprintf(stderr, "%s\n", errMsg.c_str());
    throw std::runtime_error("diopi_init err: " + errMsg);
}
3 changes: 3 additions & 0 deletions impl/scripts/dyn_load_helper/dyn_helper.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#pragma once

/**
 * Load the shared library `diopiRealName` with dlopen() and return its handle.
 *
 * If the library cannot be opened by that name, the implementation retries with the name
 * appended to the directory of the binary containing this function.
 *
 * @param diopiRealName file name (or path) of the library to load.
 * @return the handle returned by dlopen(); never null.
 * @throws std::runtime_error if the library could not be loaded.
 */
void* dynLoadFile(const char* diopiRealName);
94 changes: 94 additions & 0 deletions impl/scripts/dyn_load_helper/dyn_torch_handler.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
#!/usr/bin/env bash
set -eo pipefail

# pip install patchelf

# Suffix appended to every torch library name to form its diopi-private copy.
diopi_suffix=".diopi"
# Torch libraries handled by this script. The order is significant:
# gen_versioned_torch rewrites, for each entry, only the dependencies listed AFTER it.
torch_raws=("libtorch.so" "libtorch_cuda.so" "libtorch_cpu.so" "libc10_cuda.so" "libc10.so")
# Same list with the suffix applied, index-aligned with torch_raws.
torch_4diopi=()
for raw_lib in "${torch_raws[@]}"; do
    torch_4diopi+=("${raw_lib}${diopi_suffix}")
done

# Even when loaded with RTLD_DEEPBIND, libraries that share the same name still have only
# one instance in the address space. RTLD_DEEPBIND only lets the same symbol name coming
# from differently named libraries be loaded as two instances, not symbols from the same
# library name. This function therefore creates renamed copies of the torch libraries.
#
# Works in the current directory: copies every library in torch_raws to its
# torch_4diopi name, then patches each copy so that its soname is the new name and its
# references to libraries listed later in the array point at their renamed copies.
function gen_versioned_torch() {
    local raw_count=${#torch_raws[@]}
    local versioned_count=${#torch_4diopi[@]}
    local idx dep_idx

    for ((idx = 0; idx < raw_count; idx++)); do
        cp "${torch_raws[$idx]}" "${torch_4diopi[$idx]}"
    done

    for ((idx = 0; idx < versioned_count; idx++)); do
        local target_lib=${torch_4diopi[$idx]}
        # collect one "--replace-needed <old> <new>" triple per later library
        local -a patch_args=()
        for ((dep_idx = idx + 1; dep_idx < versioned_count; dep_idx++)); do
            patch_args+=(--replace-needed "${torch_raws[$dep_idx]}" "${torch_4diopi[$dep_idx]}")
        done
        patchelf "${patch_args[@]}" --set-soname "${target_lib}" "${target_lib}"
    done
}

# Verify that the device-torch library directory already contains the renamed
# "*.so.diopi" libraries needed for dynamic loading.
#
# Arguments:
#   $1 - directory holding the device torch libraries.
# Reads the global array torch_4diopi for the expected number of libraries.
# Exits the script with status 1 (after printing a hint) when the number of entries whose
# name ends in ".so.diopi" differs from the expected count.
function check_correct_torch() {
    local torch_lib_dir=$1
    echo "check diopi torch: ${torch_lib_dir}"

    # TODO: use an elf lib to remove the unique flag in *.so.diopi
    # remove unique symbols of both cpu torch (dipu use) and device torch (diopi use).
    # if your device torch is compiled by clang, which does not support -fno-gnu-unique,
    # just test if it works (eg: muxi torch with unique can coexist with no-uniqued cpu torch)

    echo "please check if you torch builded with -fno-gnu-unique to support multi version torch coexist"

    # Count directory entries whose NAME ends in ".so.diopi". The pattern is quoted and
    # anchored so the shell cannot expand it and names such as "x.so.diopi.bak" are not
    # counted. "|| true" keeps errexit/pipefail from aborting before the hint is printed
    # when nothing matches or the directory is missing (grep -c still prints 0).
    local chk_ret
    chk_ret=$(ls -a "${torch_lib_dir}" 2>/dev/null | grep -c '\.so\.diopi$') || true
    if [[ ${chk_ret} -ne ${#torch_4diopi[@]} ]]; then
        echo "ret value: ${chk_ret}, in device-torch dir, not find dyn-load needed XX.so.diopi libs!"
        echo "!! please manual run dyn_torch_handler.sh patch_torch {device_torch_dir} to gen dyn-load needed multi-version torch"
        exit 1
    fi

    echo "diopi torch version check ok"
}

# Re-point libdiopi_real_impl.so (in the current directory) from the original torch
# libraries to their renamed copies: first every name in torch_raws is removed from the
# needed list, then every name in torch_4diopi is added, each in a single patchelf run.
function patch_diopi_torch() {
    local -a remove_args=()
    local -a add_args=()
    local idx
    for ((idx = 0; idx < ${#torch_4diopi[@]}; idx++)); do
        remove_args+=(--remove-needed "${torch_raws[$idx]}")
        add_args+=(--add-needed "${torch_4diopi[$idx]}")
    done
    patchelf "${remove_args[@]}" libdiopi_real_impl.so
    patchelf "${add_args[@]}" libdiopi_real_impl.so
}

# 1. dipu libs are loaded by python through dynamic loading, so relative to the python main
#    program the real diopi libs are 2-hop dynamically loaded. They cannot see first-hop
#    loaded libs like torch_dipu (unless the lib is loaded using RTLD_GLOBAL, which is not
#    used when a python lib is loaded directly), so diopi needs to link torch_dipu.so
#    manually.
# 2. although both the 1st-hop dynamically loaded lib and the 2nd-hop one link to
#    torch_dipu.so, they still share the same lib instance in the address space.
#
# Removes libtorch_dipu.so from the needed list of libdiopi_real_impl.so (in the current
# directory) and then adds it back.
function patch_diopi_dipu() {
    local real_impl_lib="libdiopi_real_impl.so"
    local dipu_lib="libtorch_dipu.so"
    patchelf --remove-needed "${dipu_lib}" "${real_impl_lib}"
    patchelf --add-needed "${dipu_lib}" "${real_impl_lib}"
}


# Entry point.
#   $1 - mode: "patch_torch" or "patch_diopi"
#   $2 - working directory holding the libraries to patch
#   $3 - (patch_diopi only) device-torch library directory to check
#
# Without a working directory a bare `cd` would silently switch to $HOME and patchelf
# would run there, so reject that case up front.
if [[ $# -lt 2 || -z "$2" ]]; then
    echo "usage: $(basename "$0") patch_torch <torch_lib_dir>" >&2
    echo "       $(basename "$0") patch_diopi <diopi_lib_dir> <device_torch_lib_dir>" >&2
    exit 1
fi

WORK_DIR=$2
cd "${WORK_DIR}"
case "$1" in
    patch_torch)
        gen_versioned_torch
        ;;
    patch_diopi)
        check_correct_torch "$3"
        # In diopi's needed-library list, torch_dipu.so must be placed behind the torch_XX
        # libs, because both dipu and the inner 'DEEPBIND' torch_cpu call
        # Library.<CppFunction>fallback(), a template function that is instantiated where
        # the CppFunction parameter type is first used (!!! it does not link directly to
        # the template definition in the external torch_cpu.so !!!). If torch_dipu.so is
        # linked in front, the <CppFunction>fallback() symbol binds to the one in
        # torch_dipu.so, which uses the external torch template class and cannot work with
        # the inner torch CppFunction.
        patch_diopi_dipu
        patch_diopi_torch
        ;;
    *)
        # An unknown mode used to fall through and report success without doing anything.
        echo "unknown command '$1', expected patch_torch or patch_diopi" >&2
        exit 1
        ;;
esac
Loading
Loading