From f0df092f4f048cbf03451c94c5730fdb40f4b142 Mon Sep 17 00:00:00 2001 From: Tailing Yuan Date: Thu, 20 Feb 2025 04:33:16 +0000 Subject: [PATCH 1/4] build extensions in parallel --- README.md | 2 +- setup.py | 36 +++++++++++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index a15e08ad..9abfc177 100644 --- a/README.md +++ b/README.md @@ -130,7 +130,7 @@ CUDA and C++ extensions via git clone https://github.com/NVIDIA/apex cd apex # if pip >= 23.1 (ref: https://pip.pypa.io/en/stable/news/#v23-1) which supports multiple `--config-settings` with the same key... -pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" ./ +pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext --cuda_ext --parallel 4" ./ # otherwise pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --global-option="--cpp_ext" --global-option="--cuda_ext" ./ ``` diff --git a/setup.py b/setup.py index 162db759..757723d3 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,7 @@ import sys import warnings import os +import threading import glob from packaging.version import parse, Version @@ -859,6 +860,39 @@ def check_cudnn_version_and_warn(global_option: str, required_cudnn_version: int ) +# Patch because `setup.py bdist_wheel` does not accept the `parallel` option +parallel = None +if "--parallel" in sys.argv: + idx = sys.argv.index("--parallel") + parallel = int(sys.argv[idx + 1]) + sys.argv.pop(idx + 1) + sys.argv.pop(idx) + + +# Prevent file conflicts when multiple extensions are compiled simultaneously +class BuildExtensionSeparateDir(BuildExtension): + build_extension_patch_lock = threading.Lock() + thread_ext_name_map = {} + + def build_extension(self, ext): + with self.build_extension_patch_lock: + if not getattr(self.compiler, "_compile_separate_output_dir", False): + compile_orig = self.compiler.compile + + def compile_new(*args, **kwargs): + return compile_orig(*args, **{ + **kwargs, + "output_dir": os.path.join( + kwargs["output_dir"], + self.thread_ext_name_map[threading.current_thread().ident]), + }) + self.compiler.compile = compile_new + self.compiler._compile_separate_output_dir = True + self.thread_ext_name_map[threading.current_thread().ident] = ext.name + objects = super().build_extension(ext) + return objects + + setup( name="apex", version="0.1", @@ -868,6 +902,6 @@ def check_cudnn_version_and_warn(global_option: str, required_cudnn_version: int install_requires=["packaging>20.6"], description="PyTorch Extensions written by NVIDIA", ext_modules=ext_modules, - cmdclass={"build_ext": BuildExtension} if ext_modules else {}, + cmdclass={"build_ext": BuildExtensionSeparateDir.with_options(parallel=parallel)} if ext_modules else {}, extras_require=extras, ) From d9c3507ae087ddf7549383f6cd4e3b8955758adb Mon Sep 17 00:00:00 2001 From: Tailing Yuan Date: Fri, 21 Feb 2025 08:49:50 +0000 Subject: [PATCH 2/4] fix: setup.py develop supports --parallel --- setup.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 757723d3..0ec29b8d 100644 --- a/setup.py +++ b/setup.py @@ -860,7 +860,7 @@ def check_cudnn_version_and_warn(global_option: str, required_cudnn_version: int ) -# Patch because `setup.py bdist_wheel` does not accept the `parallel` option +# Patch because `setup.py bdist_wheel` and `setup.py develop` do not support the `parallel` option parallel = None if "--parallel" in sys.argv: idx = sys.argv.index("--parallel") @@ -874,6 +874,11 @@ class BuildExtensionSeparateDir(BuildExtension): build_extension_patch_lock = threading.Lock() thread_ext_name_map = {} + def finalize_options(self): + if parallel is not None: + self.parallel = parallel + super().finalize_options() + def build_extension(self, ext): with self.build_extension_patch_lock: if not getattr(self.compiler, "_compile_separate_output_dir", False): @@ -902,6 +907,6 @@ def compile_new(*args, **kwargs): install_requires=["packaging>20.6"], description="PyTorch Extensions written by NVIDIA", ext_modules=ext_modules, - cmdclass={"build_ext": BuildExtensionSeparateDir.with_options(parallel=parallel)} if ext_modules else {}, + cmdclass={"build_ext": BuildExtensionSeparateDir} if ext_modules else {}, extras_require=extras, ) From f1ee5b189f4fad4c4eae219a1d37b033808167ab Mon Sep 17 00:00:00 2001 From: Tailing Yuan Date: Fri, 21 Feb 2025 18:22:36 +0000 Subject: [PATCH 3/4] update README.md --- README.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 9abfc177..2d851830 100644 --- a/README.md +++ b/README.md @@ -130,11 +130,18 @@ CUDA and C++ extensions via git clone https://github.com/NVIDIA/apex cd apex # if pip >= 23.1 (ref: https://pip.pypa.io/en/stable/news/#v23-1) which supports multiple `--config-settings` with the same key... -pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext --cuda_ext --parallel 4" ./ +pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" ./ # otherwise pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --global-option="--cpp_ext" --global-option="--cuda_ext" ./ ``` +To reduce the build time of APEX, parallel building can be enhanced via +```bash +export NVCC_APPEND_FLAGS="--threads 4" +pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext --cuda_ext --parallel 8" ./ +``` +When CPU cores or memory are limited, the `--parallel` option is generally preferred over `--threads`. See [pull#1882](https://github.com/NVIDIA/apex/pull/1882) for more details. + APEX also supports a Python-only build via ```bash pip install -v --disable-pip-version-check --no-build-isolation --no-cache-dir ./ From fd28caf0da23549875fa2ff57181de377737cb72 Mon Sep 17 00:00:00 2001 From: Tailing Yuan Date: Mon, 24 Feb 2025 10:37:26 +0800 Subject: [PATCH 4/4] Update README.md: Remove an `export` command --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 2d851830..f3cf0d2e 100644 --- a/README.md +++ b/README.md @@ -137,8 +137,7 @@ pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation - To reduce the build time of APEX, parallel building can be enhanced via ```bash -export NVCC_APPEND_FLAGS="--threads 4" -pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext --cuda_ext --parallel 8" ./ +NVCC_APPEND_FLAGS="--threads 4" pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext --cuda_ext --parallel 8" ./ ``` When CPU cores or memory are limited, the `--parallel` option is generally preferred over `--threads`. See [pull#1882](https://github.com/NVIDIA/apex/pull/1882) for more details.