From 31020f5ae5db6060f294a5b10578b896f758a9b1 Mon Sep 17 00:00:00 2001 From: Elton Zheng Date: Sat, 15 Oct 2022 15:42:42 -0700 Subject: [PATCH 1/2] Fix build issues on Windows --- build_win.bat | 19 +++++++++++++++++++ .../inference/includes/inference_context.h | 2 +- setup.py | 18 ++++++++++-------- 3 files changed, 30 insertions(+), 9 deletions(-) create mode 100644 build_win.bat diff --git a/build_win.bat b/build_win.bat new file mode 100644 index 000000000000..ec8c8a362a78 --- /dev/null +++ b/build_win.bat @@ -0,0 +1,19 @@ +@echo off + +set DS_BUILD_AIO=0 +set DS_BUILD_SPARSE_ATTN=0 + +echo Administrative permissions required. Detecting permissions... + +net session >nul 2>&1 +if %errorLevel% == 0 ( + echo Success: Administrative permissions confirmed. +) else ( + echo Failure: Current permissions inadequate. + goto end +) + + +python setup.py bdist_wheel + +:end diff --git a/csrc/transformer/inference/includes/inference_context.h b/csrc/transformer/inference/includes/inference_context.h index 64e490ef47fc..330da050cced 100644 --- a/csrc/transformer/inference/includes/inference_context.h +++ b/csrc/transformer/inference/includes/inference_context.h @@ -157,7 +157,7 @@ class Context { void* GetWorkSpace() { return _workspace; } void* GetAttentionUnfusedWorkspace() { - return _workspace + _attention_unfused_workspace_offset; + return (char*)_workspace + _attention_unfused_workspace_offset; } inline unsigned new_token(unsigned layer_id) diff --git a/setup.py b/setup.py index 24e5ec62b7dd..099fa1b929e7 100755 --- a/setup.py +++ b/setup.py @@ -4,12 +4,13 @@ DeepSpeed library To build wheel on Windows: - 1. Install pytorch, such as pytorch 1.8 + cuda 11.1 + 1. Install pytorch, such as pytorch 1.12 + cuda 11.6 2. Install visual cpp build tool - 3. Launch cmd console with Administrator privilege for creating required symlink folders + 3. Include cuda toolkit + 4. 
Launch cmd console with Administrator privilege for creating required symlink folders Create a new wheel via the following command: - python setup.py bdist_wheel + build_win.bat The wheel will be located at: dist/*.whl """ @@ -228,11 +229,12 @@ def create_dir_symlink(src, dest): hip_version = "0.0" if torch_available and torch.version.cuda is not None: cuda_version = ".".join(torch.version.cuda.split('.')[:2]) - if isinstance(torch.cuda.nccl.version(), int): - # This will break if minor version > 9 - nccl_version = ".".join(str(torch.cuda.nccl.version())[:2]) - else: - nccl_version = ".".join(map(str, torch.cuda.nccl.version()[:2])) + if sys.platform != "win32": + if isinstance(torch.cuda.nccl.version(), int): + # This will break if minor version > 9 + nccl_version = ".".join(str(torch.cuda.nccl.version())[:2]) + else: + nccl_version = ".".join(map(str, torch.cuda.nccl.version()[:2])) if hasattr(torch.cuda, 'is_bf16_supported') and torch.cuda.is_available(): bf16_support = torch.cuda.is_bf16_supported() if torch_available and hasattr(torch.version, 'hip') and torch.version.hip is not None: From e10c0f2a819be3c0862d4c46f86c978b9a05e322 Mon Sep 17 00:00:00 2001 From: Reza Yazdani Date: Tue, 18 Oct 2022 00:53:09 +0500 Subject: [PATCH 2/2] small fix to compile with new version of Microsoft C++ Build Tools --- op_builder/builder.py | 1 + op_builder/fused_adam.py | 9 +++++++-- op_builder/fused_lamb.py | 9 +++++++-- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/op_builder/builder.py b/op_builder/builder.py index 09b781fddd72..9fa94985b780 100644 --- a/op_builder/builder.py +++ b/op_builder/builder.py @@ -650,6 +650,7 @@ def nvcc_args(self): else: cuda_major, _ = installed_cuda_version() args += [ + '-allow-unsupported-compiler' if sys.platform == "win32" else '', '--use_fast_math', '-std=c++17' if sys.platform == "win32" and cuda_major > 10 else '-std=c++14', diff --git a/op_builder/fused_adam.py b/op_builder/fused_adam.py index 6ff264fbf1a1..2883d417ede9
100644 --- a/op_builder/fused_adam.py +++ b/op_builder/fused_adam.py @@ -3,6 +3,8 @@ """ from .builder import CUDAOpBuilder +import sys + class FusedAdamBuilder(CUDAOpBuilder): BUILD_VAR = "DS_BUILD_FUSED_ADAM" @@ -27,6 +29,9 @@ def cxx_args(self): def nvcc_args(self): nvcc_flags = ['-O3'] + self.version_dependent_macros() if not self.is_rocm_pytorch(): - nvcc_flags.extend(['-lineinfo', - '--use_fast_math'] + self.compute_capability_args()) + nvcc_flags.extend([ + '-allow-unsupported-compiler' if sys.platform == "win32" else '', + '-lineinfo', + '--use_fast_math' + ] + self.compute_capability_args()) return nvcc_flags diff --git a/op_builder/fused_lamb.py b/op_builder/fused_lamb.py index 106728f6f3fe..d5f88d0b1ad1 100644 --- a/op_builder/fused_lamb.py +++ b/op_builder/fused_lamb.py @@ -3,6 +3,8 @@ """ from .builder import CUDAOpBuilder +import sys + class FusedLambBuilder(CUDAOpBuilder): BUILD_VAR = 'DS_BUILD_FUSED_LAMB' @@ -33,6 +35,9 @@ def nvcc_args(self): '-DROCM_VERSION_MINOR=%s' % ROCM_MINOR ] else: - nvcc_flags.extend(['-lineinfo', - '--use_fast_math'] + self.compute_capability_args()) + nvcc_flags.extend([ + '-allow-unsupported-compiler' if sys.platform == "win32" else '', + '-lineinfo', + '--use_fast_math' + ] + self.compute_capability_args()) return nvcc_flags