diff --git a/deepspeed/utils/timer.py b/deepspeed/utils/timer.py
index 6625e3cd8961..39f5c8be8fff 100755
--- a/deepspeed/utils/timer.py
+++ b/deepspeed/utils/timer.py
@@ -3,11 +3,17 @@
 '''
 
 import time
-import psutil
 import torch
 
 from deepspeed.utils import logger
 
+try:
+    import psutil
+    PSUTILS_INSTALLED = True
+except ImportError:
+    PSUTILS_INSTALLED = False
+    pass
+
 
 def print_rank_0(message):
     if torch.distributed.is_initialized():
@@ -103,7 +109,7 @@ def __init__(self,
                  num_workers,
                  start_step=2,
                  steps_per_output=50,
-                 monitor_memory=True,
+                 monitor_memory=False,
                  logging_fn=None):
         self.start_time = 0
         self.end_time = 0
@@ -124,6 +130,9 @@ def __init__(self,
             self.logging = logger.info
         self.initialized = False
 
+        if self.monitor_memory and not PSUTILS_INSTALLED:
+            raise ImportError("Unable to import 'psutils', please install package")
+
     def update_epoch_count(self):
         self.epoch_count += 1
         self.local_step_count = 0
diff --git a/op_builder/cpu_adam.py b/op_builder/cpu_adam.py
index d391301897e3..20f8fe2d8b6f 100644
--- a/op_builder/cpu_adam.py
+++ b/op_builder/cpu_adam.py
@@ -1,6 +1,6 @@
 import os
 import torch
-import warnings
+import subprocess
 from .builder import CUDAOpBuilder
 
 
@@ -21,35 +21,26 @@ def include_paths(self):
         CUDA_INCLUDE = os.path.join(torch.utils.cpp_extension.CUDA_HOME, "include")
         return ['csrc/includes', CUDA_INCLUDE]
 
-    def available_vector_instructions(self):
-        try:
-            import cpufeature
-        except ImportError:
-            warnings.warn(
-                f'import cpufeature failed - CPU vector optimizations are not available for CPUAdam'
-            )
-            return {}
+    def simd_width(self):
+        if not self.command_exists('lscpu'):
+            self.warning(
+                "CPUAdam attempted to query 'lscpu' to detect the existence "
+                "of AVX instructions. However, 'lscpu' does not appear to exist on "
+                "your system, will fall back to non-vectorized execution.")
+            return ''
 
-        cpu_vector_instructions = {}
-        try:
-            cpu_vector_instructions = cpufeature.CPUFeature
-        except _:
-            warnings.warn(
-                f'cpufeature.CPUFeature failed - CPU vector optimizations are not available for CPUAdam'
-            )
-            return {}
-
-        return cpu_vector_instructions
+        result = subprocess.check_output('lscpu', shell=True)
+        result = result.decode('utf-8').strip().lower()
+        if 'genuineintel' in result:
+            if 'avx512' in result:
+                return '-D__AVX512__'
+            elif 'avx2' in result:
+                return '-D__AVX256__'
+        return ''
 
     def cxx_args(self):
         CUDA_LIB64 = os.path.join(torch.utils.cpp_extension.CUDA_HOME, "lib64")
-        cpu_info = self.available_vector_instructions()
-        SIMD_WIDTH = ''
-        if 'Intel' in cpu_info.get('VendorId', ''):
-            if cpu_info.get('AVX512f', False):
-                SIMD_WIDTH = '-D__AVX512__'
-            elif cpu_info.get('AVX2', False):
-                SIMD_WIDTH = '-D__AVX256__'
+        SIMD_WIDTH = self.simd_width()
 
         return [
             '-O3',
diff --git a/requirements/requirements.txt b/requirements/requirements.txt
index 575a30ff5568..5845cdff4452 100644
--- a/requirements/requirements.txt
+++ b/requirements/requirements.txt
@@ -1,7 +1,5 @@
 torch>=1.2
 torchvision>=0.4.0
 tqdm
-psutil
 tensorboardX==1.8
 ninja
-cpufeature