diff --git a/deepspeed/utils/timer.py b/deepspeed/utils/timer.py
index 6625e3cd8961..39f5c8be8fff 100755
--- a/deepspeed/utils/timer.py
+++ b/deepspeed/utils/timer.py
@@ -3,11 +3,17 @@
 '''
 
 import time
-import psutil
 import torch
 
 from deepspeed.utils import logger
 
+try:
+    import psutil
+    PSUTILS_INSTALLED = True
+except ImportError:
+    PSUTILS_INSTALLED = False
+    pass
+
 
 def print_rank_0(message):
     if torch.distributed.is_initialized():
@@ -103,7 +109,7 @@ def __init__(self,
                  num_workers,
                  start_step=2,
                  steps_per_output=50,
-                 monitor_memory=True,
+                 monitor_memory=False,
                  logging_fn=None):
         self.start_time = 0
         self.end_time = 0
@@ -124,6 +130,9 @@ def __init__(self,
             self.logging = logger.info
         self.initialized = False
 
+        if self.monitor_memory and not PSUTILS_INSTALLED:
+            raise ImportError("Unable to import 'psutils', please install package")
+
     def update_epoch_count(self):
         self.epoch_count += 1
         self.local_step_count = 0
diff --git a/op_builder/cpu_adam.py b/op_builder/cpu_adam.py
index d391301897e3..20f8fe2d8b6f 100644
--- a/op_builder/cpu_adam.py
+++ b/op_builder/cpu_adam.py
@@ -1,6 +1,6 @@
 import os
 import torch
-import warnings
+import subprocess
 from .builder import CUDAOpBuilder
 
 
@@ -21,35 +21,26 @@ def include_paths(self):
         CUDA_INCLUDE = os.path.join(torch.utils.cpp_extension.CUDA_HOME, "include")
         return ['csrc/includes', CUDA_INCLUDE]
 
-    def available_vector_instructions(self):
-        try:
-            import cpufeature
-        except ImportError:
-            warnings.warn(
-                f'import cpufeature failed - CPU vector optimizations are not available for CPUAdam'
-            )
-            return {}
+    def simd_width(self):
+        if not self.command_exists('lscpu'):
+            self.warning(
+                "CPUAdam attempted to query 'lscpu' to detect the existence "
+                "of AVX instructions. However, 'lscpu' does not appear to exist on "
+                "your system, will fall back to non-vectorized execution.")
+            return ''
 
-        cpu_vector_instructions = {}
-        try:
-            cpu_vector_instructions = cpufeature.CPUFeature
-        except _:
-            warnings.warn(
-                f'cpufeature.CPUFeature failed - CPU vector optimizations are not available for CPUAdam'
-            )
-            return {}
-
-        return cpu_vector_instructions
+        result = subprocess.check_output('lscpu', shell=True)
+        result = result.decode('utf-8').strip().lower()
+        if 'genuineintel' in result:
+            if 'avx512' in result:
+                return '-D__AVX512__'
+            elif 'avx2' in result:
+                return '-D__AVX256__'
+        return ''
 
     def cxx_args(self):
         CUDA_LIB64 = os.path.join(torch.utils.cpp_extension.CUDA_HOME, "lib64")
-        cpu_info = self.available_vector_instructions()
-        SIMD_WIDTH = ''
-        if 'Intel' in cpu_info.get('VendorId', ''):
-            if cpu_info.get('AVX512f', False):
-                SIMD_WIDTH = '-D__AVX512__'
-            elif cpu_info.get('AVX2', False):
-                SIMD_WIDTH = '-D__AVX256__'
+        SIMD_WIDTH = self.simd_width()
 
         return [
             '-O3',
diff --git a/requirements/requirements.txt b/requirements/requirements.txt
index 575a30ff5568..5845cdff4452 100644
--- a/requirements/requirements.txt
+++ b/requirements/requirements.txt
@@ -1,7 +1,5 @@
 torch>=1.2
 torchvision>=0.4.0
 tqdm
-psutil
 tensorboardX==1.8
 ninja
-cpufeature