add import guard for nlp plugin
Signed-off-by: Yi Dong <[email protected]>
yidong72 committed Jan 26, 2022
1 parent f70542c commit 2856e84
Showing 2 changed files with 17 additions and 11 deletions.
18 changes: 9 additions & 9 deletions nemo/collections/nlp/parts/nlp_overrides.py
@@ -12,6 +12,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.

+try:
+    from apex.transformer import parallel_state
+
+    HAVE_APEX = True
+
+except (ImportError, ModuleNotFoundError):
+
+    HAVE_APEX = False
+
import os
import shutil
import tempfile
@@ -35,15 +44,6 @@
from nemo.core.optim import MasterOptimizerWrapper
from nemo.utils import AppState, logging

-try:
-    from apex.transformer import parallel_state
-
-    HAVE_APEX = True
-
-except (ImportError, ModuleNotFoundError):
-
-    HAVE_APEX = False
-

class NLPDDPPlugin(DDPPlugin):
""" DDP plugin for Pytorch Lightning. Needed to customize DDP for model parallel models.
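
The guard moved to the top of nlp_overrides.py follows the usual optional-dependency pattern: attempt the import once at module load, record the outcome in a flag, and never let the ImportError escape. A minimal standalone sketch of the same pattern, with a hypothetical helper (data_parallel_world_size is not part of this commit) showing how the flag is consulted before the guarded symbol is used:

# Optional-dependency guard: try the import once and remember whether it worked,
# instead of letting an ImportError abort module import for users without apex.
try:
    from apex.transformer import parallel_state

    HAVE_APEX = True
except (ImportError, ModuleNotFoundError):
    HAVE_APEX = False


def data_parallel_world_size() -> int:
    """Hypothetical helper: use apex's parallel_state when available, else fall back to 1.

    Assumes apex's model-parallel state has already been initialized when HAVE_APEX is True.
    """
    if not HAVE_APEX:
        return 1
    return parallel_state.get_data_parallel_world_size()
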
10 changes: 8 additions & 2 deletions nemo/core/classes/modelPT.py
@@ -35,6 +35,13 @@
from nemo.utils.app_state import AppState
from nemo.utils.get_rank import is_global_rank_zero

+try:
+    from nemo.collections.nlp.parts.nlp_overrides import NLPDDPPlugin
+
+    HAVE_NLPPLUGIN = True
+except (ImportError, ModuleNotFoundError):
+    HAVE_NLPPLUGIN = False
+
__all__ = ['ModelPT']


@@ -458,7 +465,6 @@ def setup_optimization(self, optim_config: Optional[Union[DictConfig, Dict]] = N
logging.warning(f"Trainer wasn't specified in model constructor. Make sure that you really wanted it.")

if 'sched' in optim_config and self._trainer is not None:
-from nemo.collections.nlp.parts.nlp_overrides import NLPDDPPlugin

if not isinstance(self._trainer.accumulate_grad_batches, int):
raise ValueError("We do not currently support gradient acculumation that is not an integer.")
@@ -473,7 +479,7 @@ def setup_optimization(self, optim_config: Optional[Union[DictConfig, Dict]] = N
optim_config['sched']['t_num_workers'] = self._trainer.num_processes * self._trainer.num_nodes
elif self._trainer.accelerator == "ddp":
optim_config['sched']['t_num_workers'] = self._trainer.num_gpus * self._trainer.num_nodes
-elif isinstance(self._trainer.accelerator.training_type_plugin, NLPDDPPlugin):
+elif HAVE_NLPPLUGIN and isinstance(self._trainer.accelerator.training_type_plugin, NLPDDPPlugin):
app = AppState()
optim_config['sched']['t_num_workers'] = app.data_parallel_size
else:
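
In modelPT.py the same guard is applied to NLPDDPPlugin, which lives in the NLP collection and may not be importable in lightweight installs; the isinstance branch is then short-circuited on HAVE_NLPPLUGIN so the guarded name is only evaluated when the import succeeded. A minimal standalone sketch of that short-circuit, using a hypothetical helper (scheduler_num_workers is illustrative, not the commit's code):

# Guarded use of an optionally-available class: check the flag first so the
# isinstance() call never touches NLPDDPPlugin when the NLP collection is absent.
try:
    from nemo.collections.nlp.parts.nlp_overrides import NLPDDPPlugin

    HAVE_NLPPLUGIN = True
except (ImportError, ModuleNotFoundError):
    HAVE_NLPPLUGIN = False


def scheduler_num_workers(training_type_plugin, data_parallel_size: int, default: int) -> int:
    """Hypothetical helper mirroring the guarded branch in setup_optimization()."""
    if HAVE_NLPPLUGIN and isinstance(training_type_plugin, NLPDDPPlugin):
        # With model parallelism the scheduler sees one worker per data-parallel rank.
        return data_parallel_size
    return default
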
