From b9e4441524ca7d8affaf4967a6b6a190bdbdd271 Mon Sep 17 00:00:00 2001
From: Jason
Date: Wed, 27 May 2020 17:27:57 -0700
Subject: [PATCH] clean code for comments

Signed-off-by: Jason
---
 examples/asr/jasper_an4.py       |  1 -
 nemo/backends/pytorch/actions.py | 66 ++------------------------------
 nemo/core/callbacks.py           |  2 --
 nemo/utils/nemo_logging.py       |  4 ----
 4 files changed, 4 insertions(+), 69 deletions(-)

diff --git a/examples/asr/jasper_an4.py b/examples/asr/jasper_an4.py
index 40172008c9da..888d046ef936 100644
--- a/examples/asr/jasper_an4.py
+++ b/examples/asr/jasper_an4.py
@@ -238,7 +238,6 @@ def main():
         # Delete old graph and make a new one
         del g0
         nf.reset_trainer()
-        # [print(p) for p in nemo.utils.app_state.AppState().modules]
         loss, eval_tensors, callbacks, total_steps, _, _, new_g = create_dags(args.model_config, vocab, args, nf)
 
         nf.train(
diff --git a/nemo/backends/pytorch/actions.py b/nemo/backends/pytorch/actions.py
index d3e3261d5e55..95d3a9d1736b 100644
--- a/nemo/backends/pytorch/actions.py
+++ b/nemo/backends/pytorch/actions.py
@@ -30,8 +30,6 @@
 
 # these imports will happen on as-needed basis
 amp = None
-# convert_syncbn = None
-# create_syncbn_process_group = None
 LARC = None
 FusedLAMB = None
 FusedAdam = None
@@ -63,16 +61,12 @@ def __init__(
                     global amp
                     amp = importlib.import_module('apex.amp')
                 if local_rank is not None:
-                    # global convert_syncbn
-                    # global create_syncbn_process_group
                     global LARC
                     global FusedLAMB
                     global FusedAdam
                     global FusedNovoGrad
                     parallel = importlib.import_module('apex.parallel')
                     apex_optimizer = importlib.import_module('apex.optimizers')
-                    # convert_syncbn = parallel.convert_syncbn_model
-                    # create_syncbn_process_group = parallel.create_syncbn_process_group
                     LARC = parallel.LARC
                     FusedLAMB = apex_optimizer.FusedLAMB
                     FusedAdam = apex_optimizer.FusedAdam
@@ -150,12 +144,6 @@ def __get_top_sorted_modules_and_dataloader(self, hook: List[NmTensor]):
                     "distributed mode. Please instantiate NeuralModuleFactory first and pass its instance as "
                     "`factory` parameter to all your Neural Module objects.".format(str(m[0]))
                 )
-            # key = m[0].unique_instance_id
-            # if key not in self.module_reference_table:
-            #     if isinstance(m[0], TrainableNeuralModuleWrapper):
-            #         self.module_reference_table[key] = (m[0], m[0]._pt_module)
-            #     else:
-            #         self.module_reference_table[key] = (m[0], m[0])
 
         return top_sorted_modules, tdataset
 
@@ -349,18 +337,9 @@ def __nm_graph_forward_pass(
                 if in_cache:
                     continue
             call_args = call_chain[ind][1]
-            # module = call_chain[ind][0]
-            # pmodule = self.module_reference_table[m_id][1]
             m_id = call_chain[ind][0].unique_instance_id
             pmodule = self.ddp_module_dict[m_id] if self.ddp_initialized else call_chain[ind][0]
 
-            # if self._local_rank is not None:
-            #     if isinstance(pmodule, DDP):
-            #         if disable_allreduce:
-            #             pmodule.disable_allreduce()
-            #         else:
-            #             pmodule.enable_allreduce()
-
             if mode == OperationMode.training:
                 # if module.is_trainable():
                 if isinstance(pmodule, nn.Module):
@@ -374,14 +353,8 @@ def __nm_graph_forward_pass(
             # prepare call signature for `module`
             call_set = {}
             for tensor_name, nmtensor in call_args.items():
-                # _add_uuid_2_name(nmtensor.name, nmtensor.producer._uuid)
                 key = nmtensor.unique_name
                 call_set[tensor_name] = registered_tensors[key]
-            # actual PyTorch module call with signature
-            # if isinstance(self.module_reference_table[m_id][0], TrainableNeuralModuleWrapper,):
-            #     new_tensors = pmodule(**call_set)
-            # else:
-            #     new_tensors = pmodule(force_pt=True, **call_set)
             new_tensors = pmodule(force_pt=True, **call_set)
 
             if not isinstance(new_tensors, List):
@@ -462,11 +435,6 @@ def _eval(self, tensors_2_evaluate, callback, step, verbose=False):
                 assert dist.is_initialized()
                 is_distributed = True
                 world_size = torch.distributed.get_world_size()
-                # logging.info(
-                #     "Doing distributed evaluation. Rank {0} of {1}".format(
-                #         self.local_rank, world_size
-                #     )
-                # )
 
                 if dl_nm.dataset is not None:
                     sampler = None
@@ -638,11 +606,6 @@ def _infer(
                 assert dist.is_initialized()
                 is_distributed = True
                 world_size = torch.distributed.get_world_size()
-                # logging.info(
-                #     "Doing distributed evaluation. Rank {0} of {1}".format(
-                #         self.local_rank, world_size
-                #     )
-                # )
                 if dl_nm.dataset is not None:
                     sampler = None
                     if not isinstance(dl_nm.dataset, torch.utils.data.IterableDataset):
@@ -729,12 +692,6 @@ def _infer(
                     use_cache=use_cache,
                 )
 
-                # if offload_to_cpu:
-                #     # Take all cuda tensors and save them to value_dict as
-                #     # cpu tensors to save GPU memory
-                #     for name, tensor in registered_e_tensors.items():
-                #         if isinstance(tensor, torch.Tensor):
-                #             registered_e_tensors[name] = tensor.cpu()
                 if cache:
                     self.append_to_cache(registered_e_tensors, offload_to_cpu)
 
@@ -913,10 +870,10 @@ def __extract_dynamic_axes(port_name: str, ntype: NeuralType, dynamic_axes: defa
 
         module.eval()
         try:
-            # # Remove NeMo-related things from the module
-            # # We need to change __call__ method. Note that this will change the
-            # # whole class, not just this object! Which is why we need to repair it
-            # # in the finally block
+            # Remove NeMo-related things from the module
+            # We need to change __call__ method. Note that this will change the
+            # whole class, not just this object! Which is why we need to repair it
+            # in the finally block
             __orig_call__ = type(module).__call__
             type(module).__call__ = torch.nn.Module.__call__
 
@@ -1313,10 +1270,6 @@ def save_state_to(self, path):
         dataNM = training_loop[0][2][0][0]
         placement_gpu = dataNM.placement == DeviceType.AllGpu
         if placement_gpu:
-            # if len(training_loop) > 1:
-            #     raise NotImplementedError(
-            #         "Distributed training does nor work with multiple "
-            #         "optimizers")
             logging.info("Doing distributed training")
             if t_dataset is not None:
                 train_sampler = None
@@ -1341,12 +1294,6 @@ def save_state_to(self, path):
             else:
                 train_sampler = None
 
-            # for train_iter in training_loop:
-            #     call_chain = train_iter[2]
-            #     for i in range(1, len(call_chain) - 1):
-            #         key = call_chain[i][0].unique_instance_id
-            #         pmodule = self.module_reference_table[key][1]
-            #         num_trainable_weights = self.module_reference_table[key][1].num_weights
             self.ddp_initialized = True
             module_list = [mod.name for mod in AppState().modules]
             module_list = sorted(module_list)
@@ -1356,11 +1303,6 @@ def save_state_to(self, path):
                 num_trainable_weights = module.num_weights
                 self.ddp_module_dict[key] = module
                 if not isinstance(module, DDP) and isinstance(module, torch.nn.Module) and num_trainable_weights > 0:
-                    # gpf = 1
-                    # if gradient_predivide:
-                    #     gpf = dist.get_world_size()
-                    # pmodule = DDP(pmodule, gradient_predivide_factor=gpf)  # Old Apex Method
-
                     # Per pytorch docs, convert sync bn prior to DDP
                     if synced_batchnorm:
                         world_size = dist.get_world_size()
diff --git a/nemo/core/callbacks.py b/nemo/core/callbacks.py
index 4b9826e9b6c1..d79eb23536db 100644
--- a/nemo/core/callbacks.py
+++ b/nemo/core/callbacks.py
@@ -469,7 +469,6 @@ def __restore_from(self, path, state):
         try:
             trainer_checkpoints = get_checkpoint_from_dir(["trainer"], path)
             state.restore_state_from(trainer_checkpoints[0])
-            # for tr, checkpoint in zip([self.action], trainer_checkpoints):
         except (ValueError) as e:
             logging.warning(e)
             logging.warning(
@@ -891,7 +890,6 @@ def on_iteration_start(self):
             setattr(self.module, self.arg_name, value)
             if self.tb_writer is not None:
                 class_name = self.module.__class__.__name__
-                # name = f'param/{class_name}.{self.arg_name}'
                 name = f"param/{class_name}.{self.arg_name}"
                 self.tb_writer.add_scalar(name, value, self.step)
             else:
diff --git a/nemo/utils/nemo_logging.py b/nemo/utils/nemo_logging.py
index ee5cb0f6ee4d..7fed7ff0c5c3 100644
--- a/nemo/utils/nemo_logging.py
+++ b/nemo/utils/nemo_logging.py
@@ -366,7 +366,3 @@ def critical(self, msg, *args, mode=LogMode.EACH, **kwargs):
             and not self._logged_once(msg, mode)
         ):
             self._logger._log(Logger.CRITICAL, msg, args, **kwargs)
-
-
-# # Necessary to catch the correct caller
-# _logging._srcfile = os.path.normcase(inspect.getfile(Logger.__class__))
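
Editor's note on the comment kept (un-nested) in the export hunk above: the pattern it describes is temporarily pointing the class-level __call__ at torch.nn.Module.__call__ and restoring it in a finally block, because assigning to type(module).__call__ changes every instance of that class, not just the object being exported. A minimal standalone sketch of that pattern follows; it uses a generic nn.Module and torch.jit.trace as stand-ins for NeMo's actual export path, and the names WrappedModule and export_with_plain_call are illustrative, not NeMo APIs.

    import torch
    import torch.nn as nn

    class WrappedModule(nn.Module):
        """Stand-in for a module whose class customizes __call__ (as NeMo modules do)."""

        def __init__(self):
            super().__init__()
            self.linear = nn.Linear(4, 2)

        def forward(self, x):
            return self.linear(x)

        def __call__(self, *args, **kwargs):
            # Framework-specific dispatch that an exporter would not expect.
            return super().__call__(*args, **kwargs)

    def export_with_plain_call(module, example_input):
        # Patch the *class*, not the instance; every instance is affected,
        # which is why the original __call__ must be restored in finally.
        orig_call = type(module).__call__
        type(module).__call__ = torch.nn.Module.__call__
        try:
            return torch.jit.trace(module, example_input)
        finally:
            type(module).__call__ = orig_call

    traced = export_with_plain_call(WrappedModule(), torch.randn(1, 4))

Similarly, for the "convert sync bn prior to DDP" comment retained in the last actions.py hunk: the PyTorch docs show SyncBatchNorm conversion being applied before the module is wrapped in DistributedDataParallel. A hedged sketch of that ordering is below; wrap_for_ddp is a hypothetical helper (not part of NeMo), and it assumes the default process group has already been initialized and a CUDA device is available for local_rank.

    import torch
    from torch.nn.parallel import DistributedDataParallel as DDP

    def wrap_for_ddp(module: torch.nn.Module, local_rank: int, synced_batchnorm: bool = False):
        # Assumes torch.distributed.init_process_group(...) has already run.
        if synced_batchnorm:
            # Conversion must happen before DDP construction.
            module = torch.nn.SyncBatchNorm.convert_sync_batchnorm(module)
        return DDP(module, device_ids=[local_rank], output_device=local_rank)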