Skip to content

Commit

Permalink
Fix os.environ.pop raising KeyError when CUDA_DEVICE_MAX_CONNECTIONS is not set
Browse files (browse the repository at this point in the history)
Signed-off-by: Guyue Huang <[email protected]>
  • Loading branch information
guyueh1 committed Jan 30, 2025
1 parent 83d35d5 commit 530719a
Showing 1 changed file with 4 additions and 2 deletions.
6 changes: 4 additions & 2 deletions nemo/lightning/pytorch/callbacks/megatron_comm_overlap.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,8 @@ def _set_num_cuda_device_max_connections(self):
"""
os.environ['CUDA_DEVICE_MAX_CONNECTIONS'] = "32"
else:
os.environ.pop('CUDA_DEVICE_MAX_CONNECTIONS')
if 'CUDA_DEVICE_MAX_CONNECTIONS' in os.environ:
os.environ.pop('CUDA_DEVICE_MAX_CONNECTIONS')
else:
if tp_size > 1 or cp_size > 1:
"""
Expand All @@ -244,7 +245,8 @@ def _set_num_cuda_device_max_connections(self):
"""
os.environ['CUDA_DEVICE_MAX_CONNECTIONS'] = "1"
else:
os.environ.pop('CUDA_DEVICE_MAX_CONNECTIONS')
if 'CUDA_DEVICE_MAX_CONNECTIONS' in os.environ:
os.environ.pop('CUDA_DEVICE_MAX_CONNECTIONS')

def setup(self, trainer: pl.Trainer, pl_module: pl.LightningModule, stage: str) -> None:
assert isinstance(trainer.strategy, MegatronStrategy), "MegatronCommOverlapCallback requires MegatronStrategy"
Expand Down

0 comments on commit 530719a

Please sign in to comment.