
Commit

Fix rank where torch.distributed may not be initialized yet and would not wait for tokenizer file caching (#7061)

Signed-off-by: Kim Ngo <[email protected]>
Co-authored-by: David <[email protected]>
Signed-off-by: jubick1337 <[email protected]>
2 people authored and jubick1337 committed Aug 8, 2023
1 parent eea78b2 commit 9643081
Showing 1 changed file with 2 additions and 2 deletions.
@@ -21,7 +21,7 @@
 import wget
 from torch.hub import _get_torch_home
 
-from nemo.utils import get_rank, logging
+from nemo.utils import logging
 
 __all__ = [
     "get_megatron_lm_model",
@@ -203,7 +203,7 @@ def _download(path: str, url: str):
     if url is None:
         return None
 
-    if get_rank.is_global_rank_zero() and not os.path.exists(path):
+    if (not torch.distributed.is_initialized() or torch.distributed.get_rank() == 0) and not os.path.exists(path):
         os.makedirs(MEGATRON_CACHE, exist_ok=True)
         logging.info(f"Downloading from {url} to {path}")
         downloaded_path = wget.download(url)
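For context, here is a minimal standalone sketch of the pattern the fix relies on: treat the process as rank 0 whenever torch.distributed has not been initialized (so the check cannot raise before init), and let the other ranks wait for the cached file. The names download_once and _is_rank_zero, the os.rename step, and the trailing barrier are illustrative assumptions for this sketch, not part of the diff above.

import os

import torch
import wget


def _is_rank_zero() -> bool:
    # If no process group has been initialized (e.g. a single-process run),
    # torch.distributed.get_rank() would raise, so fall back to treating the
    # process as rank 0.
    return not torch.distributed.is_initialized() or torch.distributed.get_rank() == 0


def download_once(path: str, url: str) -> str:
    # Only rank 0 downloads; every other rank reuses the cached file.
    if _is_rank_zero() and not os.path.exists(path):
        os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
        downloaded = wget.download(url)
        os.rename(downloaded, path)

    # If a process group exists, make all ranks wait until rank 0 has
    # finished caching the file before anyone tries to read it.
    if torch.distributed.is_initialized():
        torch.distributed.barrier()

    return path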
