Skip to content

Commit

Permalink
Remove compression from .nemo files (#3626)
Browse files Browse the repository at this point in the history
Signed-off-by: Oleksii Kuchaiev <[email protected]>

Co-authored-by: Somshubra Majumdar <[email protected]>
Co-authored-by: Eric Harper <[email protected]>
  • Loading branch information
3 people authored and fayejf committed Mar 2, 2022
1 parent 7290a93 commit 45355e9
Showing 1 changed file with 11 additions and 2 deletions.
13 changes: 11 additions & 2 deletions nemo/core/connectors/save_restore_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,14 +384,23 @@ def _inject_model_parallel_rank_for_ckpt(self, dirname, basename):
def _make_nemo_file_from_folder(filename, source_dir):
    """Pack the contents of ``source_dir`` into a .nemo archive at ``filename``.

    The archive is written as a plain, uncompressed tar (mode ``"w:"``).
    The contents of ``source_dir`` are stored relative to the archive root
    (``arcname="."``), so the directory's own name is not recorded.

    Args:
        filename: Path of the archive to create. Missing parent directories
            are created.
        source_dir: Directory whose contents are added to the archive.
    """
    dirname = os.path.dirname(filename)
    # os.makedirs("") raises FileNotFoundError, so only create the parent
    # directory when the target path actually has a directory component.
    if dirname:
        os.makedirs(dirname, exist_ok=True)
    with tarfile.open(filename, "w:") as tar:
        tar.add(source_dir, arcname=".")

@staticmethod
def _unpack_nemo_file(path2file: str, out_folder: str) -> str:
    """Extract a .nemo archive into ``out_folder`` and return that folder.

    The archive is first probed as an uncompressed tar; if that fails with
    ``tarfile.ReadError`` it is opened as a gzip-compressed tar instead, so
    both the current and the older on-disk formats are accepted.

    Args:
        path2file: Path to the .nemo archive.
        out_folder: Directory the archive members are extracted into.

    Returns:
        ``out_folder``, unchanged.

    Raises:
        FileNotFoundError: If ``path2file`` does not exist.
        tarfile.ReadError: If the file is neither a plain nor a gzip tar.
    """
    if not os.path.exists(path2file):
        raise FileNotFoundError(f"{path2file} does not exist")
    # we start with an assumption of uncompressed tar,
    # which should be true for versions 1.7.0 and above
    tar_header = "r:"
    try:
        # Probe only: the context manager closes the handle immediately.
        with tarfile.open(path2file, tar_header):
            pass
    except tarfile.ReadError:
        # can be older checkpoint => try compressed tar
        tar_header = "r:gz"
    # NOTE(review): extractall() trusts member paths stored in the archive;
    # only use this on archives from a trusted source.
    with tarfile.open(path2file, tar_header) as tar:
        tar.extractall(path=out_folder)
    return out_folder
Expand Down

0 comments on commit 45355e9

Please sign in to comment.