Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Small change to gunzip to allow better restarting #476

Merged
merged 6 commits into from
Aug 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 44 additions & 8 deletions src/atomate2/utils/file_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@ def gzip(
path: str | Path,
host: str | None = None,
compresslevel: int = 6,
force: bool = False,
force: bool | str = False,
):
"""
Gzip a file.
Expand All @@ -367,7 +367,12 @@ def gzip(
compresslevel : int
Level of compression, 1-9. 9 is default for GzipFile, 6 is default for gzip.
force : bool or str
Overwrite gzipped file if it already exists.
How to handle writing a gzipped file if it already exists. Accepts
either a string or bool:

- `"force"` or `True`: Overwrite gzipped file if it already exists.
- `"raise"` or `False`: Raise an error if file already exists.
- `"skip"`: Skip file if it already exists.
"""
path = self.abspath(path, host=host)
path_gz = path.parent / f"{path.name}.gz"
Expand All @@ -380,8 +385,21 @@ def gzip(
warnings.warn(f"{path} is a directory, skipping...", stacklevel=1)
return

if self.exists(path_gz, host=host) and not force:
raise FileExistsError(f"{path_gz} file already exists.")
if self.exists(path_gz, host=host):
if force is False or force == "raise":
raise FileExistsError(f"{path_gz} file already exists")
if force is True or force == "force":
pass
elif force == "skip":
warnings.warn(
f"{path_gz} file already exists, skipping...", stacklevel=2
)
return
else:
raise ValueError(
f"Invalid value for force: {force} "
"(must be True, False, 'raise', 'force', or 'skip'))"
)

if host is None:
with open(path, "rb") as f_in, GzipFile(
Expand All @@ -398,7 +416,7 @@ def gunzip(
self,
path: str | Path,
host: str | None = None,
force: bool = False,
force: bool | str = False,
):
"""
Ungzip a file.
Expand All @@ -410,7 +428,12 @@ def gunzip(
host : str or None
A remote file system host on which to perform file operations.
force : bool or str
Overwrite non-gzipped file if it already exists.
How to handle writing a non-gzipped file if it already exists. Accepts
either a string or bool:

- `"force"` or `True`: Overwrite non-gzipped file if it already exists.
- `"raise"` or `False`: Raise an error if file already exists.
- `"skip"`: Skip file if it already exists.
"""
path = self.abspath(path, host=host)
path_nongz = path.with_suffix("")
Expand All @@ -419,8 +442,21 @@ def gunzip(
warnings.warn(f"{path} is not gzipped, skipping...", stacklevel=2)
return

if self.exists(path_nongz, host=host) and not force:
raise FileExistsError(f"{path_nongz} file already exists")
if self.exists(path_nongz, host=host):
if force is False or force == "raise":
raise FileExistsError(f"{path_nongz} file already exists")
if force is True or force == "force":
pass
elif force == "skip":
warnings.warn(
f"{path_nongz} file already exists, skipping...", stacklevel=2
)
return
else:
raise ValueError(
f"Invalid value for force: {force} "
"(must be True, False, 'raise', 'force', or 'skip'))"
)

if host is None:
with open(path_nongz, "wb") as f_out, zopen(path, "rb") as f_in:
Expand Down
11 changes: 8 additions & 3 deletions src/atomate2/vasp/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def copy_vasp_outputs(
src_host: str | None = None,
additional_vasp_files: Sequence[str] = (),
contcar_to_poscar: bool = True,
force_overwrite: bool = False,
force_overwrite: bool | str = False,
file_client: FileClient | None = None,
):
"""
Expand All @@ -53,8 +53,13 @@ def copy_vasp_outputs(
Additional files to copy, e.g. ["CHGCAR", "WAVECAR"].
contcar_to_poscar : bool
Move CONTCAR to POSCAR (original POSCAR is not copied).
force_overwrite : bool
If True, overwrite existing files during the copy step.
force_overwrite : bool or str
How to handle overwriting existing files during the copy step. Accepts
either a string or bool:

- `"force"` or `True`: Overwrite existing files if they already exist.
- `"raise"` or `False`: Raise an error if files already exist.
- `"skip"`: Skip files if they already exist.
file_client : .FileClient
A file client to use for performing file operations.
"""
Expand Down
30 changes: 30 additions & 0 deletions tests/common/test_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
def test_gunzip_force_overwrites(tmp_path):
    """Exercise ``gunzip_files`` when the unzipped targets already exist.

    The test asserts two outcomes:

    - with ``force=True`` each plain file ends up holding the text that was
      written before gzipping (its own name);
    - with ``force="skip"`` each plain file keeps the "overwritten" text
      that was written just before the gunzip call.
    """
    from atomate2.common.files import gunzip_files, gzip_files

    names = ["file1", "file2", "file3"]

    def _populate(overwritten):
        # (Re)write every plain file: either its bare name, or the name
        # followed by the " overwritten" marker.
        for name in names:
            text = f"{name} overwritten" if overwritten else name
            (tmp_path / name).write_text(text)

    # Round 1: gzip the original contents, then place conflicting plain files.
    _populate(overwritten=False)
    gzip_files(tmp_path)
    _populate(overwritten=True)

    # "file1" in the zipped files and "file1 overwritten" in the unzipped files
    gunzip_files(tmp_path, force=True)

    for name in names:
        assert (tmp_path / name).read_text() == name

    # Round 2: same setup, but ask gunzip to skip existing targets.
    gzip_files(tmp_path)
    _populate(overwritten=True)

    # "file1" in the zipped files and "file1 overwritten" in the unzipped files
    gunzip_files(tmp_path, force="skip")

    for name in names:
        assert (tmp_path / name).read_text() == f"{name} overwritten"