Skip to content

Commit

Permalink
Rename VaspDir to PMGDir and move it to pymatgen.io.common since it is
Browse files Browse the repository at this point in the history
not specific to VASP.
  • Loading branch information
shyuep committed Oct 28, 2024
1 parent 4b10dd3 commit 89969ba
Show file tree
Hide file tree
Showing 5 changed files with 145 additions and 139 deletions.
111 changes: 108 additions & 3 deletions src/pymatgen/io/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,15 @@

from __future__ import annotations

import collections
import importlib
import itertools
import json
import os
import typing
import warnings
from copy import deepcopy
from pathlib import Path
from typing import TYPE_CHECKING

import numpy as np
Expand All @@ -18,9 +23,7 @@
from pymatgen.electronic_structure.core import Spin

if TYPE_CHECKING:
from pathlib import Path

from typing_extensions import Self
from typing_extensions import Any, Self


class VolumetricData(MSONable):
Expand Down Expand Up @@ -414,3 +417,105 @@ def from_cube(cls, filename: str | Path) -> Self:
(num_x_voxels, num_y_voxels, num_z_voxels),
)
return cls(structure=structure, data={"total": data})


class PMGDir(collections.abc.Mapping):
    """
    User-friendly class to access all files in a directory as pymatgen objects in a dict. For now, only VASP files are
    implemented but there is no reason why this cannot be extended to other types of files.

    Note that the files are lazily parsed to minimize initialization costs since not all files will be needed by all
    users.

    Keys are file paths relative to the directory (files in subdirectories are included, e.g. "00/POSCAR").

    Example:
    ```
    d = PMGDir(".")
    print(d["INCAR"]["NELM"])
    print(d["vasprun.xml"].parameters)
    ```
    """

    # Maps a substring of the file name to the dotted path of the class used to parse it. The class is imported
    # lazily in __getitem__, so this module does not import pymatgen.io.vasp at import time.
    FILE_MAPPINGS: typing.ClassVar = {
        n: f"pymatgen.io.vasp.{n.capitalize()}"
        for n in [
            "INCAR",
            "POSCAR",
            "KPOINTS",
            "POTCAR",
            "vasprun",
            "OUTCAR",
            "OSZICAR",
            "CHGCAR",
            "WAVECAR",
            "WAVEDER",
            "LOCPOT",
            "XDATCAR",
            "EIGENVAL",
            "PROCAR",
            "ELFCAR",
            "DYNMAT",
        ]
    } | {
        "CONTCAR": "pymatgen.io.vasp.Poscar",
        "IBZKPT": "pymatgen.io.vasp.Kpoints",
        "WSWQ": "pymatgen.io.vasp.WSWQ",
    }

    def __init__(self, dirname: str | Path):
        """
        Args:
            dirname: The directory containing the calculation files as a string or Path.
        """
        self.path = Path(dirname).absolute()
        self.reset()

    def reset(self):
        """
        Reset all loaded files and recheck the directory for files. Use this when the contents of the directory have
        changed.
        """
        # Note that py3.12 has Path.walk(). But we need to use os.walk to ensure backwards compatibility for now.
        # relative_to() removes the directory prefix exactly. (str.lstrip() would instead strip any leading
        # characters that happen to occur in the directory path, corrupting names such as "tmp.txt".)
        self.files = [
            str((Path(d) / f).relative_to(self.path)) for d, _, fnames in os.walk(self.path) for f in fnames
        ]
        self._parsed_files: dict[str, Any] = {}

    def __len__(self):
        """Number of files found under the directory at the last reset()."""
        return len(self.files)

    def __iter__(self):
        """Iterate over the relative paths of the files found at the last reset()."""
        return iter(self.files)

    def __getitem__(self, item):
        """
        Return the parsed object for the file at the relative path `item`.

        Parsed objects are cached until reset() is called. Files with no matching entry in FILE_MAPPINGS are
        returned as a string (with a UserWarning) and are not cached.

        Raises:
            ValueError: if `item` does not exist in the directory.
        """
        if item in self._parsed_files:
            return self._parsed_files[item]
        fpath = self.path / item

        if not fpath.exists():
            raise ValueError(f"{item} not found in {self.path}. List of files are {self.files}.")

        # The first FILE_MAPPINGS key that occurs as a substring of the requested name decides the parser.
        for k, cls_ in PMGDir.FILE_MAPPINGS.items():
            if k in item:
                modname, classname = cls_.rsplit(".", 1)
                module = importlib.import_module(modname)
                class_ = getattr(module, classname)
                # Prefer the from_file constructor when the class has one; otherwise pass the path to the class
                # itself. Checking explicitly avoids masking an AttributeError raised inside from_file.
                from_file = getattr(class_, "from_file", None)
                self._parsed_files[item] = class_(fpath) if from_file is None else from_file(fpath)

                return self._parsed_files[item]

        warnings.warn(
            f"No parser defined for {item}. Contents are returned as a string.",
            UserWarning,
        )
        with zopen(fpath, "rt") as f:
            return f.read()

    def get_files_by_name(self, name: str) -> dict[str, Any]:
        """
        Returns all files with a given name. E.g., if you want all the OUTCAR files, set name="OUTCAR".

        Args:
            name: Substring to look for in the relative file paths.

        Returns:
            {filename: object from PMGDir[filename]}
        """
        return {f: self[f] for f in self.files if name in f}
1 change: 0 additions & 1 deletion src/pymatgen/io/vasp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
Oszicar,
Outcar,
Procar,
VaspDir,
Vasprun,
VolumetricData,
Wavecar,
Expand Down
100 changes: 0 additions & 100 deletions src/pymatgen/io/vasp/outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,10 @@

from __future__ import annotations

import collections
import itertools
import math
import os
import re
import typing
import warnings
import xml.etree.ElementTree as ET
from collections import defaultdict
Expand Down Expand Up @@ -5749,101 +5747,3 @@ def from_file(cls, filename: str) -> Self:

class UnconvergedVASPWarning(Warning):
"""Warning for unconverged VASP run."""


class VaspDir(collections.abc.Mapping):
    """
    User-friendly class to access all files in a VASP calculation directory as pymatgen objects in a dict.

    Note that the files are lazily parsed to minimize initialization costs since not all files will be needed by all
    users.

    Keys are file paths relative to the directory (files in subdirectories are included, e.g. "00/POSCAR").

    Example:
    ```
    d = VaspDir(".")
    print(d["INCAR"]["NELM"])
    print(d["vasprun.xml"].parameters)
    ```
    """

    # Maps a substring of the file name to the class used to parse it. The classes are looked up by their
    # capitalized name in this module's globals.
    FILE_MAPPINGS: typing.ClassVar = {
        n: globals()[n.capitalize()]
        for n in [
            "INCAR",
            "POSCAR",
            "KPOINTS",
            "POTCAR",
            "vasprun",
            "OUTCAR",
            "OSZICAR",
            "CHGCAR",
            "WAVECAR",
            "WAVEDER",
            "LOCPOT",
            "XDATCAR",
            "EIGENVAL",
            "PROCAR",
            "ELFCAR",
            "DYNMAT",
        ]
    } | {
        "CONTCAR": Poscar,
        "IBZKPT": Kpoints,
        "WSWQ": WSWQ,
    }

    def __init__(self, dirname: str | Path):
        """
        Args:
            dirname: The directory containing the VASP calculation as a string or Path.
        """
        self.path = Path(dirname).absolute()
        self.reset()

    def reset(self):
        """
        Reset all loaded files and recheck the directory for files. Use this when the contents of the directory have
        changed.
        """
        # Note that py3.12 has Path.walk(). But we need to use os.walk to ensure backwards compatibility for now.
        # relative_to() removes the directory prefix exactly. (str.lstrip() would instead strip any leading
        # characters that happen to occur in the directory path, corrupting names such as "tmp.txt".)
        self.files = [
            str((Path(d) / f).relative_to(self.path)) for d, _, fnames in os.walk(self.path) for f in fnames
        ]
        self._parsed_files: dict[str, Any] = {}

    def __len__(self):
        """Number of files found under the directory at the last reset()."""
        return len(self.files)

    def __iter__(self):
        """Iterate over the relative paths of the files found at the last reset()."""
        return iter(self.files)

    def __getitem__(self, item):
        """
        Return the parsed object for the file at the relative path `item`.

        Parsed objects are cached until reset() is called. Files with no matching entry in FILE_MAPPINGS are
        returned as a string (with a UserWarning) and are not cached.

        Raises:
            ValueError: if `item` does not exist in the directory.
        """
        if item in self._parsed_files:
            return self._parsed_files[item]
        fpath = self.path / item

        if not fpath.exists():
            raise ValueError(f"{item} not found in {self.path}. List of files are {self.files}.")

        # The first FILE_MAPPINGS key that occurs as a substring of the requested name decides the parser.
        for k, cls_ in VaspDir.FILE_MAPPINGS.items():
            if k in item:
                # Prefer the from_file constructor when the class has one; otherwise pass the path to the class
                # itself. Checking explicitly avoids masking an AttributeError raised inside from_file.
                from_file = getattr(cls_, "from_file", None)
                self._parsed_files[item] = cls_(fpath) if from_file is None else from_file(fpath)

                return self._parsed_files[item]

        warnings.warn(
            f"No parser defined for {item}. Contents are returned as a string.",
            UserWarning,
        )
        with zopen(fpath, "rt") as f:
            return f.read()

    def get_files_by_name(self, name: str) -> dict[str, Any]:
        """
        Returns all files with a given name. E.g., if you want all the OUTCAR files, set name="OUTCAR".

        Args:
            name: Substring to look for in the relative file paths.

        Returns:
            {filename: object from VaspDir[filename]}
        """
        return {f: self[f] for f in self.files if name in f}
38 changes: 37 additions & 1 deletion tests/io/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@

from typing import TYPE_CHECKING

from pymatgen.io.common import VolumetricData
import pytest

from pymatgen.io.common import PMGDir, VolumetricData
from pymatgen.util.testing import TEST_FILES_DIR

if TYPE_CHECKING:
Expand All @@ -20,3 +22,37 @@ def test_cube_io_faithful(tmp_path: Path) -> None:
# structure should be preserved round-trip to/from cube file
assert cube_file.structure.volume == out_cube.structure.volume
assert cube_file.structure == out_cube.structure


class TestPMGDir:
    """Smoke tests for PMGDir: file discovery, lazy parsing, fallbacks and reset."""

    def test_getitem(self):
        # Some simple testing of loading and reading since all these were tested in other classes.
        d = PMGDir(f"{TEST_FILES_DIR}/io/vasp/fixtures/relaxation")
        assert len(d) == 5
        assert d["OUTCAR"].run_stats["cores"] == 8

        d = PMGDir(f"{TEST_FILES_DIR}/io/vasp/fixtures/scan_relaxation")
        assert len(d) == 2
        assert d["vasprun.xml.gz"].incar["METAGGA"] == "R2scan"

        # Requesting a file that does not exist must raise.
        with pytest.raises(ValueError, match="hello not found"):
            d["hello"]

        # Files without a registered parser come back as raw text, with a warning.
        d = PMGDir(f"{TEST_FILES_DIR}/io/pwscf")
        with pytest.warns(UserWarning, match=r"No parser defined for Si.pwscf.out"):
            assert isinstance(d["Si.pwscf.out"], str)

        # Test NEB directories.
        d = PMGDir(f"{TEST_FILES_DIR}/io/vasp/fixtures/neb_analysis/neb1/neb")

        assert len(d) == 10
        from pymatgen.io.vasp import Poscar

        assert isinstance(d["00/POSCAR"], Poscar)

        outcars = d.get_files_by_name("OUTCAR")
        assert len(outcars) == 5
        # Check the key itself; a bare string literal would make this assertion always pass.
        assert all("OUTCAR" in k for k in outcars)

        d.reset()
        assert len(d._parsed_files) == 0
34 changes: 0 additions & 34 deletions tests/io/vasp/test_outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
Outcar,
Procar,
UnconvergedVASPWarning,
VaspDir,
VaspParseError,
Vasprun,
Wavecar,
Expand Down Expand Up @@ -2183,36 +2182,3 @@ def test_consistency(self):
assert np.linalg.norm([r, i]) > 0.999
else:
assert np.linalg.norm([r, i]) < 0.001


class TestVaspDir(PymatgenTest):
    """Smoke tests for VaspDir: file discovery, lazy parsing, fallbacks and reset."""

    def test_getitem(self):
        # Some simple testing of loading and reading since all these were tested in other classes.
        d = VaspDir(f"{TEST_FILES_DIR}/io/vasp/fixtures/relaxation")
        assert len(d) == 5
        assert d["OUTCAR"].run_stats["cores"] == 8

        d = VaspDir(f"{TEST_FILES_DIR}/io/vasp/fixtures/scan_relaxation")
        assert len(d) == 2
        assert d["vasprun.xml.gz"].incar["METAGGA"] == "R2scan"

        # Requesting a file that does not exist must raise.
        with pytest.raises(ValueError, match="hello not found"):
            d["hello"]

        # Files without a registered parser come back as raw text, with a warning.
        d = VaspDir(f"{TEST_FILES_DIR}/io/pwscf")
        with pytest.warns(UserWarning, match=r"No parser defined for Si.pwscf.out"):
            assert isinstance(d["Si.pwscf.out"], str)

        # Test NEB directories.
        d = VaspDir(f"{TEST_FILES_DIR}/io/vasp/fixtures/neb_analysis/neb1/neb")

        assert len(d) == 10

        assert isinstance(d["00/POSCAR"], Poscar)

        outcars = d.get_files_by_name("OUTCAR")
        assert len(outcars) == 5
        # Check the key itself; a bare string literal would make this assertion always pass.
        assert all("OUTCAR" in k for k in outcars)

        d.reset()
        assert len(d._parsed_files) == 0

0 comments on commit 89969ba

Please sign in to comment.