From 89969babff1988e7d669a8f7695c375497c110ca Mon Sep 17 00:00:00 2001
From: Shyue Ping Ong
Date: Mon, 28 Oct 2024 16:56:27 -0700
Subject: [PATCH] Rename VaspDir to PMGDir and move it to pymatgen.io.common
 since it is not specific to VASP.

---
Review notes (text between "---" and the first "diff --git" line is not part
of the commit message):

- PMGDir.reset: replaced `str(Path(d) / f).lstrip(str(self.path))` with
  `Path(d, f).relative_to(self.path)`. `str.lstrip` removes any leading
  characters contained in its argument, it does not remove a prefix, so a file
  "test.out" inside "/home/user/test" was listed as ".out".
- TestPMGDir.test_getitem: `all("OUTCAR" for k in outcars)` was always true
  because a non-empty string literal is truthy; it now checks `"OUTCAR" in k`.
- Both fixes are one-for-one line replacements, so hunk sizes and the diffstat
  are unchanged. Lines being removed (VaspDir, TestVaspDir) are untouched.
- This copy was rebuilt from a whitespace-collapsed version of the patch. The
  indentation of unchanged context lines is reconstructed; if a hunk fails to
  apply, retry with `git apply --ignore-whitespace`.
- Possible follow-up (not done here): PMGDir.__getitem__ raises ValueError for
  a missing file, while the collections.abc.Mapping mixins (`in`, `.get`) only
  treat KeyError as "missing".

 src/pymatgen/io/common.py        | 111 ++++++++++++++++++++++++++++++-
 src/pymatgen/io/vasp/__init__.py |   1 -
 src/pymatgen/io/vasp/outputs.py  | 100 ----------------------------
 tests/io/test_common.py          |  38 ++++++++++-
 tests/io/vasp/test_outputs.py    |  34 ----------
 5 files changed, 145 insertions(+), 139 deletions(-)

diff --git a/src/pymatgen/io/common.py b/src/pymatgen/io/common.py
index 7dbfc2c708d..74f5cece0ab 100644
--- a/src/pymatgen/io/common.py
+++ b/src/pymatgen/io/common.py
@@ -2,10 +2,15 @@
 
 from __future__ import annotations
 
+import collections
+import importlib
 import itertools
 import json
+import os
+import typing
 import warnings
 from copy import deepcopy
+from pathlib import Path
 from typing import TYPE_CHECKING
 
 import numpy as np
@@ -18,9 +23,7 @@
 from pymatgen.electronic_structure.core import Spin
 
 if TYPE_CHECKING:
-    from pathlib import Path
-
-    from typing_extensions import Self
+    from typing_extensions import Any, Self
 
 
 class VolumetricData(MSONable):
@@ -414,3 +417,105 @@ def from_cube(cls, filename: str | Path) -> Self:
             (num_x_voxels, num_y_voxels, num_z_voxels),
         )
         return cls(structure=structure, data={"total": data})
+
+
+class PMGDir(collections.abc.Mapping):
+    """
+    User-friendly class to access all files in a directory as pymatgen objects in a dict. For now, only VASP files are
+    implemented but there is no reason why this cannot be extended to other types of files.
+    Note that the files are lazily parsed to minimize initialization costs since not all files will be needed by all
+    users.
+
+    Example:
+
+    ```
+    d = PMGDir(".")
+    print(d["INCAR"]["NELM"])
+    print(d["vasprun.xml"].parameters)
+    ```
+    """
+
+    FILE_MAPPINGS: typing.ClassVar = {
+        n: f"pymatgen.io.vasp.{n.capitalize()}"
+        for n in [
+            "INCAR",
+            "POSCAR",
+            "KPOINTS",
+            "POTCAR",
+            "vasprun",
+            "OUTCAR",
+            "OSZICAR",
+            "CHGCAR",
+            "WAVECAR",
+            "WAVEDER",
+            "LOCPOT",
+            "XDATCAR",
+            "EIGENVAL",
+            "PROCAR",
+            "ELFCAR",
+            "DYNMAT",
+        ]
+    } | {
+        "CONTCAR": "pymatgen.io.vasp.Poscar",
+        "IBZKPT": "pymatgen.io.vasp.Kpoints",
+        "WSWQ": "pymatgen.io.vasp.WSWQ",
+    }
+
+    def __init__(self, dirname: str | Path):
+        """
+        Args:
+            dirname: The directory containing the VASP calculation as a string or Path.
+        """
+        self.path = Path(dirname).absolute()
+        self.reset()
+
+    def reset(self):
+        """
+        Reset all loaded files and recheck the directory for files. Use this when the contents of the directory has
+        changed.
+        """
+        # Note that py3.12 has Path.walk(). But we need to use os.walk to ensure backwards compatibility for now.
+        self.files = [str(Path(d, f).relative_to(self.path)) for d, _, fnames in os.walk(self.path) for f in fnames]
+        self._parsed_files: dict[str, Any] = {}
+
+    def __len__(self):
+        return len(self.files)
+
+    def __iter__(self):
+        return iter(self.files)
+
+    def __getitem__(self, item):
+        if item in self._parsed_files:
+            return self._parsed_files[item]
+        fpath = self.path / item
+
+        if not (self.path / item).exists():
+            raise ValueError(f"{item} not found in {self.path}. List of files are {self.files}.")
+
+        for k, cls_ in PMGDir.FILE_MAPPINGS.items():
+            if k in item:
+                modname, classname = cls_.rsplit(".", 1)
+                module = importlib.import_module(modname)
+                class_ = getattr(module, classname)
+                try:
+                    self._parsed_files[item] = class_.from_file(fpath)
+                except AttributeError:
+                    self._parsed_files[item] = class_(fpath)
+
+                return self._parsed_files[item]
+
+        warnings.warn(
+            f"No parser defined for {item}. Contents are returned as a string.",
+            UserWarning,
+        )
+        with zopen(fpath, "rt") as f:
+            return f.read()
+
+    def get_files_by_name(self, name: str) -> dict[str, Any]:
+        """
+        Returns all files with a given name. E.g., if you want all the OUTCAR files, set name="OUTCAR".
+
+        Returns:
+            {filename: object from PMGDir[filename]}
+        """
+        return {f: self[f] for f in self.files if name in f}
diff --git a/src/pymatgen/io/vasp/__init__.py b/src/pymatgen/io/vasp/__init__.py
index 8d5376116e0..59ca377e99e 100644
--- a/src/pymatgen/io/vasp/__init__.py
+++ b/src/pymatgen/io/vasp/__init__.py
@@ -17,7 +17,6 @@
     Oszicar,
     Outcar,
     Procar,
-    VaspDir,
     Vasprun,
     VolumetricData,
     Wavecar,
diff --git a/src/pymatgen/io/vasp/outputs.py b/src/pymatgen/io/vasp/outputs.py
index 921aa71a6a4..7d74758bf89 100644
--- a/src/pymatgen/io/vasp/outputs.py
+++ b/src/pymatgen/io/vasp/outputs.py
@@ -2,12 +2,10 @@
 
 from __future__ import annotations
 
-import collections
 import itertools
 import math
 import os
 import re
-import typing
 import warnings
 import xml.etree.ElementTree as ET
 from collections import defaultdict
@@ -5749,101 +5747,3 @@ def from_file(cls, filename: str) -> Self:
 
 class UnconvergedVASPWarning(Warning):
     """Warning for unconverged VASP run."""
-
-
-class VaspDir(collections.abc.Mapping):
-    """
-    User-friendly class to access all files in a VASP calculation directory as pymatgen objects in a dict.
-    Note that the files are lazily parsed to minimize initialization costs since not all files will be needed by all
-    users.
-
-    Example:
-
-    ```
-    d = VaspDir(".")
-    print(d["INCAR"]["NELM"])
-    print(d["vasprun.xml"].parameters)
-    ```
-    """
-
-    FILE_MAPPINGS: typing.ClassVar = {
-        n: globals()[n.capitalize()]
-        for n in [
-            "INCAR",
-            "POSCAR",
-            "KPOINTS",
-            "POTCAR",
-            "vasprun",
-            "OUTCAR",
-            "OSZICAR",
-            "CHGCAR",
-            "WAVECAR",
-            "WAVEDER",
-            "LOCPOT",
-            "XDATCAR",
-            "EIGENVAL",
-            "PROCAR",
-            "ELFCAR",
-            "DYNMAT",
-        ]
-    } | {
-        "CONTCAR": Poscar,
-        "IBZKPT": Kpoints,
-        "WSWQ": WSWQ,
-    }
-
-    def __init__(self, dirname: str | Path):
-        """
-        Args:
-            dirname: The directory containing the VASP calculation as a string or Path.
-        """
-        self.path = Path(dirname).absolute()
-        self.reset()
-
-    def reset(self):
-        """
-        Reset all loaded files and recheck the directory for files. Use this when the contents of the directory has
-        changed.
-        """
-        # Note that py3.12 has Path.walk(). But we need to use os.walk to ensure backwards compatibility for now.
-        self.files = [str(Path(d) / f).lstrip(str(self.path)) for d, _, fnames in os.walk(self.path) for f in fnames]
-        self._parsed_files: dict[str, Any] = {}
-
-    def __len__(self):
-        return len(self.files)
-
-    def __iter__(self):
-        return iter(self.files)
-
-    def __getitem__(self, item):
-        if item in self._parsed_files:
-            return self._parsed_files[item]
-        fpath = self.path / item
-
-        if not (self.path / item).exists():
-            raise ValueError(f"{item} not found in {self.path}. List of files are {self.files}.")
-
-        for k, cls_ in VaspDir.FILE_MAPPINGS.items():
-            if k in item:
-                try:
-                    self._parsed_files[item] = cls_.from_file(fpath)
-                except AttributeError:
-                    self._parsed_files[item] = cls_(fpath)
-
-                return self._parsed_files[item]
-
-        warnings.warn(
-            f"No parser defined for {item}. Contents are returned as a string.",
-            UserWarning,
-        )
-        with zopen(fpath, "rt") as f:
-            return f.read()
-
-    def get_files_by_name(self, name: str) -> dict[str, Any]:
-        """
-        Returns all files with a given name. E.g., if you want all the OUTCAR files, set name="OUTCAR".
-
-        Returns:
-            {filename: object from VaspDir[filename]}
-        """
-        return {f: self[f] for f in self.files if name in f}
diff --git a/tests/io/test_common.py b/tests/io/test_common.py
index b340830e50c..25747c3e8d4 100644
--- a/tests/io/test_common.py
+++ b/tests/io/test_common.py
@@ -2,7 +2,9 @@
 
 from typing import TYPE_CHECKING
 
-from pymatgen.io.common import VolumetricData
+import pytest
+
+from pymatgen.io.common import PMGDir, VolumetricData
 from pymatgen.util.testing import TEST_FILES_DIR
 
 if TYPE_CHECKING:
@@ -20,3 +22,37 @@ def test_cube_io_faithful(tmp_path: Path) -> None:
     # structure should be preserved round-trip to/from cube file
     assert cube_file.structure.volume == out_cube.structure.volume
     assert cube_file.structure == out_cube.structure
+
+
+class TestPMGDir:
+    def test_getitem(self):
+        # Some simple testing of loading and reading since all these were tested in other classes.
+        d = PMGDir(f"{TEST_FILES_DIR}/io/vasp/fixtures/relaxation")
+        assert len(d) == 5
+        assert d["OUTCAR"].run_stats["cores"] == 8
+
+        d = PMGDir(f"{TEST_FILES_DIR}/io/vasp/fixtures/scan_relaxation")
+        assert len(d) == 2
+        assert d["vasprun.xml.gz"].incar["METAGGA"] == "R2scan"
+
+        with pytest.raises(ValueError, match="hello not found"):
+            d["hello"]
+
+        d = PMGDir(f"{TEST_FILES_DIR}/io/pwscf")
+        with pytest.warns(UserWarning, match=r"No parser defined for Si.pwscf.out"):
+            assert isinstance(d["Si.pwscf.out"], str)
+
+        # Test NEB directories.
+        d = PMGDir(f"{TEST_FILES_DIR}/io/vasp/fixtures/neb_analysis/neb1/neb")
+
+        assert len(d) == 10
+        from pymatgen.io.vasp import Poscar
+
+        assert isinstance(d["00/POSCAR"], Poscar)
+
+        outcars = d.get_files_by_name("OUTCAR")
+        assert len(outcars) == 5
+        assert all("OUTCAR" in k for k in outcars)
+
+        d.reset()
+        assert len(d._parsed_files) == 0
diff --git a/tests/io/vasp/test_outputs.py b/tests/io/vasp/test_outputs.py
index 919e4c9ddbd..fe8b5ad83dc 100644
--- a/tests/io/vasp/test_outputs.py
+++ b/tests/io/vasp/test_outputs.py
@@ -34,7 +34,6 @@
     Outcar,
     Procar,
     UnconvergedVASPWarning,
-    VaspDir,
     VaspParseError,
     Vasprun,
     Wavecar,
@@ -2183,36 +2182,3 @@ def test_consistency(self):
                     assert np.linalg.norm([r, i]) > 0.999
                 else:
                     assert np.linalg.norm([r, i]) < 0.001
-
-
-class TestVaspDir(PymatgenTest):
-    def test_getitem(self):
-        # Some simple testing of loading and reading since all these were tested in other classes.
-        d = VaspDir(f"{TEST_FILES_DIR}/io/vasp/fixtures/relaxation")
-        assert len(d) == 5
-        assert d["OUTCAR"].run_stats["cores"] == 8
-
-        d = VaspDir(f"{TEST_FILES_DIR}/io/vasp/fixtures/scan_relaxation")
-        assert len(d) == 2
-        assert d["vasprun.xml.gz"].incar["METAGGA"] == "R2scan"
-
-        with pytest.raises(ValueError, match="hello not found"):
-            d["hello"]
-
-        d = VaspDir(f"{TEST_FILES_DIR}/io/pwscf")
-        with pytest.warns(UserWarning, match=r"No parser defined for Si.pwscf.out"):
-            assert isinstance(d["Si.pwscf.out"], str)
-
-        # Test NEB directories.
-        d = VaspDir(f"{TEST_FILES_DIR}/io/vasp/fixtures/neb_analysis/neb1/neb")
-
-        assert len(d) == 10
-
-        assert isinstance(d["00/POSCAR"], Poscar)
-
-        outcars = d.get_files_by_name("OUTCAR")
-        assert len(outcars) == 5
-        assert all("OUTCAR" for k in outcars)
-
-        d.reset()
-        assert len(d._parsed_files) == 0