From ad6eafee2474718813cba57aa8e1752f1181b639 Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Sun, 21 Apr 2024 17:41:47 +0800 Subject: [PATCH] Add type annotations for `io.vasp.inputs/optics` (#3740) * some easy mypy fixes * ruff check pymatgen/io/vasp --select ANN204 --unsafe-fixes --fix * add type for io.vasp.help * add timeout 60 sec for requests.get * pre-commit auto-fixes * add timeout 60 sec for requests.get * fix default value of default_names * finish poscar.from_str * finish Poscar * finish Incar * temp save for potcarsingle * put dunder methods close and to the top * put properties close and to the top * put properties close and to the top * replace str with PathLike * add types for optics * suppress some overload * remove None type from completely untyped classes * pre-commit auto-fixes * fix type error outside io.vasp * check for None in Incar init * ruff fix * allow None * fix types * replace `defaultdict` with specific type * revert accidental changes * fix test * fix tests --------- Co-authored-by: Janosh Riebesell --- dev_scripts/update_pt_data.py | 2 +- pymatgen/alchemy/filters.py | 2 +- pymatgen/alchemy/transmuters.py | 2 +- .../coordination_geometries.py | 2 +- .../structure_environments.py | 2 +- pymatgen/analysis/ewald.py | 2 +- .../substitution_probability.py | 2 +- pymatgen/analysis/wulff.py | 4 +- pymatgen/apps/battery/plotter.py | 43 +- pymatgen/apps/borg/queen.py | 2 +- pymatgen/cli/pmg_analyze.py | 6 +- pymatgen/command_line/vampire_caller.py | 4 +- pymatgen/core/interface.py | 2 +- pymatgen/core/structure.py | 2 +- pymatgen/ext/cod.py | 4 +- pymatgen/ext/matproj_legacy.py | 2 +- pymatgen/io/abinit/abitimer.py | 4 +- pymatgen/io/abinit/pseudos.py | 8 +- pymatgen/io/cp2k/outputs.py | 14 +- pymatgen/io/pwscf.py | 2 +- pymatgen/io/qchem/inputs.py | 4 +- pymatgen/io/vasp/help.py | 14 +- pymatgen/io/vasp/inputs.py | 1448 +++++++++-------- pymatgen/io/vasp/optics.py | 91 +- pymatgen/io/vasp/outputs.py | 57 +- pymatgen/io/vasp/sets.py | 6 +- pymatgen/io/xtb/outputs.py | 4 +- .../advanced_transformations.py | 7 +- .../standard_transformations.py | 2 +- pymatgen/vis/plotters.py | 4 +- pymatgen/vis/structure_vtk.py | 2 +- tasks.py | 4 +- tests/apps/battery/test_plotter.py | 1 + tests/ext/test_cod.py | 2 +- tests/ext/test_matproj.py | 6 +- tests/ext/test_optimade.py | 16 +- 36 files changed, 949 insertions(+), 830 deletions(-) diff --git a/dev_scripts/update_pt_data.py b/dev_scripts/update_pt_data.py index 178c2ce394b..b6a59fdd249 100644 --- a/dev_scripts/update_pt_data.py +++ b/dev_scripts/update_pt_data.py @@ -234,7 +234,7 @@ def gen_iupac_ordering(): def add_electron_affinities(): """Update the periodic table data file with electron affinities.""" - req = requests.get("https://wikipedia.org/wiki/Electron_affinity_(data_page)") + req = requests.get("https://wikipedia.org/wiki/Electron_affinity_(data_page)", timeout=60) soup = BeautifulSoup(req.text, "html.parser") table = None for table in soup.find_all("table"): diff --git a/pymatgen/alchemy/filters.py b/pymatgen/alchemy/filters.py index 3f1bf77091e..fef3ed6bae9 100644 --- a/pymatgen/alchemy/filters.py +++ b/pymatgen/alchemy/filters.py @@ -244,7 +244,7 @@ def __init__(self, existing_structures, structure_matcher=None, symprec=None): structure matcher is used. A recommended value is 1e-5. """ self.symprec = symprec - self.structure_list = [] + self.structure_list: list = [] self.existing_structures = existing_structures if isinstance(structure_matcher, dict): self.structure_matcher = StructureMatcher.from_dict(structure_matcher) diff --git a/pymatgen/alchemy/transmuters.py b/pymatgen/alchemy/transmuters.py index e4b4b4e1ffd..089285d3ae8 100644 --- a/pymatgen/alchemy/transmuters.py +++ b/pymatgen/alchemy/transmuters.py @@ -244,7 +244,7 @@ def __init__(self, cif_string, transformations=None, primitive=True, extend_coll """ transformed_structures = [] lines = cif_string.split("\n") - structure_data = [] + structure_data: list = [] read_data = False for line in lines: if re.match(r"^\s*data", line): diff --git a/pymatgen/analysis/chemenv/coordination_environments/coordination_geometries.py b/pymatgen/analysis/chemenv/coordination_environments/coordination_geometries.py index 9605fc7ff4c..de6d88630f1 100644 --- a/pymatgen/analysis/chemenv/coordination_environments/coordination_geometries.py +++ b/pymatgen/analysis/chemenv/coordination_environments/coordination_geometries.py @@ -908,7 +908,7 @@ def __init__(self, permutations_safe_override=False, only_symbols=None): self.minpoints = {} self.maxpoints = {} - self.separations_cg = {} + self.separations_cg: dict[int, dict] = {} for cn in range(6, 21): for cg in self.get_implemented_geometries(coordination=cn): if only_symbols is not None and cg.ce_symbol not in only_symbols: diff --git a/pymatgen/analysis/chemenv/coordination_environments/structure_environments.py b/pymatgen/analysis/chemenv/coordination_environments/structure_environments.py index d4d2217a12b..74afc331db8 100644 --- a/pymatgen/analysis/chemenv/coordination_environments/structure_environments.py +++ b/pymatgen/analysis/chemenv/coordination_environments/structure_environments.py @@ -2085,7 +2085,7 @@ def __init__(self, coord_geoms=None): coord_geoms: coordination geometries to be added to the chemical environment. """ if coord_geoms is None: - self.coord_geoms = {} + self.coord_geoms: dict = {} else: raise NotImplementedError( "Constructor for ChemicalEnvironments with the coord_geoms argument is not yet implemented" diff --git a/pymatgen/analysis/ewald.py b/pymatgen/analysis/ewald.py index 092f45266a5..704eaf7329a 100644 --- a/pymatgen/analysis/ewald.py +++ b/pymatgen/analysis/ewald.py @@ -540,7 +540,7 @@ def __init__(self, matrix, m_list, num_to_return=1, algo=ALGO_FAST): if algo == EwaldMinimizer.ALGO_COMPLETE: raise NotImplementedError("Complete algo not yet implemented for EwaldMinimizer") - self._output_lists = [] + self._output_lists: list = [] # Tag that the recurse function looks at each level. If a method # sets this to true it breaks the recursion and stops the search. self._finished = False diff --git a/pymatgen/analysis/structure_prediction/substitution_probability.py b/pymatgen/analysis/structure_prediction/substitution_probability.py index 2270def52d4..40e20619a2a 100644 --- a/pymatgen/analysis/structure_prediction/substitution_probability.py +++ b/pymatgen/analysis/structure_prediction/substitution_probability.py @@ -79,7 +79,7 @@ def __init__(self, lambda_table=None, alpha=-5): # create Z and px self.Z = 0 - self._px = defaultdict(float) + self._px: dict[Species, float] = defaultdict(float) for s1, s2 in itertools.product(self.species, repeat=2): value = math.exp(self.get_lambda(s1, s2)) self._px[s1] += value / 2 diff --git a/pymatgen/analysis/wulff.py b/pymatgen/analysis/wulff.py index 8f23920069c..10c0c13362b 100644 --- a/pymatgen/analysis/wulff.py +++ b/pymatgen/analysis/wulff.py @@ -90,8 +90,8 @@ def __init__(self, normal, e_surf, normal_pt, dual_pt, index, m_ind_orig, miller self.index = index self.m_ind_orig = m_ind_orig self.miller = miller - self.points = [] - self.outer_lines = [] + self.points: list = [] + self.outer_lines: list = [] class WulffShape: diff --git a/pymatgen/apps/battery/plotter.py b/pymatgen/apps/battery/plotter.py index aa0486587e4..bac03116bf8 100644 --- a/pymatgen/apps/battery/plotter.py +++ b/pymatgen/apps/battery/plotter.py @@ -2,11 +2,16 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import matplotlib.pyplot as plt import plotly.graph_objects as go from pymatgen.util.plotting import pretty_plot +if TYPE_CHECKING: + from pymatgen.apps.battery.battery_abc import AbstractElectrode + __author__ = "Shyue Ping Ong" __copyright__ = "Copyright 2012, The Materials Project" __version__ = "0.1" @@ -18,7 +23,7 @@ class VoltageProfilePlotter: """A plotter to make voltage profile plots for batteries.""" - def __init__(self, xaxis="capacity", hide_negative=False): + def __init__(self, xaxis: str = "capacity", hide_negative: bool = False) -> None: """ Args: xaxis: The quantity to use as the xaxis. Can be either @@ -28,11 +33,11 @@ def __init__(self, xaxis="capacity", hide_negative=False): - frac_x: the atomic fraction of the working ion hide_negative: If True only plot the voltage steps above zero. """ - self._electrodes = {} + self._electrodes: dict[str, AbstractElectrode] = {} self.xaxis = xaxis self.hide_negative = hide_negative - def add_electrode(self, electrode, label=None): + def add_electrode(self, electrode: AbstractElectrode, label: str | None = None) -> None: """Add an electrode to the plot. Args: @@ -41,11 +46,11 @@ def add_electrode(self, electrode, label=None): label: A label for the electrode. If None, defaults to a counting system, i.e. 'Electrode 1', 'Electrode 2', ... """ - if not label: + if label is None: label = f"Electrode {len(self._electrodes) + 1}" self._electrodes[label] = electrode - def get_plot_data(self, electrode, term_zero=True): + def get_plot_data(self, electrode: AbstractElectrode, term_zero: bool = True) -> tuple[list, list]: """ Args: electrode: Electrode object @@ -82,7 +87,7 @@ def get_plot_data(self, electrode, term_zero=True): y.append(0) return x, y - def get_plot(self, width=8, height=8, term_zero=True, ax: plt.Axes = None): + def get_plot(self, width: float = 8, height: float = 8, term_zero: bool = True, ax: plt.Axes = None) -> plt.Axes: """Returns a plot object. Args: @@ -112,12 +117,12 @@ def get_plot(self, width=8, height=8, term_zero=True, ax: plt.Axes = None): def get_plotly_figure( self, - width=800, - height=600, - font_dict=None, - term_zero=True, + width: float = 800, + height: float = 600, + font_dict: dict | None = None, + term_zero: bool = True, **kwargs, - ): + ) -> plt.Figure: """Return plotly Figure object. Args: @@ -163,28 +168,28 @@ def get_plotly_figure( fig.update_layout(template="plotly_white", title_x=0.5) return fig - def _choose_best_x_label(self, formula, work_ion_symbol): + def _choose_best_x_label(self, formula: set[str], work_ion_symbol: set[str]) -> str: if self.xaxis in {"capacity", "capacity_grav"}: return "Capacity (mAh/g)" if self.xaxis == "capacity_vol": return "Capacity (Ah/l)" - formula = formula.pop() if len(formula) == 1 else None + _formula: str | None = formula.pop() if len(formula) == 1 else None - work_ion_symbol = work_ion_symbol.pop() if len(work_ion_symbol) == 1 else None + _work_ion_symbol: str | None = work_ion_symbol.pop() if len(work_ion_symbol) == 1 else None if self.xaxis == "x_form": - if formula and work_ion_symbol: - return f"x in {work_ion_symbol}x{formula}" + if _formula and _work_ion_symbol: + return f"x in {_work_ion_symbol}x{_formula}" return "x Work Ion per Host F.U." if self.xaxis == "frac_x": - if work_ion_symbol: - return f"Atomic Fraction of {work_ion_symbol}" + if _work_ion_symbol: + return f"Atomic Fraction of {_work_ion_symbol}" return "Atomic Fraction of Working Ion" raise RuntimeError("No xaxis label can be determined") - def show(self, width=8, height=6): + def show(self, width: float = 8, height: float = 6) -> None: """Show the voltage profile plot. Args: diff --git a/pymatgen/apps/borg/queen.py b/pymatgen/apps/borg/queen.py index 9e08d412e33..bf4ece76e31 100644 --- a/pymatgen/apps/borg/queen.py +++ b/pymatgen/apps/borg/queen.py @@ -44,7 +44,7 @@ def __init__(self, drone, rootpath=None, number_of_drones=1): """ self._drone = drone self._num_drones = number_of_drones - self._data = [] + self._data: list = [] if rootpath: if number_of_drones > 1: diff --git a/pymatgen/cli/pmg_analyze.py b/pymatgen/cli/pmg_analyze.py index 089479576ed..132e447b8c3 100644 --- a/pymatgen/cli/pmg_analyze.py +++ b/pymatgen/cli/pmg_analyze.py @@ -111,13 +111,13 @@ def get_magnetizations(dir: str, ion_list: list[int]): fullpath = os.path.join(parent, file) outcar = Outcar(fullpath) mags = outcar.magnetization - mags = [m["tot"] for m in mags] - all_ions = list(range(len(mags))) + _mags: list = [m["tot"] for m in mags] + all_ions = list(range(len(_mags))) row.append(fullpath.lstrip("./")) if ion_list: all_ions = ion_list for ion in all_ions: - row.append(str(mags[ion])) + row.append(str(_mags[ion])) data.append(row) if len(all_ions) > max_row: max_row = len(all_ions) diff --git a/pymatgen/command_line/vampire_caller.py b/pymatgen/command_line/vampire_caller.py index cfd4153f678..6f83e25a16f 100644 --- a/pymatgen/command_line/vampire_caller.py +++ b/pymatgen/command_line/vampire_caller.py @@ -131,8 +131,8 @@ def __init__( # Call Vampire with subprocess.Popen(["vampire-serial"], stdout=subprocess.PIPE, stderr=subprocess.PIPE) as process: - stdout, stderr = process.communicate() - stdout = stdout.decode() + _stdout, stderr = process.communicate() + stdout: str = _stdout.decode() if stderr: van_helsing = stderr.decode() diff --git a/pymatgen/core/interface.py b/pymatgen/core/interface.py index f47ece317db..ba472c4159f 100644 --- a/pymatgen/core/interface.py +++ b/pymatgen/core/interface.py @@ -244,7 +244,7 @@ def coincidents(self) -> list[Site]: coincident_sites.append(self.sites[idx]) return coincident_sites - def __str__(self): + def __str__(self) -> str: comp = self.composition outs = [ f"Gb Summary ({comp.formula})", diff --git a/pymatgen/core/structure.py b/pymatgen/core/structure.py index 8e0c4ef6194..8fd45120a22 100644 --- a/pymatgen/core/structure.py +++ b/pymatgen/core/structure.py @@ -2860,7 +2860,7 @@ def from_str( # type: ignore[override] elif fmt_low == "poscar": from pymatgen.io.vasp import Poscar - struct = Poscar.from_str(input_string, default_names=False, read_velocities=False, **kwargs).structure + struct = Poscar.from_str(input_string, default_names=None, read_velocities=False, **kwargs).structure elif fmt_low == "cssr": from pymatgen.io.cssr import Cssr diff --git a/pymatgen/ext/cod.py b/pymatgen/ext/cod.py index 9a720104b2a..6c25f7d1d80 100644 --- a/pymatgen/ext/cod.py +++ b/pymatgen/ext/cod.py @@ -90,7 +90,7 @@ def get_structure_by_id(self, cod_id, **kwargs): Returns: A Structure. """ - response = requests.get(f"http://{self.url}/cod/{cod_id}.cif") + response = requests.get(f"http://{self.url}/cod/{cod_id}.cif", timeout=60) return Structure.from_str(response.text, fmt="cif", **kwargs) @requires(which("mysql"), "mysql must be installed to use this query.") @@ -112,7 +112,7 @@ def get_structure_by_formula(self, formula: str, **kwargs) -> list[dict[str, str for line in text: if line.strip(): cod_id, sg = line.split("\t") - response = requests.get(f"http://www.crystallography.net/cod/{cod_id.strip()}.cif") + response = requests.get(f"http://www.crystallography.net/cod/{cod_id.strip()}.cif", timeout=60) try: struct = Structure.from_str(response.text, fmt="cif", **kwargs) structures.append({"structure": struct, "cod_id": int(cod_id), "sg": sg}) diff --git a/pymatgen/ext/matproj_legacy.py b/pymatgen/ext/matproj_legacy.py index 84ed591e560..68ad6d2ee5f 100644 --- a/pymatgen/ext/matproj_legacy.py +++ b/pymatgen/ext/matproj_legacy.py @@ -1570,7 +1570,7 @@ def _check_get_download_info_url_by_task_id(self, prefix, task_ids) -> list[str] @staticmethod def _check_nomad_exist(url) -> bool: - response = requests.get(url=url) + response = requests.get(url=url, timeout=60) if response.status_code != 200: return False content = json.loads(response.text) diff --git a/pymatgen/io/abinit/abitimer.py b/pymatgen/io/abinit/abitimer.py index 2a428341092..242ba054db9 100644 --- a/pymatgen/io/abinit/abitimer.py +++ b/pymatgen/io/abinit/abitimer.py @@ -85,11 +85,11 @@ def walk(cls, top=".", ext=".abo"): def __init__(self): """Initialize object.""" # List of files that have been parsed. - self._filenames = [] + self._filenames: list = [] # timers[filename][mpi_rank] # contains the timer extracted from the file filename associated to the MPI rank mpi_rank. - self._timers = {} + self._timers: dict = {} def __iter__(self): return iter(self._timers) diff --git a/pymatgen/io/abinit/pseudos.py b/pymatgen/io/abinit/pseudos.py index cdbfa50c2f0..ad14a15f691 100644 --- a/pymatgen/io/abinit/pseudos.py +++ b/pymatgen/io/abinit/pseudos.py @@ -1033,10 +1033,10 @@ class PseudoParser: def __init__(self): # List of files that have been parsed successfully. - self._parsed_paths = [] + self._parsed_paths: list = [] # List of files that could not been parsed. - self._wrong_paths = [] + self._wrong_paths: list = [] def scan_directory(self, dirname, exclude_exts=(), exclude_fnames=()): """ @@ -1228,14 +1228,14 @@ def __init__(self, filepath): # In this way, we know that only the first two bound states (with f and n attributes) # should be used for constructing an initial guess for the wave functions. - self.valence_states = {} + self.valence_states: dict = {} for node in root.find("valence_states"): attrib = AttrDict(node.attrib) assert attrib.id not in self.valence_states self.valence_states[attrib.id] = attrib # Parse the radial grids - self.rad_grids = {} + self.rad_grids: dict = {} for node in root.findall("radial_grid"): grid_params = node.attrib gid = grid_params["id"] diff --git a/pymatgen/io/cp2k/outputs.py b/pymatgen/io/cp2k/outputs.py index d4309022a80..43b27d10d6c 100644 --- a/pymatgen/io/cp2k/outputs.py +++ b/pymatgen/io/cp2k/outputs.py @@ -57,15 +57,15 @@ def __init__(self, filename, verbose=False, auto_load=False): # IO Info self.filename = filename self.dir = os.path.dirname(filename) - self.filenames = {} + self.filenames: dict = {} self.parse_files() - self.data = {} + self.data: dict = {} # Material properties/results self.input = self.initial_structure = self.lattice = self.final_structure = self.composition = None self.efermi = self.vbm = self.cbm = self.band_gap = None - self.structures = [] - self.ionic_steps = [] + self.structures: list = [] + self.ionic_steps: list = [] # parse the basic run parameters always self.parse_cp2k_params() @@ -171,7 +171,7 @@ def calculation_type(self): @property def project_name(self) -> str: """What project name was used for this calculation.""" - return self.data.get("global").get("project_name") + return self.data.get("global", {}).get("project_name") @property def spin_polarized(self) -> bool: @@ -1259,12 +1259,12 @@ def parse_dos(self, dos_file=None, pdos_files=None, ldos_files=None): self.data["cdos"] = CompleteDos(self.final_structure, total_dos=tdos, pdoss=_ldoss) @property - def complete_dos(self) -> CompleteDos: + def complete_dos(self) -> CompleteDos | None: """Returns complete dos object if it has been parsed.""" return self.data.get("cdos") @property - def band_structure(self) -> BandStructure: + def band_structure(self) -> BandStructure | None: """Returns band structure object if it has been parsed.""" return self.data.get("band_structure") diff --git a/pymatgen/io/pwscf.py b/pymatgen/io/pwscf.py index b68d7df1648..fc4115965ab 100644 --- a/pymatgen/io/pwscf.py +++ b/pymatgen/io/pwscf.py @@ -522,7 +522,7 @@ def __init__(self, filename): filename (str): Filename. """ self.filename = filename - self.data = defaultdict(list) + self.data: dict[str, list[float] | float] = defaultdict(list) self.read_pattern(PWOutput.patterns) for k, v in self.data.items(): if k == "energies": diff --git a/pymatgen/io/qchem/inputs.py b/pymatgen/io/qchem/inputs.py index 99678728278..d99e9f85d5e 100644 --- a/pymatgen/io/qchem/inputs.py +++ b/pymatgen/io/qchem/inputs.py @@ -243,8 +243,8 @@ def get_str(self) -> str: """Return a string representation of an entire input file.""" return str(self) - def __str__(self): - combined_list = [] + def __str__(self) -> str: + combined_list: list = [] # molecule section combined_list.extend((self.molecule_template(self.molecule), "", self.rem_template(self.rem), "")) # opt section diff --git a/pymatgen/io/vasp/help.py b/pymatgen/io/vasp/help.py index 6a9059e71b9..376e114d0a8 100644 --- a/pymatgen/io/vasp/help.py +++ b/pymatgen/io/vasp/help.py @@ -11,11 +11,11 @@ class VaspDoc: """A VASP documentation helper.""" - def __init__(self): + def __init__(self) -> None: """Init for VaspDoc.""" self.url_template = "http://www.vasp.at/wiki/index.php/%s" - def print_help(self, tag): + def print_help(self, tag: str) -> None: """ Print the help for a TAG. @@ -24,7 +24,7 @@ def print_help(self, tag): """ print(self.get_help(tag)) - def print_jupyter_help(self, tag): + def print_jupyter_help(self, tag: str) -> None: """ Display HTML help in ipython notebook. @@ -37,7 +37,7 @@ def print_jupyter_help(self, tag): display(HTML(html_str)) @classmethod - def get_help(cls, tag, fmt="text"): + def get_help(cls, tag: str, fmt: str = "text") -> str: """ Get help on a VASP tag. @@ -48,7 +48,7 @@ def get_help(cls, tag, fmt="text"): Help text. """ tag = tag.upper() - response = requests.get(f"https://www.vasp.at/wiki/index.php/{tag}", verify=False) + response = requests.get(f"https://www.vasp.at/wiki/index.php/{tag}", verify=False, timeout=60) soup = BeautifulSoup(response.text) main_doc = soup.find(id="mw-content-text") if fmt == "text": @@ -60,14 +60,14 @@ def get_help(cls, tag, fmt="text"): return output @classmethod - def get_incar_tags(cls): + def get_incar_tags(cls) -> list[str]: """Returns: All incar tags.""" tags = [] for page in [ "https://www.vasp.at/wiki/index.php/Category:INCAR", "https://www.vasp.at/wiki/index.php?title=Category:INCAR&pagefrom=ML+FF+LCONF+DISCARD#mw-pages", ]: - response = requests.get(page, verify=False) + response = requests.get(page, verify=False, timeout=60) soup = BeautifulSoup(response.text) for div in soup.findAll("div", {"class": "mw-category-group"}): children = div.findChildren("li") diff --git a/pymatgen/io/vasp/inputs.py b/pymatgen/io/vasp/inputs.py index aa122ec2aa0..7b246aaffa1 100644 --- a/pymatgen/io/vasp/inputs.py +++ b/pymatgen/io/vasp/inputs.py @@ -19,7 +19,7 @@ from enum import Enum, unique from glob import glob from hashlib import sha256 -from typing import TYPE_CHECKING, Any, Literal, cast +from typing import TYPE_CHECKING, cast import numpy as np import scipy.constants as const @@ -38,6 +38,7 @@ if TYPE_CHECKING: from collections.abc import Iterator, Sequence from pathlib import Path + from typing import Any, Literal from numpy.typing import ArrayLike from typing_extensions import Self @@ -86,7 +87,7 @@ def __init__( predictor_corrector_preamble: str | None = None, lattice_velocities: ArrayLike | None = None, sort_structure: bool = False, - ): + ) -> None: """ Args: structure (Structure): Structure object. @@ -143,53 +144,70 @@ def __init__( self.temperature = -1.0 + def __setattr__(self, name: str, value: Any) -> None: + if name in {"selective_dynamics", "velocities"} and value is not None and len(value) > 0: + value = np.array(value) + dim = value.shape + if dim[1] != 3 or dim[0] != len(self.structure): + raise ValueError(f"{name} array must be same length as the structure.") + value = value.tolist() + + super().__setattr__(name, value) + + def __repr__(self) -> str: + return self.get_str() + + def __str__(self) -> str: + """String representation of Poscar file.""" + return self.get_str() + @property - def velocities(self): + def velocities(self) -> ArrayLike | None: """Velocities in Poscar.""" return self.structure.site_properties.get("velocities") @property - def selective_dynamics(self): + def selective_dynamics(self) -> ArrayLike | None: """Selective dynamics in Poscar.""" return self.structure.site_properties.get("selective_dynamics") @property - def predictor_corrector(self): + def predictor_corrector(self) -> ArrayLike | None: """Predictor corrector in Poscar.""" return self.structure.site_properties.get("predictor_corrector") @property - def predictor_corrector_preamble(self): + def predictor_corrector_preamble(self) -> str | None: """Predictor corrector preamble in Poscar.""" return self.structure.properties.get("predictor_corrector_preamble") @property - def lattice_velocities(self): + def lattice_velocities(self) -> ArrayLike | None: """Lattice velocities in Poscar (including the current lattice vectors).""" return self.structure.properties.get("lattice_velocities") - @velocities.setter # type: ignore - def velocities(self, velocities): + @velocities.setter # type: ignore[no-redef, attr-defined] + def velocities(self, velocities: ArrayLike | None) -> None: """Setter for Poscar.velocities.""" self.structure.add_site_property("velocities", velocities) - @selective_dynamics.setter # type: ignore - def selective_dynamics(self, selective_dynamics): + @selective_dynamics.setter # type: ignore[no-redef, attr-defined] + def selective_dynamics(self, selective_dynamics: ArrayLike | None) -> None: """Setter for Poscar.selective_dynamics.""" self.structure.add_site_property("selective_dynamics", selective_dynamics) - @predictor_corrector.setter # type: ignore - def predictor_corrector(self, predictor_corrector): + @predictor_corrector.setter # type: ignore[no-redef, attr-defined] + def predictor_corrector(self, predictor_corrector: ArrayLike | None) -> None: """Setter for Poscar.predictor_corrector.""" self.structure.add_site_property("predictor_corrector", predictor_corrector) - @predictor_corrector_preamble.setter # type: ignore - def predictor_corrector_preamble(self, predictor_corrector_preamble): + @predictor_corrector_preamble.setter # type: ignore[no-redef, attr-defined] + def predictor_corrector_preamble(self, predictor_corrector_preamble: str | None) -> None: """Setter for Poscar.predictor_corrector.""" self.structure.properties["predictor_corrector"] = predictor_corrector_preamble - @lattice_velocities.setter # type: ignore - def lattice_velocities(self, lattice_velocities: ArrayLike) -> None: + @lattice_velocities.setter # type: ignore[no-redef, attr-defined] + def lattice_velocities(self, lattice_velocities: ArrayLike | None) -> None: """Setter for Poscar.lattice_velocities.""" self.structure.properties["lattice_velocities"] = np.asarray(lattice_velocities) @@ -198,7 +216,7 @@ def site_symbols(self) -> list[str]: """ Sequence of symbols associated with the Poscar. Similar to 6th line in VASP 5+ POSCAR. """ - syms = [site.specie.symbol for site in self.structure] + syms: list[str] = [site.specie.symbol for site in self.structure] return [a[0] for a in itertools.groupby(syms)] @property @@ -207,22 +225,19 @@ def natoms(self) -> list[int]: Sequence of number of sites of each type associated with the Poscar. Similar to 7th line in vasp 5+ POSCAR or the 6th line in vasp 4 POSCAR. """ - syms = [site.specie.symbol for site in self.structure] + syms: list[str] = [site.specie.symbol for site in self.structure] return [len(tuple(a[1])) for a in itertools.groupby(syms)] - def __setattr__(self, name, value): - if name in ("selective_dynamics", "velocities") and value is not None and len(value) > 0: - value = np.array(value) - dim = value.shape - if dim[1] != 3 or dim[0] != len(self.structure): - raise ValueError(f"{name} array must be same length as the structure.") - value = value.tolist() - super().__setattr__(name, value) - @classmethod - def from_file(cls, filename, check_for_potcar=True, read_velocities=True, **kwargs) -> Self: + def from_file( + cls, + filename: PathLike, + check_for_potcar: bool = True, + read_velocities: bool = True, + **kwargs: dict[str, Any], + ) -> Self: """ - Reads a Poscar from a file. + Read POSCAR from a file. The code will try its best to determine the elements in the POSCAR in the following order: @@ -254,10 +269,10 @@ def from_file(cls, filename, check_for_potcar=True, read_velocities=True, **kwar """ if "check_for_POTCAR" in kwargs: warnings.warn("check_for_POTCAR is deprecated. Use check_for_potcar instead.", DeprecationWarning) - check_for_potcar = kwargs.pop("check_for_POTCAR") + check_for_potcar = cast(bool, kwargs.pop("check_for_POTCAR")) - dirname = os.path.dirname(os.path.abspath(filename)) - names = None + dirname: str = os.path.dirname(os.path.abspath(filename)) + names: list[str] | None = None if check_for_potcar and SETTINGS.get("PMG_POTCAR_CHECKS") is not False: potcars = glob(f"{dirname}/*POTCAR*") if potcars: @@ -267,13 +282,19 @@ def from_file(cls, filename, check_for_potcar=True, read_velocities=True, **kwar [get_el_sp(n) for n in names] # ensure valid names except Exception: names = None + with zopen(filename, mode="rt") as file: return cls.from_str(file.read(), names, read_velocities=read_velocities) @classmethod - def from_str(cls, data, default_names=None, read_velocities=True) -> Self: + def from_str( + cls, + data: str, + default_names: list[str] | None = None, + read_velocities: bool = True, + ) -> Self: """ - Reads a Poscar from a string. + Read POSCAR from a string. The code will try its best to determine the elements in the POSCAR in the following order: @@ -314,28 +335,28 @@ def from_str(cls, data, default_names=None, read_velocities=True) -> Self: raise ValueError("Empty POSCAR") # Parse positions - lines = list(clean_lines(chunks[0].split("\n"), remove_empty_lines=False)) - comment = lines[0] - scale = float(lines[1]) - lattice = np.array([[float(i) for i in line.split()] for line in lines[2:5]]) + lines: list[str] = list(clean_lines(chunks[0].split("\n"), remove_empty_lines=False)) + comment: str = lines[0] + scale: float = float(lines[1]) + lattice: np.ndarray = np.array([[float(i) for i in line.split()] for line in lines[2:5]]) if scale < 0: # In vasp, a negative scale factor is treated as a volume. We need # to translate this to a proper lattice vector scaling. - vol = abs(np.linalg.det(lattice)) + vol: float = abs(np.linalg.det(lattice)) lattice *= (-scale / vol) ** (1 / 3) else: lattice *= scale - vasp5_symbols = False - atomic_symbols = [] + vasp5_symbols: bool = False + atomic_symbols: list[str] = [] try: - n_atoms = [int(i) for i in lines[5].split()] - ipos = 6 + n_atoms: list[int] = [int(i) for i in lines[5].split()] + ipos: int = 6 except ValueError: vasp5_symbols = True - symbols = [symbol.split("/")[0] for symbol in lines[5].split()] + symbols: list[str] = [symbol.split("/")[0] for symbol in lines[5].split()] # Atoms and number of atoms in POSCAR written with vasp appear on # multiple lines when atoms of the same type are not grouped together @@ -352,7 +373,7 @@ def from_str(cls, data, default_names=None, read_velocities=True) -> Self: # 2 1 3 2 5 # Direct # ... - n_lines_symbols = 1 + n_lines_symbols: int = 1 for n_lines_symbols in range(1, 11): try: int(lines[5 + n_lines_symbols].split()[0]) @@ -362,43 +383,45 @@ def from_str(cls, data, default_names=None, read_velocities=True) -> Self: for i_line_symbols in range(6, 5 + n_lines_symbols): symbols.extend(lines[i_line_symbols].split()) + n_atoms = [] iline_natoms_start = 5 + n_lines_symbols for iline_natoms in range(iline_natoms_start, iline_natoms_start + n_lines_symbols): n_atoms.extend([int(i) for i in lines[iline_natoms].split()]) - for i, nat in enumerate(n_atoms): - atomic_symbols.extend([symbols[i]] * nat) + for idx, n_atom in enumerate(n_atoms): + atomic_symbols.extend([symbols[idx]] * n_atom) + ipos = 5 + 2 * n_lines_symbols - pos_type = lines[ipos].split()[0] + pos_type: str = lines[ipos].split()[0] - has_selective_dynamics = False + has_selective_dynamics: bool = False # Selective dynamics if pos_type[0] in "sS": has_selective_dynamics = True ipos += 1 pos_type = lines[ipos].split()[0] - cart = pos_type[0] in "cCkK" - n_sites = sum(n_atoms) + cart: bool = pos_type[0] in "cCkK" + n_sites: int = sum(n_atoms) # If default_names is specified (usually coming from a POTCAR), use # them. This is in line with VASP's parsing order that the POTCAR # specified is the default used. - if default_names: + if default_names is not None: try: atomic_symbols = [] - for i, nat in enumerate(n_atoms): - atomic_symbols.extend([default_names[i]] * nat) + for idx, n_atom in enumerate(n_atoms): + atomic_symbols.extend([default_names[idx]] * n_atom) vasp5_symbols = True except IndexError: pass if not vasp5_symbols: - ind = 6 if has_selective_dynamics else 3 + ind: Literal[3, 6] = 6 if has_selective_dynamics else 3 try: - # Check if names are appended at the end of the coordinates. + # Check if names are appended at the end of the coordinates atomic_symbols = [line.split()[ind] for line in lines[ipos + 1 : ipos + 1 + n_sites]] # Ensure symbols are valid elements if not all(Element.is_valid_symbol(sym) for sym in atomic_symbols): @@ -406,22 +429,22 @@ def from_str(cls, data, default_names=None, read_velocities=True) -> Self: vasp5_symbols = True except (ValueError, IndexError): - # Defaulting to false names. + # Defaulting to false names atomic_symbols = [] - for i, nat in enumerate(n_atoms, start=1): - sym = Element.from_Z(i).symbol - atomic_symbols.extend([sym] * nat) + for idx, n_atom in enumerate(n_atoms, start=1): + symbol = Element.from_Z(idx).symbol + atomic_symbols.extend([symbol] * n_atom) warnings.warn( f"Elements in POSCAR cannot be determined. Defaulting to false names {atomic_symbols}.", BadPoscarWarning, ) # Read the atomic coordinates - coords = [] - selective_dynamics: list[np.ndarray] | None = [] if has_selective_dynamics else None - for i in range(n_sites): - tokens = lines[ipos + 1 + i].split() - crd_scale = scale if cart else 1 + coords: list[list[float]] = [] + selective_dynamics: list[list[bool]] | None = [] if has_selective_dynamics else None + for idx in range(n_sites): + tokens: list[str] = lines[ipos + 1 + idx].split() + crd_scale: float = scale if cart else 1 coords.append([float(j) * crd_scale for j in tokens[:3]]) if selective_dynamics is not None: # Warn when values contain suspicious entries @@ -429,7 +452,7 @@ def from_str(cls, data, default_names=None, read_velocities=True) -> Self: warnings.warn("Selective dynamics values must be either 'T' or 'F'.", BadPoscarWarning) # Warn when elements contains Fluorine (F) (#3539) - if atomic_symbols[i] == "F" and len(tokens[3:]) >= 4 and "F" in tokens[3:7]: + if atomic_symbols[idx] == "F" and len(tokens[3:]) >= 4 and "F" in tokens[3:7]: warnings.warn( ( "Selective dynamics toggled with Fluorine element detected. " @@ -455,10 +478,10 @@ def from_str(cls, data, default_names=None, read_velocities=True) -> Self: coords_are_cartesian=cart, ) - lattice_velocities = [] - velocities = [] - predictor_corrector = [] - predictor_corrector_preamble = "" + lattice_velocities: list[list[float]] = [] + velocities: list[list[float]] = [] + predictor_corrector: list = [] + predictor_corrector_preamble: str = "" if read_velocities: # Parse the lattice velocities and current lattice, if present. @@ -504,10 +527,15 @@ def from_str(cls, data, default_names=None, read_velocities=True) -> Self: lattice_velocities=lattice_velocities, ) - def get_str(self, direct: bool = True, vasp4_compatible: bool = False, significant_figures: int = 16) -> str: + def get_str( + self, + direct: bool = True, + vasp4_compatible: bool = False, + significant_figures: int = 16, + ) -> str: """ - Returns a string to be written as a POSCAR file. By default, site - symbols are written, which means compatibility is for vasp >= 5. + Return a string to be written as a POSCAR file. By default, site + symbols are written, which is compatible for vasp >= 5. Args: direct (bool): Whether coordinates are output in direct or @@ -515,7 +543,7 @@ def get_str(self, direct: bool = True, vasp4_compatible: bool = False, significa vasp4_compatible (bool): Set to True to omit site symbols on 6th line to maintain backward vasp 4.x compatibility. Defaults to False. - significant_figures (int): No. of significant figures to + significant_figures (int): Number of significant digits to output all quantities. Defaults to 16. Note that positions are output in fixed point, while velocities are output in scientific format. @@ -530,23 +558,28 @@ def get_str(self, direct: bool = True, vasp4_compatible: bool = False, significa if np.linalg.det(lattice.matrix) < 0: lattice = Lattice(-lattice.matrix) - format_str = f"{{:{significant_figures + 5}.{significant_figures}f}}" - lines = [self.comment, "1.0"] + # Add comment and lattice + format_str: str = f"{{:{significant_figures + 5}.{significant_figures}f}}" + lines: list[str] = [self.comment, "1.0"] for vec in lattice.matrix: lines.append(" ".join(format_str.format(c) for c in vec)) + # Add element symbols if self.true_names and not vasp4_compatible: lines.append(" ".join(self.site_symbols)) lines.append(" ".join(map(str, self.natoms))) + if self.selective_dynamics: lines.append("Selective dynamics") + lines.append("direct" if direct else "cartesian") + # Add ion positions and selective dynamics for idx, site in enumerate(self.structure): - coords = site.frac_coords if direct else site.coords - line = " ".join(format_str.format(c) for c in coords) + coords: ArrayLike = site.frac_coords if direct else site.coords + line: str = " ".join(format_str.format(c) for c in coords) if self.selective_dynamics is not None: - sd = ["T" if j else "F" for j in self.selective_dynamics[idx]] + sd: list[str] = ["T" if j else "F" for j in self.selective_dynamics[idx]] line += f" {sd[0]} {sd[1]} {sd[2]}" line += f" {site.species_string}" lines.append(line) @@ -586,16 +619,9 @@ def get_str(self, direct: bool = True, vasp4_compatible: bool = False, significa get_string = get_str - def __repr__(self): - return self.get_str() - - def __str__(self): - """String representation of Poscar file.""" - return self.get_str() - - def write_file(self, filename: PathLike, **kwargs): + def write_file(self, filename: PathLike, **kwargs) -> None: """ - Writes POSCAR to a file. The supported kwargs are the same as those for + Write POSCAR to a file. The supported kwargs are the same as those for the Poscar.get_str method and are passed through directly. """ with zopen(filename, mode="wt") as file: @@ -632,17 +658,17 @@ def from_dict(cls, dct: dict) -> Self: predictor_corrector=dct.get("predictor_corrector"), ) - def set_temperature(self, temperature: float): + def set_temperature(self, temperature: float) -> None: """ - Initializes the velocities based on Maxwell-Boltzmann distribution. + Initialize the velocities based on Maxwell-Boltzmann distribution. Removes linear, but not angular drift (same as VASP). - Scales the energies to the exact temperature (microcanonical ensemble) - Velocities are given in A/fs. This is the vasp default when + Scale the energies to the exact temperature (microcanonical ensemble) + Velocities are given in A/fs. This is the VASP default when direct/cartesian is not specified (even when positions are given in - direct coordinates) + direct coordinates). - Overwrites imported velocities, if any. + Overwrite imported velocities, if any. Args: temperature (float): Temperature in Kelvin. @@ -650,18 +676,18 @@ def set_temperature(self, temperature: float): # mean 0 variance 1 velocities = np.random.randn(len(self.structure), 3) - # in AMU, (N,1) array + # In AMU, (N,1) array atomic_masses = np.array([site.specie.atomic_mass.to("kg") for site in self.structure]) dof = 3 * len(self.structure) - 3 - # remove linear drift (net momentum) + # Remove linear drift (net momentum) velocities -= np.average(atomic_masses[:, np.newaxis] * velocities, axis=0) / np.average(atomic_masses) - # scale velocities due to atomic masses + # Scale velocities due to atomic masses # mean 0 std proportional to sqrt(1/m) velocities /= atomic_masses[:, np.newaxis] ** (1 / 2) - # scale velocities to get correct temperature + # Scale velocities to get correct temperature energy = np.sum(1 / 2 * atomic_masses * np.sum(velocities**2, axis=1)) scale = (temperature * dof / (2 * energy / const.k)) ** (1 / 2) @@ -670,9 +696,9 @@ def set_temperature(self, temperature: float): self.temperature = temperature self.structure.site_properties.pop("selective_dynamics", None) self.structure.site_properties.pop("predictor_corrector", None) - # returns as a list of lists to be consistent with the other - # initializations + # Set as list[list] to be consistent with the other + # initializations self.structure.add_site_property("velocities", velocities.tolist()) @@ -682,19 +708,19 @@ class BadPoscarWarning(UserWarning): class Incar(dict, MSONable): """ - INCAR object for reading and writing INCAR files. Essentially consists of - a dictionary with some helper functions. + INCAR object for reading and writing INCAR files. + Essentially a dictionary with some helper functions. """ - def __init__(self, params: dict[str, Any] | None = None): + def __init__(self, params: dict[str, Any] | None = None) -> None: """ - Creates an Incar object. + Create an Incar object. Args: params (dict): A set of input parameters as a dictionary. """ super().__init__() - if params: + if params is not None: # if Incar contains vector-like magmoms given as a list # of floats, convert to a list of lists if (params.get("MAGMOM") and isinstance(params["MAGMOM"][0], (int, float))) and ( @@ -707,10 +733,10 @@ def __init__(self, params: dict[str, Any] | None = None): self.update(params) - def __setitem__(self, key: str, val: Any): + def __setitem__(self, key: str, val: Any) -> None: """ - Add parameter-val pair to Incar. Warns if parameter is not in list of - valid INCAR tags. Also cleans the parameter and val by stripping + Add parameter-val pair to Incar. Warn if parameter is not in list of + valid INCAR tags. Also clean the parameter and val by stripping leading and trailing white spaces. """ super().__setitem__( @@ -718,6 +744,21 @@ def __setitem__(self, key: str, val: Any): Incar.proc_val(key.strip(), val.strip()) if isinstance(val, str) else val, ) + def __str__(self) -> str: + return self.get_str(sort_keys=True, pretty=False) + + def __add__(self, other: Incar) -> Incar: + """ + Add all the values of another INCAR object to this object. + Facilitate the use of "standard" INCARs. + """ + params: dict[str, Any] = dict(self.items()) + for key, val in other.items(): + if key in self and val != self[key]: + raise ValueError(f"INCARs have conflicting values for {key}: {self[key]} != {val}") + params[key] = val + return Incar(params) + def as_dict(self) -> dict: """MSONable dict.""" dct = dict(self) @@ -738,22 +779,21 @@ def from_dict(cls, dct: dict[str, Any]) -> Self: dct["MAGMOM"] = [Magmom.from_dict(m) for m in dct["MAGMOM"]] return Incar({k: v for k, v in dct.items() if k not in ("@module", "@class")}) - def copy(self): + def copy(self) -> Self: return type(self)(self) def get_str(self, sort_keys: bool = False, pretty: bool = False) -> str: """ - Returns a string representation of the INCAR. The reason why this - method is different from the __str__ method is to provide options for - pretty printing. + Return a string representation of the INCAR. Differ from the + __str__ method to provide options for pretty printing. Args: - sort_keys (bool): Set to True to sort the INCAR parameters + sort_keys (bool): Whether to sort the INCAR parameters alphabetically. Defaults to False. - pretty (bool): Set to True for pretty aligned output. Defaults - to False. + pretty (bool): Whether to pretty align output. + Defaults to False. """ - keys = sorted(self) if sort_keys else list(self) + keys: list[str] = sorted(self) if sort_keys else list(self) lines = [] for key in keys: if key == "MAGMOM" and isinstance(self[key], list): @@ -762,13 +802,13 @@ def get_str(self, sort_keys: bool = False, pretty: bool = False) -> str: if isinstance(self[key][0], (list, Magmom)) and (self.get("LSORBIT") or self.get("LNONCOLLINEAR")): value.append(" ".join(str(i) for j in self[key] for i in j)) elif self.get("LSORBIT") or self.get("LNONCOLLINEAR"): - for m, g in itertools.groupby(self[key]): - value.append(f"3*{len(tuple(g))}*{m}") + for _key, group in itertools.groupby(self[key]): + value.append(f"3*{len(tuple(group))}*{_key}") else: # float() to ensure backwards compatibility between # float magmoms and Magmom objects - for m, g in itertools.groupby(self[key], key=float): - value.append(f"{len(tuple(g))}*{m}") + for _key, group in itertools.groupby(self[key], key=float): + value.append(f"{len(tuple(group))}*{_key}") lines.append([key, " ".join(value)]) elif isinstance(self[key], list): @@ -780,10 +820,7 @@ def get_str(self, sort_keys: bool = False, pretty: bool = False) -> str: return str(tabulate([[line[0], "=", line[1]] for line in lines], tablefmt="plain")) return str_delimited(lines, None, " = ") + "\n" - def __str__(self): - return self.get_str(sort_keys=True, pretty=False) - - def write_file(self, filename: PathLike): + def write_file(self, filename: PathLike) -> None: """Write Incar to a file. Args: @@ -794,7 +831,7 @@ def write_file(self, filename: PathLike): @classmethod def from_file(cls, filename: PathLike) -> Self: - """Reads an Incar object from a file. + """Read an Incar object from a file. Args: filename (str): Filename for file @@ -807,7 +844,7 @@ def from_file(cls, filename: PathLike) -> Self: @classmethod def from_str(cls, string: str) -> Self: - """Reads an Incar object from a string. + """Read an Incar object from a string. Args: string (str): Incar string @@ -815,26 +852,35 @@ def from_str(cls, string: str) -> Self: Returns: Incar object """ - lines = list(clean_lines(string.splitlines())) - params = {} + lines: list[str] = list(clean_lines(string.splitlines())) + params: dict[str, Any] = {} for line in lines: for sline in line.split(";"): if match := re.match(r"(\w+)\s*=\s*(.*)", sline.strip()): - key = match.group(1).strip() - val = match.group(2).strip() - val = Incar.proc_val(key, val) - params[key] = val + key: str = match[1].strip() + val: Any = match[2].strip() + params[key] = Incar.proc_val(key, val) return cls(params) @staticmethod - def proc_val(key: str, val: Any): - """Helper method to convert INCAR parameters to proper types like ints, floats, lists, etc. + def proc_val(key: str, val: Any) -> list | bool | float | int | str: + """Helper method to convert INCAR parameters to proper types + like ints, floats, lists, etc. Args: - key: INCAR parameter key - val: Actual value of INCAR parameter. - """ - list_keys = ("LDAUU", "LDAUL", "LDAUJ", "MAGMOM", "DIPOL", "LANGEVIN_GAMMA", "QUAD_EFG", "EINT") + key (str): INCAR parameter key + val (Any): Value of INCAR parameter. + """ + list_keys = ( + "LDAUU", + "LDAUL", + "LDAUJ", + "MAGMOM", + "DIPOL", + "LANGEVIN_GAMMA", + "QUAD_EFG", + "EINT", + ) bool_keys = ( "LDAU", "LWAVE", @@ -847,7 +893,18 @@ def proc_val(key: str, val: Any): "LSORBIT", "LNONCOLLINEAR", ) - float_keys = ("EDIFF", "SIGMA", "TIME", "ENCUTFOCK", "HFSCREEN", "POTIM", "EDIFFG", "AGGAC", "PARAM1", "PARAM2") + float_keys = ( + "EDIFF", + "SIGMA", + "TIME", + "ENCUTFOCK", + "HFSCREEN", + "POTIM", + "EDIFFG", + "AGGAC", + "PARAM1", + "PARAM2", + ) int_keys = ( "NSW", "NBANDS", @@ -872,8 +929,9 @@ def proc_val(key: str, val: Any): ) lower_str_keys = ("ML_MODE",) - def smart_int_or_float(num_str): - if num_str.find(".") != -1 or num_str.lower().find("e") != -1: + def smart_int_or_float(num_str: str) -> str | float: + """Determine whether a string represents an integer or a float.""" + if "." in num_str or "e" in num_str.lower(): return float(num_str) return int(num_str) @@ -889,17 +947,18 @@ def smart_int_or_float(num_str): else: output.append(smart_int_or_float(tok[0])) return output + if key in bool_keys: if match := re.match(r"^\.?([T|F|t|f])[A-Za-z]*\.?", val): - return match.group(1).lower() == "t" + return match[1].lower() == "t" raise ValueError(f"{key} should be a boolean type!") if key in float_keys: - return float(re.search(r"^-?\d*\.?\d*[e|E]?-?\d*", val).group(0)) # type: ignore + return float(re.search(r"^-?\d*\.?\d*[e|E]?-?\d*", val)[0]) # type: ignore[index] if key in int_keys: - return int(re.match(r"^-?[0-9]+", val).group(0)) # type: ignore + return int(re.match(r"^-?[0-9]+", val)[0]) # type: ignore[index] if key in lower_str_keys: return val.strip().lower() @@ -907,7 +966,7 @@ def smart_int_or_float(num_str): except ValueError: pass - # Not in standard keys. We will try a hierarchy of conversions. + # Not in known keys. We will try a hierarchy of conversions. try: return int(val) except ValueError: @@ -928,7 +987,7 @@ def smart_int_or_float(num_str): def diff(self, other: Incar) -> dict[str, dict[str, Any]]: """ - Diff function for Incar. Compares two Incars and indicates which + Diff function for Incar. Compare two Incars and indicate which parameters are the same and which are not. Useful for checking whether two runs were done using the same parameters. @@ -940,31 +999,21 @@ def diff(self, other: Incar) -> dict[str, dict[str, Any]]: {"Same" : parameters_that_are_the_same, "Different": parameters_that_are_different} Note that the parameters are return as full dictionaries of values. E.g. {"ISIF":3} """ - similar_param = {} - different_param = {} + similar_params = {} + different_params = {} for k1, v1 in self.items(): if k1 not in other: - different_param[k1] = {"INCAR1": v1, "INCAR2": None} + different_params[k1] = {"INCAR1": v1, "INCAR2": None} elif v1 != other[k1]: - different_param[k1] = {"INCAR1": v1, "INCAR2": other[k1]} + different_params[k1] = {"INCAR1": v1, "INCAR2": other[k1]} else: - similar_param[k1] = v1 + similar_params[k1] = v1 + for k2, v2 in other.items(): - if k2 not in similar_param and k2 not in different_param and k2 not in self: - different_param[k2] = {"INCAR1": None, "INCAR2": v2} - return {"Same": similar_param, "Different": different_param} + if k2 not in similar_params and k2 not in different_params and k2 not in self: + different_params[k2] = {"INCAR1": None, "INCAR2": v2} - def __add__(self, other): - """ - Add all the values of another INCAR object to this object. - Facilitates the use of "standard" INCARs. - """ - params = dict(self.items()) - for key, val in other.items(): - if key in self and val != self[key]: - raise ValueError(f"Incars have conflicting values for {key}: {self[key]} != {val}") - params[key] = val - return Incar(params) + return {"Same": similar_params, "Different": different_params} def check_params(self) -> None: """Check INCAR for invalid tags or values. @@ -1009,7 +1058,7 @@ class KpointsSupportedModes(Enum): Cartesian = 4 Reciprocal = 5 - def __str__(self): + def __str__(self) -> str: return str(self.name) @classmethod @@ -1046,7 +1095,7 @@ def __init__( tet_number: int = 0, tet_weight: float = 0, tet_connections=None, - ): + ) -> None: """ Highly flexible constructor for Kpoints object. The flexibility comes at the cost of usability and in general, it is recommended that you use @@ -1502,13 +1551,14 @@ def write_file(self, filename: str) -> None: with zopen(filename, mode="wt") as file: file.write(str(self)) - def __repr__(self): + def __repr__(self) -> str: lines = [self.comment, str(self.num_kpts), self.style.name] style = self.style.name.lower()[0] if style == "l": lines.append(self.coord_type) for idx, kpt in enumerate(self.kpts): - lines.append(" ".join(map(str, kpt))) + # TODO (@DanielYang59): fix the following type annotation + lines.append(" ".join(map(str, kpt))) # type: ignore[arg-type] if style == "l": lines[-1] += f" ! {self.labels[idx]}" if idx % 2 == 1: @@ -1584,21 +1634,25 @@ def from_dict(cls, dct: dict) -> Self: ) -def _parse_bool(string): +def _parse_bool(string: str) -> bool: if match := re.match(r"^\.?([TFtf])[A-Za-z]*\.?", string): return match[1] in {"T", "t"} raise ValueError(f"{string} should be a boolean type!") -def _parse_float(string): - return float(re.search(r"^-?\d*\.?\d*[eE]?-?\d*", string).group(0)) +def _parse_float(string: str) -> float: + if match := re.search(r"^-?\d*\.?\d*[eE]?-?\d*", string): + return float(match[0]) + raise ValueError(f"{string} should be a float type!") -def _parse_int(string): - return int(re.match(r"^-?[0-9]+", string).group(0)) +def _parse_int(string: str) -> int: + if match := re.match(r"^-?[0-9]+", string): + return int(match[0]) + raise ValueError(f"{string} should be an int type!") -def _parse_list(string): +def _parse_list(string: str) -> list[float]: return [float(y) for y in re.split(r"\s+", string.strip()) if not y.isalpha()] @@ -1610,47 +1664,47 @@ def _parse_list(string): PYMATGEN_POTCAR_HASHES = loadfn(f"{module_dir}/vasp_potcar_pymatgen_hashes.json") # written to some newer POTCARs by VASP VASP_POTCAR_HASHES = loadfn(f"{module_dir}/vasp_potcar_file_hashes.json") -POTCAR_STATS_PATH = os.path.join(module_dir, "potcar-summary-stats.json.bz2") +POTCAR_STATS_PATH: str = os.path.join(module_dir, "potcar-summary-stats.json.bz2") class PotcarSingle: """ Object for a **single** POTCAR. The builder assumes the POTCAR contains - the complete untouched data in "data" as a string and a dict of keywords. + the complete untouched string "data" and a dict of keywords. Attributes: data (str): POTCAR data as a string. keywords (dict): Keywords parsed from the POTCAR as a dict. All keywords are also accessible as attributes in themselves. E.g., potcar.enmax, potcar.encut, etc. - md5 hashes of the entire POTCAR file and the actual data are validated + MD5 hashes of the entire POTCAR file and the actual data are validated against a database of known good hashes. Appropriate warnings or errors - are raised if a POTCAR hash fails validation. + are raised if validation fails. """ - # NB: there are multiple releases of the {LDA,PBE} {52,54} POTCARs - # the original (univie) releases include no SHA256 hashes nor COPYR fields + # Note: there are multiple releases of the {LDA,PBE} {52,54} POTCARs + # the original (UNIVIE) releases include no SHA256 hashes nor COPYR fields # in the PSCTR/header field. # We indicate the older release in `functional_dir` as PBE_52, PBE_54, LDA_52, LDA_54. # The newer release is indicated as PBE_52_W_HASH, etc. - functional_dir = dict( - PBE="POT_GGA_PAW_PBE", - PBE_52="POT_GGA_PAW_PBE_52", - PBE_52_W_HASH="POTPAW_PBE_52", - PBE_54="POT_GGA_PAW_PBE_54", - PBE_54_W_HASH="POTPAW_PBE_54", - PBE_64="POT_PAW_PBE_64", - LDA="POT_LDA_PAW", - LDA_52="POT_LDA_PAW_52", - LDA_52_W_HASH="POTPAW_LDA_52", - LDA_54="POT_LDA_PAW_54", - LDA_54_W_HASH="POTPAW_LDA_54", - LDA_64="POT_LDA_PAW_64", - PW91="POT_GGA_PAW_PW91", - LDA_US="POT_LDA_US", - PW91_US="POT_GGA_US_PW91", - Perdew_Zunger81="POT_LDA_PAW", - ) + functional_dir = { + "PBE": "POT_GGA_PAW_PBE", + "PBE_52": "POT_GGA_PAW_PBE_52", + "PBE_52_W_HASH": "POTPAW_PBE_52", + "PBE_54": "POT_GGA_PAW_PBE_54", + "PBE_54_W_HASH": "POTPAW_PBE_54", + "PBE_64": "POT_PAW_PBE_64", + "LDA": "POT_LDA_PAW", + "LDA_52": "POT_LDA_PAW_52", + "LDA_52_W_HASH": "POTPAW_LDA_52", + "LDA_54": "POT_LDA_PAW_54", + "LDA_54_W_HASH": "POTPAW_LDA_54", + "LDA_64": "POT_LDA_PAW_64", + "PW91": "POT_GGA_PAW_PW91", + "LDA_US": "POT_LDA_US", + "PW91_US": "POT_GGA_US_PW91", + "Perdew_Zunger81": "POT_LDA_PAW", + } functional_tags = { "pe": {"name": "PBE", "class": "GGA"}, @@ -1666,95 +1720,92 @@ class PotcarSingle: "wi": {"name": "Wigner Interpolation", "class": "LDA"}, } - parse_functions = dict( - LULTRA=_parse_bool, - LUNSCR=_parse_bool, - LCOR=_parse_bool, - LPAW=_parse_bool, - EATOM=_parse_float, - RPACOR=_parse_float, - POMASS=_parse_float, - ZVAL=_parse_float, - RCORE=_parse_float, - RWIGS=_parse_float, - ENMAX=_parse_float, - ENMIN=_parse_float, - EMMIN=_parse_float, - EAUG=_parse_float, - DEXC=_parse_float, - RMAX=_parse_float, - RAUG=_parse_float, - RDEP=_parse_float, - RDEPT=_parse_float, - QCUT=_parse_float, - QGAM=_parse_float, - RCLOC=_parse_float, - IUNSCR=_parse_int, - ICORE=_parse_int, - NDATA=_parse_int, - VRHFIN=str.strip, - LEXCH=str.strip, - TITEL=str.strip, - STEP=_parse_list, - RRKJ=_parse_list, - GGA=_parse_list, - SHA256=str.strip, - COPYR=str.strip, - ) - - # used for POTCAR validation + parse_functions = { + "LULTRA": _parse_bool, + "LUNSCR": _parse_bool, + "LCOR": _parse_bool, + "LPAW": _parse_bool, + "EATOM": _parse_float, + "RPACOR": _parse_float, + "POMASS": _parse_float, + "ZVAL": _parse_float, + "RCORE": _parse_float, + "RWIGS": _parse_float, + "ENMAX": _parse_float, + "ENMIN": _parse_float, + "EMMIN": _parse_float, + "EAUG": _parse_float, + "DEXC": _parse_float, + "RMAX": _parse_float, + "RAUG": _parse_float, + "RDEP": _parse_float, + "RDEPT": _parse_float, + "QCUT": _parse_float, + "QGAM": _parse_float, + "RCLOC": _parse_float, + "IUNSCR": _parse_int, + "ICORE": _parse_int, + "NDATA": _parse_int, + "VRHFIN": str.strip, + "LEXCH": str.strip, + "TITEL": str.strip, + "STEP": _parse_list, + "RRKJ": _parse_list, + "GGA": _parse_list, + "SHA256": str.strip, + "COPYR": str.strip, + } + + # Used for POTCAR validation _potcar_summary_stats = loadfn(POTCAR_STATS_PATH) def __init__(self, data: str, symbol: str | None = None) -> None: """ Args: - data (str): Complete and single POTCAR file as a string. - symbol (str): POTCAR symbol corresponding to the filename suffix e.g. "Tm_3" for POTCAR.TM_3". - If not given, pymatgen will attempt to extract the symbol from the file itself. This is - not always reliable! + data (str): Complete, single and raw POTCAR file as a string. + symbol (str): POTCAR symbol corresponding to the filename suffix + e.g. "Tm_3" for POTCAR.TM_3". + If not given, pymatgen will attempt to extract the symbol + from the file itself, but is not always reliable! """ - self.data = data # raw POTCAR as a string + self.data = data - # VASP parses header in vasprun.xml and this differs from the titel + # VASP parses header in vasprun.xml and this differs from the TITEL self.header = data.split("\n")[0].strip() match = re.search(r"(?s)(parameters from PSCTR are:.*?END of PSCTR-controll parameters)", data) - search_lines = match.group(1) if match else "" + search_lines = match[1] if match else "" keywords = {} for key, val in re.findall(r"(\S+)\s*=\s*(.*?)(?=;|$)", search_lines, flags=re.MULTILINE): try: - keywords[key] = self.parse_functions[key](val) # type: ignore + keywords[key] = self.parse_functions[key](val) # type: ignore[operator] except KeyError: warnings.warn(f"Ignoring unknown variable type {key}") PSCTR: dict[str, Any] = {} array_search = re.compile(r"(-*[0-9.]+)") - orbitals = [] - descriptions = [] - atomic_config_match = re.search(r"(?s)Atomic configuration(.*?)Description", search_lines) - if atomic_config_match: - lines = atomic_config_match.group(1).splitlines() + orbitals: list[Orbital] = [] + descriptions: list[OrbitalDescription] = [] + if atomic_config_match := re.search(r"(?s)Atomic configuration(.*?)Description", search_lines): + lines = atomic_config_match[1].splitlines() match = re.search(r"([0-9]+)", lines[1]) - num_entries = int(match.group(1)) if match else 0 + num_entries = int(match[1]) if match else 0 PSCTR["nentries"] = num_entries for line in lines[3:]: - orbit = array_search.findall(line) - if orbit: + if orbit := array_search.findall(line): orbitals.append( Orbital(int(orbit[0]), int(orbit[1]), float(orbit[2]), float(orbit[3]), float(orbit[4])) ) PSCTR["Orbitals"] = tuple(orbitals) - description_string = re.search( + if description_string := re.search( r"(?s)Description\s*\n(.*?)Error from kinetic energy argument \(eV\)", search_lines, - ) - if description_string: - for line in description_string.group(1).splitlines(): - description = array_search.findall(line) - if description: + ): + for line in description_string[1].splitlines(): + if description := array_search.findall(line): descriptions.append( OrbitalDescription( int(description[0]), @@ -1775,13 +1826,13 @@ def __init__(self, data: str, symbol: str | None = None) -> None: ) rrkj_array = [] if rrkj_kinetic_energy_string: - for line in rrkj_kinetic_energy_string.group(1).splitlines(): + for line in rrkj_kinetic_energy_string[1].splitlines(): if "=" not in line: rrkj_array += _parse_list(line.strip("\n")) if rrkj_array: PSCTR["RRKJ"] = tuple(rrkj_array) - self.keywords = dict(sorted({**PSCTR, **keywords}.items())) + self.keywords = dict(sorted((PSCTR | keywords).items())) if symbol: self._symbol = symbol @@ -1793,7 +1844,6 @@ def __init__(self, data: str, symbol: str | None = None) -> None: # Compute the POTCAR meta to check them against the database of known metadata, # and possibly SHA256 hashes contained in the file itself. - if not self.is_valid: warnings.warn( f"POTCAR data with symbol {self.symbol} is not known to pymatgen. Your " @@ -1801,15 +1851,38 @@ def __init__(self, data: str, symbol: str | None = None) -> None: UnknownPotcarWarning, ) + def __eq__(self, other: object) -> bool: + if not isinstance(other, PotcarSingle): + return NotImplemented + return self.data == other.data and self.keywords == other.keywords + + def __getattr__(self, attr: str) -> Any: + """Delegates attributes to keywords. For example, you can use potcarsingle.enmax to get the ENMAX of the POTCAR. + + For float type properties, they are converted to the correct float. By + default, all energies in eV and all length scales are in Angstroms. + """ + try: + return self.keywords[attr.upper()] + except Exception: + raise AttributeError(attr) + def __str__(self) -> str: return f"{self.data}\n" + def __repr__(self) -> str: + cls_name = type(self).__name__ + symbol, functional = self.symbol, self.functional + TITEL, VRHFIN, n_valence_elec = (self.keywords.get(key) for key in ("TITEL", "VRHFIN", "ZVAL")) + return f"{cls_name}({symbol=}, {functional=}, {TITEL=}, {VRHFIN=}, {n_valence_elec=:.0f})" + @property def electron_configuration(self) -> list[tuple[int, str, int]] | None: """Electronic configuration of the PotcarSingle.""" if not self.nelectrons.is_integer(): warnings.warn("POTCAR has non-integer charge, electron configuration not well-defined.") return None + el = Element.from_Z(self.atomic_no) full_config = el.full_electronic_structure nelect = self.nelectrons @@ -1820,89 +1893,13 @@ def electron_configuration(self) -> list[tuple[int, str, int]] | None: nelect -= e[-1] return config - def write_file(self, filename: str) -> None: - """Write PotcarSingle to a file. - - Args: - filename (str): Filename to write to. - """ - with zopen(filename, mode="wt") as file: - file.write(str(self)) - - def __eq__(self, other: object) -> bool: - if not isinstance(other, PotcarSingle): - return NotImplemented - return self.data == other.data and self.keywords == other.keywords - - def copy(self) -> PotcarSingle: - """Returns a copy of the PotcarSingle. - - Returns: - PotcarSingle - """ - return PotcarSingle(self.data, symbol=self.symbol) - - @classmethod - def from_file(cls, filename: str) -> Self: - """Reads PotcarSingle from file. - - Args: - filename: Filename. - - Returns: - PotcarSingle - """ - match = re.search(r"(?<=POTCAR\.)(.*)(?=.gz)", str(filename)) - symbol = match[0] if match else "" - - try: - with zopen(filename, mode="rt") as file: - return cls(file.read(), symbol=symbol or None) - except UnicodeDecodeError: - warnings.warn("POTCAR contains invalid unicode errors. We will attempt to read it by ignoring errors.") - - with codecs.open(filename, "r", encoding="utf-8", errors="ignore") as file: - return cls(file.read(), symbol=symbol or None) - - @classmethod - def from_symbol_and_functional(cls, symbol: str, functional: str | None = None) -> Self: - """Makes a PotcarSingle from a symbol and functional. - - Args: - symbol (str): Symbol, e.g., Li_sv - functional (str): Functional, e.g., PBE - - Returns: - PotcarSingle - """ - functional = functional or SETTINGS.get("PMG_DEFAULT_FUNCTIONAL", "PBE") - assert isinstance(functional, str) # mypy type narrowing - funcdir = cls.functional_dir[functional] - PMG_VASP_PSP_DIR = SETTINGS.get("PMG_VASP_PSP_DIR") - if PMG_VASP_PSP_DIR is None: - raise ValueError( - f"No POTCAR for {symbol} with {functional=} found. Please set the PMG_VASP_PSP_DIR in .pmgrc.yaml." - ) - paths_to_try = [ - os.path.join(PMG_VASP_PSP_DIR, funcdir, f"POTCAR.{symbol}"), - os.path.join(PMG_VASP_PSP_DIR, funcdir, symbol, "POTCAR"), - ] - for path in paths_to_try: - path = os.path.expanduser(path) - path = zpath(path) - if os.path.isfile(path): - return cls.from_file(path) - raise OSError( - f"You do not have the right POTCAR with {functional=} and {symbol=} " - f"in your {PMG_VASP_PSP_DIR=}. Paths tried: {paths_to_try}" - ) - @property def element(self) -> str: """Attempt to return the atomic symbol based on the VRHFIN keyword.""" element = self.keywords["VRHFIN"].split(":")[0].strip() try: return Element(element).symbol + except ValueError: # VASP incorrectly gives the element symbol for Xe as "X" # Some potentials, e.g., Zr_sv, gives the symbol as r. @@ -1917,17 +1914,17 @@ def atomic_no(self) -> int: @property def nelectrons(self) -> float: - """Number of electrons""" + """Number of electrons.""" return self.zval @property def symbol(self) -> str: - """The POTCAR symbol, e.g. W_pv""" + """The POTCAR symbol, e.g. W_pv.""" return self._symbol @property def potential_type(self) -> Literal["NC", "PAW", "US"]: - """Type of PSP. E.g., US, PAW, etc.""" + """Type of PSP: NC, US or PAW.""" if self.lultra: return "US" if self.lpaw: @@ -1940,57 +1937,357 @@ def functional(self) -> str | None: return self.functional_tags.get(self.LEXCH.lower(), {}).get("name") @property - def functional_class(self): + def functional_class(self) -> str | None: """Functional class associated with PotcarSingle.""" return self.functional_tags.get(self.LEXCH.lower(), {}).get("class") - def verify_potcar(self) -> tuple[bool, bool]: - """ - Attempts to verify the integrity of the POTCAR data. - - This method checks the whole file (removing only the SHA256 - metadata) against the SHA256 hash in the header if this is found. - If no SHA256 hash is found in the file, the file hash (md5 hash of the - whole file) is checked against all POTCAR file hashes known to pymatgen. + @property + def hash_sha256_from_file(self) -> str | None: + """SHA256 hash of the POTCAR file as read from the file. None if no SHA256 hash is found.""" + if sha256 := getattr(self, "SHA256", None): + return sha256.split()[0] + return None - Returns: - tuple[bool, bool]: has_sha256 and passed_hash_check are returned. - """ - if self.hash_sha256_from_file: - has_sha256 = True - hash_is_valid = self.hash_sha256_from_file == self.sha256_computed_file_hash - else: - has_sha256 = False - # if no sha256 hash is found in the POTCAR file, compare the whole - # file with known potcar file hashes. - md5_file_hash = self.md5_computed_file_hash - hash_is_valid = md5_file_hash in VASP_POTCAR_HASHES - return has_sha256, hash_is_valid + @property + def sha256_computed_file_hash(self) -> str: + """Computes a SHA256 hash of the PotcarSingle EXCLUDING lines starting with 'SHA256' and 'COPYR'.""" + # We have to remove lines with the hash itself and the copyright + # notice to get the correct hash. + potcar_list = self.data.split("\n") + potcar_to_hash = [line for line in potcar_list if not line.strip().startswith(("SHA256", "COPYR"))] + potcar_to_hash_str = "\n".join(potcar_to_hash) + return sha256(potcar_to_hash_str.encode("utf-8")).hexdigest() - def identify_potcar( - self, mode: Literal["data", "file"] = "data", data_tol: float = 1e-6 - ) -> tuple[list[str], list[str]]: - """ - Identify the symbol and compatible functionals associated with this PotcarSingle. + @property + def md5_computed_file_hash(self) -> str: + """MD5 hash of the entire PotcarSingle.""" + # usedforsecurity=False needed in FIPS mode (Federal Information Processing Standards) + # https://github.com/materialsproject/pymatgen/issues/2804 + md5 = hashlib.new("md5", usedforsecurity=False) # hashlib.md5(usedforsecurity=False) is py39+ + md5.update(self.data.encode("utf-8")) + return md5.hexdigest() - This method checks the summary statistics of either the POTCAR metadadata - (PotcarSingle._summary_stats[key]["header"] for key in ("keywords", "stats") ) - or the entire POTCAR file (PotcarSingle._summary_stats) against a database - of hashes for POTCARs distributed with VASP 5.4.4. + @property + def md5_header_hash(self) -> str: + """MD5 hash of the metadata defining the PotcarSingle.""" + hash_str = "" + for k, v in self.keywords.items(): + # For newer POTCARS we have to exclude 'SHA256' and 'COPYR lines + # since they were not used in the initial hashing + if k in {"nentries", "Orbitals", "SHA256", "COPYR"}: + continue + hash_str += f"{k}" + if isinstance(v, (bool, int)): + hash_str += f"{v}" + elif isinstance(v, float): + hash_str += f"{v:.3f}" + elif isinstance(v, (tuple, list)): + for item in v: + if isinstance(item, float): + hash_str += f"{item:.3f}" + elif isinstance(item, (Orbital, OrbitalDescription)): + for item_v in item: + if isinstance(item_v, (int, str)): + hash_str += f"{item_v}" + elif isinstance(item_v, float): + hash_str += f"{item_v:.3f}" + else: + hash_str += f"{item_v}" if item_v else "" + else: + hash_str += v.replace(" ", "") - Args: - mode ('data' | 'file'): 'data' mode checks the POTCAR header keywords and stats only - while 'file' mode checks the entire summary stats. - data_tol (float): Tolerance for comparing the summary statistics of the POTCAR - with the reference statistics. + self.hash_str = hash_str + # usedforsecurity=False needed in FIPS mode (Federal Information Processing Standards) + # https://github.com/materialsproject/pymatgen/issues/2804 + md5 = hashlib.new("md5", usedforsecurity=False) # hashlib.md5(usedforsecurity=False) is py39+ + md5.update(hash_str.lower().encode("utf-8")) + return md5.hexdigest() - Returns: - symbol (list): List of symbols associated with the PotcarSingle - potcar_functionals (list): List of potcar functionals associated with - the PotcarSingle + @property + def is_valid(self) -> bool: """ - if mode == "data": - check_modes = ["header"] + Check that POTCAR matches reference metadata. + Parsed metadata is stored in self._summary_stats as a human-readable dict, + self._summary_stats = { + "keywords": { + "header": list[str], + "data": list[str], + }, + "stats": { + "header": dict[float], + "data": dict[float], + }, + } + + Rationale: + Each POTCAR is structured as + Header (self.keywords) + Data (actual pseudopotential values in data blocks) + + For the Data block of POTCAR, there are unformatted data blocks + of unknown length and contents/data type, e.g., you might see + + + + ... + + ... + but this is impossible to process algorithmically without a full POTCAR schema. + Note also that POTCARs can contain **different** data keywords + + All keywords found in the header, essentially self.keywords, and the data block + ( above) are stored in self._summary_stats["keywords"] + + To avoid issues of copyright, statistics (mean, mean of abs vals, variance, max, min) + for the numeric values in the header and data sections of POTCAR are stored + in self._summary_stats["stats"] + + tol is then used to match statistical values within a tolerance + """ + + possible_potcar_matches = [] + # Some POTCARs have an LEXCH (functional used to generate the POTCAR) + # with the expected functional, e.g., the C_d POTCAR for PBE is actually an + # LDA pseudopotential. + + # Thus we have to look for matches in all POTCAR dirs, not just the ones with + # consistent values of LEXCH + for func in self.functional_dir: + for titel_no_spc in self._potcar_summary_stats[func]: + if self.TITEL.replace(" ", "") == titel_no_spc: + for potcar_subvariant in self._potcar_summary_stats[func][titel_no_spc]: + if self.VRHFIN.replace(" ", "") == potcar_subvariant["VRHFIN"]: + possible_potcar_matches.append( + { + "POTCAR_FUNCTIONAL": func, + "TITEL": titel_no_spc, + **potcar_subvariant, + } + ) + + def parse_fortran_style_str(input_str: str) -> str | bool | float | int: + """Parse any input string as bool, int, float, or failing that, str. + Used to parse FORTRAN-generated POTCAR files where it's unknown + a priori what type of data will be encountered. + """ + input_str = input_str.strip() + + if input_str.lower() in {"t", "f", "true", "false"}: + return input_str[0].lower() == "t" + + if input_str.upper() == input_str.lower() and input_str[0].isnumeric(): + # NB: fortran style floats always include a decimal point. + # While you can set, e.g., x = 1E4, you cannot print/write x without + # a decimal point: + # `write(6,*) x` --> `10000.0000` in stdout + # `write(6,'(E10.0)') x` --> segfault + # The (E10.0) means write an exponential-format number with 10 + # characters before the decimal, and 0 characters after + return float(input_str) if "." in input_str else int(input_str) + + try: + return float(input_str) + except ValueError: + return input_str + + psp_keys, psp_vals = [], [] + potcar_body = self.data.split("END of PSCTR-controll parameters\n")[1] + for row in re.split(r"\n+|;", potcar_body): # FORTRAN allows ; to delimit multiple lines merged into 1 line + tmp_str = "" + for raw_val in row.split(): + parsed_val = parse_fortran_style_str(raw_val) + if isinstance(parsed_val, str): + tmp_str += parsed_val.strip() + elif isinstance(parsed_val, (float, int)): + psp_vals.append(parsed_val) + if len(tmp_str) > 0: + psp_keys.append(tmp_str.lower()) + + keyword_vals = [] + for kwd in self.keywords: + val = self.keywords[kwd] + if isinstance(val, bool): + # has to come first since bools are also ints + keyword_vals.append(1.0 if val else 0.0) + elif isinstance(val, (float, int)): + keyword_vals.append(val) + elif hasattr(val, "__len__"): + keyword_vals += [num for num in val if isinstance(num, (float, int))] + + def data_stats(data_list: Sequence) -> dict: + """Used for hash-less and therefore less brittle POTCAR validity checking.""" + arr = np.array(data_list) + return { + "MEAN": np.mean(arr), + "ABSMEAN": np.mean(np.abs(arr)), + "VAR": np.mean(arr**2), + "MIN": arr.min(), + "MAX": arr.max(), + } + + # NB: to add future summary stats in a way that's consistent with PMG, + # it's easiest to save the summary stats as an attr of PotcarSingle + self._summary_stats: dict[str, dict] = { # for this PotcarSingle instance + "keywords": { + "header": [kwd.lower() for kwd in self.keywords], + "data": psp_keys, + }, + "stats": { + "header": data_stats(keyword_vals), + "data": data_stats(psp_vals), + }, + } + + data_match_tol: float = 1e-6 + for ref_psp in possible_potcar_matches: + key_match = all( + set(ref_psp["keywords"][key]) == set(self._summary_stats["keywords"][key]) for key in ["header", "data"] + ) + + data_diff = [ + abs(ref_psp["stats"][key][stat] - self._summary_stats["stats"][key][stat]) + for stat in ["MEAN", "ABSMEAN", "VAR", "MIN", "MAX"] + for key in ["header", "data"] + ] + data_match = all(np.array(data_diff) < data_match_tol) + + if key_match and data_match: + return True + + return False + + def write_file(self, filename: str) -> None: + """Write PotcarSingle to a file. + + Args: + filename (str): Filename to write to. + """ + with zopen(filename, mode="wt") as file: + file.write(str(self)) + + def copy(self) -> PotcarSingle: + """Return a copy of the PotcarSingle. + + Returns: + PotcarSingle + """ + return PotcarSingle(self.data, symbol=self.symbol) + + @classmethod + def from_file(cls, filename: PathLike) -> Self: + """Read PotcarSingle from file. + + Args: + filename: Filename. + + Returns: + PotcarSingle + """ + match = re.search(r"(?<=POTCAR\.)(.*)(?=.gz)", str(filename)) + symbol = match[0] if match else "" + + try: + with zopen(filename, mode="rt") as file: + return cls(file.read(), symbol=symbol or None) + + except UnicodeDecodeError: + warnings.warn("POTCAR contains invalid unicode errors. We will attempt to read it by ignoring errors.") + + with codecs.open(str(filename), "r", encoding="utf-8", errors="ignore") as file: + return cls(file.read(), symbol=symbol or None) + + @classmethod + def from_symbol_and_functional( + cls, + symbol: str, + functional: str | None = None, + ) -> Self: + """Make a PotcarSingle from a symbol and functional. + + Args: + symbol (str): Symbol, e.g., Li_sv + functional (str): Functional, e.g., PBE + + Returns: + PotcarSingle + """ + functional = functional or SETTINGS.get("PMG_DEFAULT_FUNCTIONAL", "PBE") + if functional is None: + raise ValueError("Cannot get functional.") + + funcdir = cls.functional_dir[functional] + PMG_VASP_PSP_DIR = SETTINGS.get("PMG_VASP_PSP_DIR") + if PMG_VASP_PSP_DIR is None: + raise ValueError( + f"No POTCAR for {symbol} with {functional=} found. Please set the PMG_VASP_PSP_DIR in .pmgrc.yaml." + ) + + paths_to_try: list[str] = [ + os.path.join(PMG_VASP_PSP_DIR, funcdir, f"POTCAR.{symbol}"), + os.path.join(PMG_VASP_PSP_DIR, funcdir, symbol, "POTCAR"), + ] + for path in paths_to_try: + path = os.path.expanduser(path) + path = zpath(path) + if os.path.isfile(path): + return cls.from_file(path) + + raise RuntimeError( + f"You do not have the right POTCAR with {functional=} and {symbol=} " + f"in your {PMG_VASP_PSP_DIR=}. Paths tried: {paths_to_try}" + ) + + def verify_potcar(self) -> tuple[bool, bool]: + """ + Attempt to verify the integrity of the POTCAR data. + + This method checks the whole file (removing only the SHA256 + metadata) against the SHA256 hash in the header if this is found. + If no SHA256 hash is found in the file, the file hash (md5 hash of the + whole file) is checked against all POTCAR file hashes known to pymatgen. + + Returns: + tuple[bool, bool]: has_sha256 and passed_hash_check. + """ + if self.hash_sha256_from_file: + has_sha256 = True + hash_is_valid = self.hash_sha256_from_file == self.sha256_computed_file_hash + + else: + has_sha256 = False + # If no sha256 hash is found in the POTCAR file, compare the whole + # file with known potcar file hashes. + md5_file_hash = self.md5_computed_file_hash + hash_is_valid = md5_file_hash in VASP_POTCAR_HASHES + + return has_sha256, hash_is_valid + + def identify_potcar( + self, + mode: Literal["data", "file"] = "data", + data_tol: float = 1e-6, + ) -> tuple[list[str], list[str]]: + """ + Identify the symbol and compatible functionals associated with this PotcarSingle. + + This method checks the summary statistics of either the POTCAR metadadata + (PotcarSingle._summary_stats[key]["header"] for key in ("keywords", "stats") ) + or the entire POTCAR file (PotcarSingle._summary_stats) against a database + of hashes for POTCARs distributed with VASP 5.4.4. + + Args: + mode ("data" or "file"): "data" mode checks the POTCAR header keywords + and stats only while "file" mode checks the entire summary stats. + data_tol (float): Tolerance for comparing the summary statistics of the POTCAR + with the reference statistics. + + Returns: + symbol (list): List of symbols associated with the PotcarSingle + potcar_functionals (list): List of potcar functionals associated with + the PotcarSingle + """ + if mode == "data": + check_modes = ["header"] elif mode == "file": check_modes = ["header", "data"] else: @@ -2003,12 +2300,11 @@ def identify_potcar( continue key_match = all( - set(ref_psp["keywords"][key]) == set(self._summary_stats["keywords"][key]) # type: ignore[index] - for key in check_modes + set(ref_psp["keywords"][key]) == set(self._summary_stats["keywords"][key]) for key in check_modes ) data_diff = [ - abs(ref_psp["stats"][key][stat] - self._summary_stats["stats"][key][stat]) # type: ignore[index] + abs(ref_psp["stats"][key][stat] - self._summary_stats["stats"][key][stat]) for stat in ["MEAN", "ABSMEAN", "VAR", "MIN", "MAX"] for key in check_modes ] @@ -2021,23 +2317,26 @@ def identify_potcar( for key, values in identity.items(): if len(values) == 0: - # the two keys are set simultaneously, either key being zero indicates no match + # The two keys are set simultaneously, either key being zero indicates no match return [], [] identity[key] = list(set(values)) return identity["potcar_functionals"], identity["potcar_symbols"] - def identify_potcar_hash_based(self, mode: Literal["data", "file"] = "data"): + def identify_potcar_hash_based( + self, + mode: Literal["data", "file"] = "data", + ) -> tuple[list[str], list[str]]: """ Identify the symbol and compatible functionals associated with this PotcarSingle. - This method checks the md5 hash of either the POTCAR metadadata (PotcarSingle.md5_header_hash) + This method checks the MD5 hash of either the POTCAR metadadata (PotcarSingle.md5_header_hash) or the entire POTCAR file (PotcarSingle.md5_computed_file_hash) against a database of hashes for POTCARs distributed with VASP 5.4.4. Args: - mode ('data' | 'file'): 'data' mode checks the hash of the POTCAR metadata in self.keywords, - while 'file' mode checks the hash of the entire POTCAR file. + mode ("data" | "file"): "data" mode checks the hash of the POTCAR metadata in self.keywords, + while "file" mode checks the hash of the entire POTCAR file. Returns: symbol (list): List of symbols associated with the PotcarSingle @@ -2141,255 +2440,23 @@ def identify_potcar_hash_based(self, mode: Literal["data", "file"] = "data"): raise ValueError(f"Bad {mode=}. Choose 'data' or 'file'.") if identity := hash_db.get(potcar_hash): - # convert the potcar_functionals from the .json dict into the functional + # Convert the potcar_functionals from the .json dict into the functional # keys that pymatgen uses potcar_functionals = [*{mapping_dict[i]["pymatgen_key"] for i in identity["potcar_functionals"]}] return potcar_functionals, identity["potcar_symbols"] return [], [] - @property - def hash_sha256_from_file(self) -> str | None: - """SHA256 hash of the POTCAR file as read from the file. None if no SHA256 hash is found.""" - if sha256 := getattr(self, "SHA256", None): - return sha256.split()[0] - return None - - @property - def sha256_computed_file_hash(self) -> str: - """Computes a SHA256 hash of the PotcarSingle EXCLUDING lines starting with 'SHA256' and 'COPYR'.""" - # we have to remove lines with the hash itself and the copyright - # notice to get the correct hash. - potcar_list = self.data.split("\n") - potcar_to_hash = [line for line in potcar_list if not line.strip().startswith(("SHA256", "COPYR"))] - potcar_to_hash_str = "\n".join(potcar_to_hash) - return sha256(potcar_to_hash_str.encode("utf-8")).hexdigest() - - @property - def md5_computed_file_hash(self) -> str: - """md5 hash of the entire PotcarSingle.""" - # usedforsecurity=False needed in FIPS mode (Federal Information Processing Standards) - # https://github.com/materialsproject/pymatgen/issues/2804 - md5 = hashlib.new("md5", usedforsecurity=False) # hashlib.md5(usedforsecurity=False) is py39+ - md5.update(self.data.encode("utf-8")) - return md5.hexdigest() - - @property - def md5_header_hash(self) -> str: - """Computes a md5 hash of the metadata defining the PotcarSingle.""" - hash_str = "" - for k, v in self.keywords.items(): - # for newer POTCARS we have to exclude 'SHA256' and 'COPYR lines - # since they were not used in the initial hashing - if k in ("nentries", "Orbitals", "SHA256", "COPYR"): - continue - hash_str += f"{k}" - if isinstance(v, (bool, int)): - hash_str += f"{v}" - elif isinstance(v, float): - hash_str += f"{v:.3f}" - elif isinstance(v, (tuple, list)): - for item in v: - if isinstance(item, float): - hash_str += f"{item:.3f}" - elif isinstance(item, (Orbital, OrbitalDescription)): - for item_v in item: - if isinstance(item_v, (int, str)): - hash_str += f"{item_v}" - elif isinstance(item_v, float): - hash_str += f"{item_v:.3f}" - else: - hash_str += f"{item_v}" if item_v else "" - else: - hash_str += v.replace(" ", "") - - self.hash_str = hash_str - # usedforsecurity=False needed in FIPS mode (Federal Information Processing Standards) - # https://github.com/materialsproject/pymatgen/issues/2804 - md5 = hashlib.new("md5", usedforsecurity=False) # hashlib.md5(usedforsecurity=False) is py39+ - md5.update(hash_str.lower().encode("utf-8")) - return md5.hexdigest() - - @property - def is_valid(self) -> bool: - """ - Check that POTCAR matches reference metadata. - Parsed metadata is stored in self._summary_stats as a human-readable dict, - self._summary_stats = { - "keywords": { - "header": list[str], - "data": list[str], - }, - "stats": { - "header": dict[float], - "data": dict[float], - }, - } - - Rationale: - Each POTCAR is structured as - Header (self.keywords) - Data (actual pseudopotential values in data blocks) - - For the Data block of POTCAR, there are unformatted data blocks - of unknown length and contents/data type, e.g., you might see - - - - ... - - ... - but this is impossible to process algorithmically without a full POTCAR schema. - Note also that POTCARs can contain **different** data keywords - - All keywords found in the header, essentially self.keywords, and the data block - ( above) are stored in self._summary_stats["keywords"] - - To avoid issues of copyright, statistics (mean, mean of abs vals, variance, max, min) - for the numeric values in the header and data sections of POTCAR are stored - in self._summary_stats["stats"] - - tol is then used to match statistical values within a tolerance - """ - - possible_potcar_matches = [] - # Some POTCARs have an LEXCH (functional used to generate the POTCAR) - # with the expected functional, e.g., the C_d POTCAR for PBE is actually an - # LDA pseudopotential. - - # Thus we have to look for matches in all POTCAR dirs, not just the ones with - # consistent values of LEXCH - for func in self.functional_dir: - for titel_no_spc in self._potcar_summary_stats[func]: - if self.TITEL.replace(" ", "") == titel_no_spc: - for potcar_subvariant in self._potcar_summary_stats[func][titel_no_spc]: - if self.VRHFIN.replace(" ", "") == potcar_subvariant["VRHFIN"]: - possible_potcar_matches.append( - { - "POTCAR_FUNCTIONAL": func, - "TITEL": titel_no_spc, - **potcar_subvariant, - } - ) - - def parse_fortran_style_str(input_str: str) -> Any: - """Parse any input string as bool, int, float, or failing that, str. - Used to parse FORTRAN-generated POTCAR files where it's unknown - a priori what type of data will be encountered. - """ - input_str = input_str.strip() - - if input_str.lower() in {"t", "f", "true", "false"}: - return input_str[0].lower() == "t" - - if input_str.upper() == input_str.lower() and input_str[0].isnumeric(): - if "." in input_str: - # NB: fortran style floats always include a decimal point. - # While you can set, e.g., x = 1E4, you cannot print/write x without - # a decimal point: - # `write(6,*) x` --> `10000.0000` in stdout - # `write(6,'(E10.0)') x` --> segfault - # The (E10.0) means write an exponential-format number with 10 - # characters before the decimal, and 0 characters after - return float(input_str) - return int(input_str) - try: - return float(input_str) - except ValueError: - return input_str - - psp_keys, psp_vals = [], [] - potcar_body = self.data.split("END of PSCTR-controll parameters\n")[1] - for row in re.split(r"\n+|;", potcar_body): # FORTRAN allows ; to delimit multiple lines merged into 1 line - tmp_str = "" - for raw_val in row.split(): - parsed_val = parse_fortran_style_str(raw_val) - if isinstance(parsed_val, str): - tmp_str += parsed_val.strip() - elif isinstance(parsed_val, (float, int)): - psp_vals.append(parsed_val) - if len(tmp_str) > 0: - psp_keys.append(tmp_str.lower()) - - keyword_vals = [] - for kwd in self.keywords: - val = self.keywords[kwd] - if isinstance(val, bool): - # has to come first since bools are also ints - keyword_vals.append(1.0 if val else 0.0) - elif isinstance(val, (float, int)): - keyword_vals.append(val) - elif hasattr(val, "__len__"): - keyword_vals += [num for num in val if isinstance(num, (float, int))] - - def data_stats(data_list: Sequence) -> dict: - """Used for hash-less and therefore less brittle POTCAR validity checking.""" - arr = np.array(data_list) - return { - "MEAN": np.mean(arr), - "ABSMEAN": np.mean(np.abs(arr)), - "VAR": np.mean(arr**2), - "MIN": arr.min(), - "MAX": arr.max(), - } - - # NB: to add future summary stats in a way that's consistent with PMG, - # it's easiest to save the summary stats as an attr of PotcarSingle - self._summary_stats = { # for this PotcarSingle instance - "keywords": { - "header": [kwd.lower() for kwd in self.keywords], - "data": psp_keys, - }, - "stats": { - "header": data_stats(keyword_vals), - "data": data_stats(psp_vals), - }, - } - - data_match_tol = 1e-6 - for ref_psp in possible_potcar_matches: - key_match = all( - set(ref_psp["keywords"][key]) == set(self._summary_stats["keywords"][key]) # type: ignore - for key in ["header", "data"] - ) - - data_diff = [ - abs(ref_psp["stats"][key][stat] - self._summary_stats["stats"][key][stat]) # type: ignore - for stat in ["MEAN", "ABSMEAN", "VAR", "MIN", "MAX"] - for key in ["header", "data"] - ] - data_match = all(np.array(data_diff) < data_match_tol) - - if key_match and data_match: - return True - - return False - - def __getattr__(self, attr: str) -> Any: - """Delegates attributes to keywords. For example, you can use potcarsingle.enmax to get the ENMAX of the POTCAR. - - For float type properties, they are converted to the correct float. By - default, all energies in eV and all length scales are in Angstroms. - """ - try: - return self.keywords[attr.upper()] - except Exception: - raise AttributeError(attr) - - def __repr__(self) -> str: - cls_name = type(self).__name__ - symbol, functional = self.symbol, self.functional - TITEL, VRHFIN, n_valence_elec = (self.keywords.get(key) for key in ("TITEL", "VRHFIN", "ZVAL")) - return f"{cls_name}({symbol=}, {functional=}, {TITEL=}, {VRHFIN=}, {n_valence_elec=:.0f})" - def _gen_potcar_summary_stats( - append: bool = False, vasp_psp_dir: str | None = None, summary_stats_filename: str | None = POTCAR_STATS_PATH + append: bool = False, + vasp_psp_dir: str | None = None, + summary_stats_filename: str | None = POTCAR_STATS_PATH, ): """ - This function is intended for internal use only. It regenerates the reference data in - potcar-summary-stats.json.bz2 used to validate POTCARs by comparing header values and - several statistics of copyrighted POTCAR data without having to record the POTCAR data itself. + Regenerate the reference data in potcar-summary-stats.json.bz2 used to validate POTCARs + by comparing header values and several statistics of copyrighted POTCAR data without + having to record the POTCAR data itself. THIS FUNCTION IS DESTRUCTIVE. It will completely overwrite potcar-summary-stats.json.bz2. @@ -2410,9 +2477,9 @@ def _gen_potcar_summary_stats( else: warnings.warn(f"missing {func_dir} POTCAR directory") - # use append = True if a new POTCAR library is released to add new summary stats + # Use append = True if a new POTCAR library is released to add new summary stats # without completely regenerating the dict of summary stats - # use append = False to completely regenerate the summary stats dict + # Use append = False to completely regenerate the summary stats dict new_summary_stats = loadfn(summary_stats_filename) if append else {} for func, func_dir in func_dir_exist.items(): @@ -2423,7 +2490,7 @@ def _gen_potcar_summary_stats( psp = PotcarSingle.from_file(potcar) titel_key = psp.TITEL.replace(" ", "") - # some POTCARs have the same TITEL, but are named differently + # Some POTCARs have the same TITEL, but are named differently # e.g., there is an "original" PBE POTCAR.Fe_pv and a POTCAR.Fe_pv_new # which share a TITEL but differ in their contents if titel_key not in new_summary_stats[func]: @@ -2446,7 +2513,7 @@ def _gen_potcar_summary_stats( class Potcar(list, MSONable): """ - Object for reading and writing POTCAR files for calculations. Consists of a + Read and write POTCAR files for calculations. Consists of a list of PotcarSingle. """ @@ -2480,14 +2547,31 @@ def __init__( if symbols is not None: self.set_symbols(symbols, functional, sym_potcar_map) + def __str__(self) -> str: + return "\n".join(str(potcar).strip("\n") for potcar in self) + "\n" + def __iter__(self) -> Iterator[PotcarSingle]: """Boilerplate code. Only here to supply type hint so `for psingle in Potcar()` is correctly inferred as PotcarSingle """ return super().__iter__() - def as_dict(self): - """MSONable dict representation""" + @property + def symbols(self) -> list[str]: + """Get the atomic symbols of all the atoms in the POTCAR file.""" + return [psingle.symbol for psingle in self] + + @symbols.setter + def symbols(self, symbols: Sequence[str]) -> None: + self.set_symbols(symbols, functional=self.functional) + + @property + def spec(self) -> list[dict]: + """Get the atomic symbols and hash of all the atoms in the POTCAR file.""" + return [{"symbol": psingle.symbol, "hash": psingle.md5_computed_file_hash} for psingle in self] + + def as_dict(self) -> dict: + """MSONable dict representation.""" return { "functional": self.functional, "symbols": self.symbols, @@ -2496,7 +2580,7 @@ def as_dict(self): } @classmethod - def from_dict(cls, dct) -> Self: + def from_dict(cls, dct: dict) -> Self: """ Args: dct (dict): Dict representation. @@ -2507,7 +2591,7 @@ def from_dict(cls, dct) -> Self: return Potcar(symbols=dct["symbols"], functional=dct["functional"]) @classmethod - def from_file(cls, filename: str) -> Self: + def from_file(cls, filename: PathLike) -> Self: """ Reads Potcar from file. @@ -2519,49 +2603,37 @@ def from_file(cls, filename: str) -> Self: """ with zopen(filename, mode="rt") as file: fdata = file.read() - potcar = cls() - functionals = [] + potcar = cls() + functionals: list[str | None] = [] for psingle_str in fdata.split("End of Dataset"): if p_strip := psingle_str.strip(): psingle = PotcarSingle(p_strip + "\nEnd of Dataset\n") potcar.append(psingle) functionals.append(psingle.functional) + if len(set(functionals)) != 1: raise ValueError("File contains incompatible functionals!") + potcar.functional = functionals[0] return potcar - def __str__(self) -> str: - return "\n".join(str(potcar).strip("\n") for potcar in self) + "\n" - - def write_file(self, filename: str) -> None: + def write_file(self, filename: PathLike) -> None: """ Write Potcar to a file. Args: - filename (str): filename to write to. + filename (PathLike): filename to write to. """ with zopen(filename, mode="wt") as file: file.write(str(self)) - @property - def symbols(self): - """Get the atomic symbols of all the atoms in the POTCAR file.""" - return [psingle.symbol for psingle in self] - - @symbols.setter - def symbols(self, symbols): - self.set_symbols(symbols, functional=self.functional) - - @property - def spec(self): - """Get the atomic symbols and hash of all the atoms in the POTCAR file.""" - return [{"symbol": psingle.symbol, "hash": psingle.md5_computed_file_hash} for psingle in self] - def set_symbols( - self, symbols: Sequence[str], functional: str | None = None, sym_potcar_map: dict[str, str] | None = None - ): + self, + symbols: Sequence[str], + functional: str | None = None, + sym_potcar_map: dict[str, str] | None = None, + ) -> None: """ Initialize the POTCAR from a set of symbols. Currently, the POTCARs can be fetched from a location specified in .pmgrc.yaml. Use pmg config @@ -2577,6 +2649,7 @@ def set_symbols( the given map data rather than the config file location. """ del self[:] + if sym_potcar_map: self.extend(PotcarSingle(sym_potcar_map[el]) for el in symbols) else: @@ -2600,7 +2673,7 @@ def __init__( **kwargs, ) -> None: """ - Initializes a VaspInput object with the given input files. + Initialize a VaspInput object with the given input files. Args: incar (Incar): The Incar object. @@ -2617,13 +2690,13 @@ def __init__( if optional_files is not None: self.update(optional_files) - def __str__(self): - output = [] + def __str__(self) -> str: + output: list = [] for key, val in self.items(): output.extend((key, str(val), "")) return "\n".join(output) - def as_dict(self): + def as_dict(self) -> dict: """MSONable dict.""" dct = {key: val.as_dict() for key, val in self.items()} dct["@module"] = type(self).__module__ @@ -2647,35 +2720,44 @@ def from_dict(cls, dct: dict) -> Self: sub_dct["optional_files"][key] = MontyDecoder().process_decoded(val) return cls(**sub_dct) # type: ignore[arg-type] - def write_input(self, output_dir=".", make_dir_if_not_present=True): + def write_input( + self, + output_dir: PathLike = ".", + make_dir_if_not_present: bool = True, + ) -> None: """ - Write VASP input to a directory. + Write VASP inputs to a directory. Args: - output_dir (str): Directory to write to. Defaults to current - directory ("."). + output_dir (PathLike): Directory to write to. + Defaults to current directory ("."). make_dir_if_not_present (bool): Create the directory if not present. Defaults to True. """ - if make_dir_if_not_present: - os.makedirs(output_dir, exist_ok=True) - for k, v in self.items(): - if v is not None: - with zopen(os.path.join(output_dir, k), mode="wt") as file: - file.write(str(v)) + if not os.path.isdir(output_dir) and make_dir_if_not_present: + os.makedirs(output_dir) + + for key, value in self.items(): + if value is not None: + with zopen(os.path.join(output_dir, key), mode="wt") as file: + file.write(str(value)) @classmethod - def from_directory(cls, input_dir: str, optional_files: dict | None = None) -> Self: + def from_directory( + cls, + input_dir: PathLike, + optional_files: dict | None = None, + ) -> Self: """ - Read in a set of VASP input from a directory. Note that only the + Read in a set of VASP inputs from a directory. Note that only the standard INCAR, POSCAR, POTCAR and KPOINTS files are read unless optional_filenames is specified. Args: - input_dir (str): Directory to read VASP input from. - optional_files (dict): Optional files to read in as well as a - dict of {filename: Object type}. Object type must have a - static method from_file. + input_dir (PathLike): Directory to read VASP input from. + optional_files (dict): Optional files to read in as a + dict of {filename: Object type}. Objects must have + from_file method. """ sub_dct = {} for fname, ftype in [ @@ -2696,7 +2778,7 @@ def from_directory(cls, input_dir: str, optional_files: dict | None = None) -> S return cls(**sub_dct) - def copy(self, deep: bool = True): + def copy(self, deep: bool = True) -> VaspInput: """Deep copy of VaspInput.""" if deep: return self.from_dict(self.as_dict()) diff --git a/pymatgen/io/vasp/optics.py b/pymatgen/io/vasp/optics.py index 35ccd1a3cdf..9cf3f928bde 100644 --- a/pymatgen/io/vasp/optics.py +++ b/pymatgen/io/vasp/optics.py @@ -4,7 +4,7 @@ import itertools from dataclasses import dataclass -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, overload import numpy as np import scipy.constants @@ -16,21 +16,21 @@ from pymatgen.io.vasp.outputs import Vasprun, Waveder if TYPE_CHECKING: - from pathlib import Path - from numpy.typing import ArrayLike, NDArray from typing_extensions import Self + from pymatgen.util.typing import PathLike + __author__ = "Jimmy-Xuan Shen" __copyright__ = "Copyright 2022, The Materials Project" __maintainer__ = "Jimmy-Xuan Shen" __email__ = "jmmshn@gmail.com" -au2ang = scipy.constants.physical_constants["atomic unit of length"][0] / 1e-10 -ryd2ev = scipy.constants.physical_constants["Rydberg constant times hc in eV"][0] -edeps = 4 * np.pi * 2 * ryd2ev * au2ang # from constant.inc in VASP +au2ang: float = scipy.constants.physical_constants["atomic unit of length"][0] / 1e-10 +ryd2ev: float = scipy.constants.physical_constants["Rydberg constant times hc in eV"][0] +edeps: float = 4 * np.pi * 2 * ryd2ev * au2ang # from constant.inc in VASP -KB = scipy.constants.physical_constants["Boltzmann constant in eV/K"][0] +KB: float = scipy.constants.physical_constants["Boltzmann constant in eV/K"][0] @dataclass @@ -111,8 +111,10 @@ def from_vasp_objects(cls, vrun: Vasprun, waveder: Waveder) -> Self: ) @classmethod - def from_directory(cls, directory: Path | str) -> Self: - """Construct a DielectricFunction from a directory containing vasprun.xml and WAVEDER files.""" + def from_directory(cls, directory: PathLike) -> Self: + """Construct a DielectricFunction from a directory containing + vasprun.xml and WAVEDER files. + """ def _try_reading(dtypes): """Return None if failed.""" @@ -163,37 +165,51 @@ def get_epsilon( mask: Mask for the bands/kpoint/spin index to include in the calculation """ - def _use_default(param, default): + @overload + def _use_default(param: int | None, default: int) -> int: + pass + + @overload + def _use_default(param: float | None, default: float) -> float: + pass + + def _use_default(param: float | None, default: float) -> float: return param if param is not None else default - efermi = _use_default(efermi, self.efermi) - nedos = _use_default(nedos, self.nedos) - deltae = _use_default(deltae, self.deltae) - ismear = _use_default(ismear, self.ismear) - sigma = _use_default(sigma, self.sigma) - cshift = _use_default(cshift, self.cshift) + _efermi = _use_default(efermi, self.efermi) + _nedos = _use_default(nedos, self.nedos) + _deltae = _use_default(deltae, self.deltae) + _ismear = _use_default(ismear, self.ismear) + _sigma = _use_default(sigma, self.sigma) + _cshift = _use_default(cshift, self.cshift) - egrid, eps_imag = epsilon_imag( # type: ignore + egrid, eps_imag = epsilon_imag( cder=self.cder, eigs=self.eigs, kweights=self.kweights, - efermi=efermi, # type: ignore - nedos=nedos, # type: ignore - deltae=deltae, # type: ignore - ismear=ismear, # type: ignore - sigma=sigma, # type: ignore + efermi=_efermi, + nedos=_nedos, + deltae=_deltae, + ismear=_ismear, + sigma=_sigma, idir=idir, jdir=jdir, mask=mask, ) # scaling constant: edeps * np.pi / structure.volume eps_in = eps_imag * edeps * np.pi / self.volume - eps = kramers_kronig(eps_in, nedos=nedos, deltae=deltae, cshift=cshift) # type: ignore + eps = kramers_kronig(eps_in, nedos=_nedos, deltae=_deltae, cshift=_cshift) if idir == jdir: eps += 1.0 + 0.0j return egrid, eps - def plot_weighted_transition_data(self, idir: int, jdir: int, mask: NDArray | None = None, min_val: float = 0.0): + def plot_weighted_transition_data( + self, + idir: int, + jdir: int, + mask: NDArray | None = None, + min_val: float = 0.0, + ): """Data for plotting the weight matrix elements as a scatter plot. Since the computation of the final spectrum (especially the smearing part) @@ -214,10 +230,12 @@ def plot_weighted_transition_data(self, idir: int, jdir: int, mask: NDArray | No norm_kweights = np.array(self.kweights) / np.sum(self.kweights) eigs_shifted = self.eigs - self.efermi rspin = 3 - cderm.shape[3] - # limit the first two indices based on the mask + + # Limit the first two indices based on the mask try: min_band0, max_band0 = np.min(np.where(cderm)[0]), np.max(np.where(cderm)[0]) min_band1, max_band1 = np.min(np.where(cderm)[1]), np.max(np.where(cderm)[1]) + except ValueError as exc: if "zero-size array" in str(exc): raise ValueError("No matrix elements found. Check the mask.") @@ -248,7 +266,7 @@ def plot_weighted_transition_data(self, idir: int, jdir: int, mask: NDArray | No return x_val, y_val, text -def delta_methfessel_paxton(x, n): +def delta_methfessel_paxton(x: NDArray, n: int) -> NDArray: """ D_n (x) = exp -x^2 * sum_i=0^n A_i H_2i(x) where H is a Hermite polynomial and @@ -260,7 +278,7 @@ def delta_methfessel_paxton(x, n): return np.exp(-(x * x)) * np.dot(A, H.T) -def step_methfessel_paxton(x, n): +def step_methfessel_paxton(x: NDArray, n: int) -> NDArray: """ S_n (x) = (1 + erf x)/2 - exp -x^2 * sum_i=1^n A_i H_{2i-1}(x) where H is a Hermite polynomial and @@ -272,7 +290,7 @@ def step_methfessel_paxton(x, n): return (1.0 + scipy.special.erf(x)) / 2.0 - np.exp(-(x * x)) * np.dot(A, H.T) -def delta_func(x, ismear): +def delta_func(x: NDArray, ismear: int) -> NDArray: """Replication of VASP's delta function.""" if ismear < -1: raise ValueError("Delta function not implemented for ismear < -1") @@ -283,7 +301,7 @@ def delta_func(x, ismear): return delta_methfessel_paxton(x, ismear) -def step_func(x, ismear): +def step_func(x: NDArray, ismear: int) -> NDArray: """Replication of VASP's step function.""" if ismear < -1: raise ValueError("Delta function not implemented for ismear < -1") @@ -294,7 +312,7 @@ def step_func(x, ismear): return step_methfessel_paxton(x, ismear) -def get_delta(x0: float, sigma: float, nx: int, dx: float, ismear: int = 3): +def get_delta(x0: float, sigma: float, nx: int, dx: float, ismear: int = 3) -> NDArray: """Get the smeared delta function to be added to form the spectrum. This replaces the `SLOT` function from VASP. Uses finite differences instead of @@ -319,7 +337,7 @@ def get_delta(x0: float, sigma: float, nx: int, dx: float, ismear: int = 3): return dfun -def get_step(x0, sigma, nx, dx, ismear): +def get_step(x0: float, sigma: float, nx: int, dx: float, ismear: int) -> float: """Get the smeared step function to be added to form the spectrum. This replaces the `SLOT` function from VASP. @@ -352,7 +370,7 @@ def epsilon_imag( idir: int, jdir: int, mask: NDArray | None = None, -): +) -> tuple[NDArray, NDArray]: """Replicate the EPSILON_IMAG function of VASP. Args: @@ -377,7 +395,7 @@ def epsilon_imag( # np.subtract.outer results in a matrix of shape (nband, nband) rspin = 3 - cder.shape[3] - # for the transition between two bands at one kpoint the contributions is: + # For the transition between two bands at one kpoint the contributions is: # (fermi[band_i] - fermi[band_j]) * rspin * normalized_kpoint_weight cderm = cder * mask if mask is not None else cder @@ -416,7 +434,7 @@ def epsilon_imag( def kramers_kronig( - eps: np.ndarray, + eps: NDArray, nedos: int, deltae: float, cshift: float = 0.1, @@ -429,7 +447,8 @@ def kramers_kronig( The output should be the complex dielectric function. Args: - eps: The dielectric function with the imaginary part stored as the real part and nothing in the imaginary part. + eps: The dielectric function with the imaginary part stored as the real part + and nothing in the imaginary part. nedos: The sampling of the energy values deltae: The energy grid spacing cshift: The shift of the imaginary part of the dielectric function. @@ -443,5 +462,3 @@ def kramers_kronig( csum = np.add.outer(egrid, egrid) + csfhit vals = -0.5 * ((eps / cdiff) - (np.conj(eps) / csum)) return np.sum(vals, axis=1) * 2 / np.pi * deltae - - # loop over that diff --git a/pymatgen/io/vasp/outputs.py b/pymatgen/io/vasp/outputs.py index deacb1231c2..3d36ca2c978 100644 --- a/pymatgen/io/vasp/outputs.py +++ b/pymatgen/io/vasp/outputs.py @@ -1583,7 +1583,7 @@ def __init__( parse_potcar_file: bool | str = False, occu_tol: float = 1e-8, separate_spins: bool = False, - ): + ) -> None: """ Args: filename: Filename to parse @@ -1823,7 +1823,7 @@ class Outcar: Authors: Rickard Armiento, Shyue Ping Ong """ - def __init__(self, filename): + def __init__(self, filename) -> None: """ Args: filename (str): OUTCAR filename to parse. @@ -1842,7 +1842,7 @@ def __init__(self, filename): mag_y = [] mag_z = [] header = [] - run_stats = {} + run_stats: dict[str, float | None] = {} total_mag = nelect = efermi = e_fr_energy = e_wo_entrp = e0 = None time_patt = re.compile(r"\((sec|kb)\)") @@ -1957,7 +1957,7 @@ def __init__(self, filename): else: mag = mag_x - # data from beginning of OUTCAR + # Data from beginning of OUTCAR run_stats["cores"] = None with zopen(filename, mode="rt") as file: for line in file: @@ -1982,7 +1982,7 @@ def __init__(self, filename): self.final_energy = e0 self.final_energy_wo_entrp = e_wo_entrp self.final_fr_energy = e_fr_energy - self.data = {} + self.data: dict = {} # Read "total number of plane waves", NPLWV: self.read_pattern( @@ -3616,7 +3616,7 @@ def write_spin(data_type): class Locpot(VolumetricData): """Simple object for reading a LOCPOT file.""" - def __init__(self, poscar: Poscar, data: np.ndarray, **kwargs) -> None: + def __init__(self, poscar: Poscar, data: np.ndarray, **kwargs): """ Args: poscar (Poscar): Poscar object containing structure. @@ -3642,22 +3642,23 @@ def from_file(cls, filename: str, **kwargs) -> Self: class Chgcar(VolumetricData): """Simple object for reading a CHGCAR file.""" - def __init__(self, poscar, data, data_aug=None): + def __init__(self, poscar, data, data_aug=None) -> None: """ Args: poscar (Poscar | Structure): Object containing structure. data: Actual data. data_aug: Augmentation charge data. """ - # allow for poscar or structure files to be passed + # Allow for poscar or structure files to be passed if isinstance(poscar, Poscar): struct = poscar.structure self.poscar = poscar - self.name = poscar.comment + self.name: str = poscar.comment elif isinstance(poscar, Structure): struct = poscar self.poscar = Poscar(poscar) - self.name = None + # TODO (@DanielYang59): use a default str name for the following? + self.name = None # type: ignore[assignment] else: raise TypeError("Unsupported POSCAR type.") @@ -3779,7 +3780,7 @@ def __init__(self, filename): n_kpoints = None n_bands = None n_ions = None - weights = [] + weights: list[float] = [] headers = None data = None phase_factors = None @@ -3802,9 +3803,11 @@ def __init__(self, filename): headers.pop(0) headers.pop(-1) - data = defaultdict(lambda: np.zeros((n_kpoints, n_bands, n_ions, len(headers)))) + data: dict[Spin, np.ndarray] = defaultdict( + lambda: np.zeros((n_kpoints, n_bands, n_ions, len(headers))) + ) - phase_factors = defaultdict( + phase_factors: dict[Spin, np.ndarray] = defaultdict( lambda: np.full((n_kpoints, n_bands, n_ions, len(headers)), np.nan, dtype=np.complex128) ) elif expr.match(line): @@ -3926,7 +3929,7 @@ def smart_convert(header, num): except ValueError: return "--" - header = [] + header: list = [] with zopen(filename, mode="rt") as fid: for line in fid: m = electronic_pattern.match(line.strip()) @@ -4050,9 +4053,9 @@ def __init__(self, filename, ionicstep_start=1, ionicstep_end=None, comment=None comment (str): Optional comment attached to this set of structures. """ preamble = None - coords_str = [] - structures = [] - preamble_done = False + coords_str: list = [] + structures: list = [] + preamble_done: bool = False if ionicstep_start < 1: raise ValueError("Start ionic step cannot be less than 1") if ionicstep_end is not None and ionicstep_end < 1: @@ -4242,7 +4245,7 @@ def write_file(self, filename, **kwargs): with zopen(filename, mode="wt") as file: file.write(self.get_str(**kwargs)) - def __str__(self): + def __str__(self) -> str: return self.get_str() @@ -4269,7 +4272,7 @@ def __init__(self, filename): lines = list(clean_lines(file.readlines())) self._nspecs, self._natoms, self._ndisps = map(int, lines[0].split()) self._masses = map(float, lines[1].split()) - self.data = defaultdict(dict) + self.data: dict[int, dict] = defaultdict(dict) atom, disp = None, None for idx, line in enumerate(lines[2:]): v = list(map(float, line.split())) @@ -4502,7 +4505,7 @@ def __init__(self, filename="WAVECAR", verbose=False, precision="normal", vasp_t self.kpoints = [] if spin == 2: self.coeffs = [[[None for _ in range(self.nb)] for _ in range(self.nk)] for _ in range(spin)] - self.band_energy = [[] for _ in range(spin)] + self.band_energy: list = [[] for _ in range(spin)] else: self.coeffs = [[None for i in range(self.nb)] for j in range(self.nk)] self.band_energy = [] @@ -4538,11 +4541,11 @@ def __init__(self, filename="WAVECAR", verbose=False, precision="normal", vasp_t np.fromfile(file, dtype=np.float64, count=(recl8 - 4 - 3 * self.nb) % recl8) if self.vasp_type is None: - self.Gpoints[ink], extra_gpoints, extra_coeff_inds = self._generate_G_points(kpoint, gamma=True) + self.Gpoints[ink], extra_gpoints, extra_coeff_inds = self._generate_G_points(kpoint, gamma=True) # type: ignore[call-overload] if len(self.Gpoints[ink]) == nplane: self.vasp_type = "gam" else: - self.Gpoints[ink], extra_gpoints, extra_coeff_inds = self._generate_G_points( + self.Gpoints[ink], extra_gpoints, extra_coeff_inds = self._generate_G_points( # type: ignore[call-overload] kpoint, gamma=False ) self.vasp_type = "std" if len(self.Gpoints[ink]) == nplane else "ncl" @@ -4550,7 +4553,7 @@ def __init__(self, filename="WAVECAR", verbose=False, precision="normal", vasp_t if verbose: print(f"\ndetermined {self.vasp_type = }\n") else: - self.Gpoints[ink], extra_gpoints, extra_coeff_inds = self._generate_G_points( + self.Gpoints[ink], extra_gpoints, extra_coeff_inds = self._generate_G_points( # type: ignore[call-overload] kpoint, gamma=self.vasp_type.lower()[0] == "g" ) @@ -4706,7 +4709,7 @@ def evaluate_wavefunc(self, kpoint: int, band: int, r: np.ndarray, spin: int = 0 v = self.Gpoints[kpoint] + self.kpoints[kpoint] u = np.dot(np.dot(v, self.b), r) if self.vasp_type.lower()[0] == "n": - c = self.coeffs[kpoint][band][spinor, :] + c = self.coeffs[kpoint][band][spinor, :] # type: ignore[call-overload] elif self.spin == 2: c = self.coeffs[spin][kpoint][band] else: @@ -4739,14 +4742,14 @@ def fft_mesh(self, kpoint: int, band: int, spin: int = 0, spinor: int = 0, shift a numpy ndarray representing the 3D mesh of coefficients """ if self.vasp_type.lower()[0] == "n": - tcoeffs = self.coeffs[kpoint][band][spinor, :] + tcoeffs = self.coeffs[kpoint][band][spinor, :] # type: ignore[call-overload] elif self.spin == 2: tcoeffs = self.coeffs[spin][kpoint][band] else: tcoeffs = self.coeffs[kpoint][band] mesh = np.zeros(tuple(self.ng), dtype=np.complex128) - for gp, coeff in zip(self.Gpoints[kpoint], tcoeffs): + for gp, coeff in zip(self.Gpoints[kpoint], tcoeffs): # type: ignore[call-overload] t = tuple(gp.astype(int) + (self.ng / 2).astype(int)) mesh[t] = coeff @@ -4896,7 +4899,7 @@ class Eigenval: to be converted into proper objects. The kpoint index is 0-based (unlike the 1-based indexing in VASP). """ - def __init__(self, filename, occu_tol=1e-8, separate_spins=False): + def __init__(self, filename, occu_tol=1e-8, separate_spins=False) -> None: """ Reads input from filename to construct Eigenval object. diff --git a/pymatgen/io/vasp/sets.py b/pymatgen/io/vasp/sets.py index 3f1c4dba468..7dd9fd631d3 100644 --- a/pymatgen/io/vasp/sets.py +++ b/pymatgen/io/vasp/sets.py @@ -1042,10 +1042,10 @@ def from_prev_calc(cls, prev_calc_dir: str, **kwargs) -> Self: input_set = cls(_dummy_structure, **kwargs) return input_set.override_from_prev_calc(prev_calc_dir=prev_calc_dir) - def __str__(self): + def __str__(self) -> str: return type(self).__name__ - def __repr__(self): + def __repr__(self) -> str: return type(self).__name__ def write_input( @@ -2272,7 +2272,7 @@ class MITNEBSet(DictSet): Note that EDIFF is not on a per atom basis for this input set. """ - def __init__(self, structures, unset_encut=False, **kwargs): + def __init__(self, structures, unset_encut=False, **kwargs) -> None: """ Args: structures: List of Structure objects. diff --git a/pymatgen/io/xtb/outputs.py b/pymatgen/io/xtb/outputs.py index 1a6132f496b..2df33550c4f 100644 --- a/pymatgen/io/xtb/outputs.py +++ b/pymatgen/io/xtb/outputs.py @@ -36,8 +36,8 @@ def __init__(self, output_filename, path="."): self.path = path self.filename = output_filename - self.cmd_options = {} - self.sorted_structures_energies = [] + self.cmd_options: dict = {} + self.sorted_structures_energies: list = [] self.properly_terminated = False self._parse_crest_output() diff --git a/pymatgen/transformations/advanced_transformations.py b/pymatgen/transformations/advanced_transformations.py index 338387283c3..49e32d640c5 100644 --- a/pymatgen/transformations/advanced_transformations.py +++ b/pymatgen/transformations/advanced_transformations.py @@ -1378,7 +1378,7 @@ def __init__( rotation_angle, expand_times=4, vacuum_thickness=0.0, - ab_shift=None, + ab_shift: tuple[float, float] | None = None, normal=False, ratio=True, plane=None, @@ -1404,7 +1404,7 @@ def __init__( cell do not interact with each other. Default set to 4. vacuum_thickness (float): The thickness of vacuum that you want to insert between two grains of the GB. Default to 0. - ab_shift (list of float, in unit of a, b vectors of Gb): in plane shift of two grains + ab_shift (tuple[float, float]): in plane shift of two grains in unit of a, b vectors of Gb normal (logic): determine if need to require the c axis of top grain (first transformation matrix) perpendicular to the surface or not. @@ -1447,7 +1447,7 @@ def __init__( self.rotation_angle = rotation_angle self.expand_times = expand_times self.vacuum_thickness = vacuum_thickness - self.ab_shift = ab_shift or [0, 0] + self.ab_shift = ab_shift or (0, 0) self.normal = normal self.ratio = ratio self.plane = plane @@ -1468,6 +1468,7 @@ def apply_transformation(self, structure: Structure): Grain boundary Structures. """ gbg = GrainBoundaryGenerator(structure) + return gbg.gb_from_parameters( self.rotation_axis, self.rotation_angle, diff --git a/pymatgen/transformations/standard_transformations.py b/pymatgen/transformations/standard_transformations.py index 00232e3c99c..40f56c4beb1 100644 --- a/pymatgen/transformations/standard_transformations.py +++ b/pymatgen/transformations/standard_transformations.py @@ -509,7 +509,7 @@ def __init__(self, algo=ALGO_FAST, symmetrized_structures=False, no_oxi_states=F ordering. """ self.algo = algo - self._all_structures = [] + self._all_structures: list = [] self.no_oxi_states = no_oxi_states self.symmetrized_structures = symmetrized_structures diff --git a/pymatgen/vis/plotters.py b/pymatgen/vis/plotters.py index cae4d50e88c..026d55f9663 100644 --- a/pymatgen/vis/plotters.py +++ b/pymatgen/vis/plotters.py @@ -49,8 +49,8 @@ def __init__(self, xshift=0.0, yshift=0.0, stack=False, color_cycle=("qualitativ mod = importlib.import_module(f"palettable.colorbrewer.{color_cycle[0]}") self.colors_cycle = getattr(mod, color_cycle[1]).mpl_colors - self.colors = [] - self._spectra = {} + self.colors: list = [] + self._spectra: dict = {} def add_spectrum(self, label, spectrum, color=None): """ diff --git a/pymatgen/vis/structure_vtk.py b/pymatgen/vis/structure_vtk.py index cfad254aae4..d9111b00522 100644 --- a/pymatgen/vis/structure_vtk.py +++ b/pymatgen/vis/structure_vtk.py @@ -90,7 +90,7 @@ def __init__( self.title = "Structure Visualizer" self.iren = vtk.vtkRenderWindowInteractor() self.iren.SetRenderWindow(self.ren_win) - self.mapper_map = {} + self.mapper_map: dict = {} self.structure = None if element_color_mapping: diff --git a/tasks.py b/tasks.py index 7cf78730856..f9c9f99f9e9 100644 --- a/tasks.py +++ b/tasks.py @@ -177,7 +177,9 @@ def update_changelog(ctx: Context, version: str | None = None, dry_run: bool = F if re_match and "materialsproject/dependabot/pip" not in line: pr_number = re_match.group(1) contributor, pr_name = re_match.group(2).split("/", 1) - response = requests.get(f"https://api.github.com/repos/materialsproject/pymatgen/pulls/{pr_number}") + response = requests.get( + f"https://api.github.com/repos/materialsproject/pymatgen/pulls/{pr_number}", timeout=60 + ) lines += [f"* PR #{pr_number} from @{contributor} {pr_name}"] json_resp = response.json() if body := json_resp["body"]: diff --git a/tests/apps/battery/test_plotter.py b/tests/apps/battery/test_plotter.py index 5d5764e10a9..8b93b20f8bf 100644 --- a/tests/apps/battery/test_plotter.py +++ b/tests/apps/battery/test_plotter.py @@ -40,6 +40,7 @@ def test_plotly(self): plotter.add_electrode(self.ce_FF, "FeF3 conversion") fig = plotter.get_plotly_figure() assert fig.layout.xaxis.title.text == "Atomic Fraction of Li" + plotter = VoltageProfilePlotter(xaxis="x_form") plotter.add_electrode(self.ce_FF, "FeF3 conversion") fig = plotter.get_plotly_figure() diff --git a/tests/ext/test_cod.py b/tests/ext/test_cod.py index de925483794..6616205909e 100644 --- a/tests/ext/test_cod.py +++ b/tests/ext/test_cod.py @@ -9,7 +9,7 @@ from pymatgen.ext.cod import COD try: - website_down = requests.get("https://www.crystallography.net").status_code != 200 + website_down = requests.get("https://www.crystallography.net", timeout=60).status_code != 200 except requests.exceptions.ConnectionError: website_down = True diff --git a/tests/ext/test_matproj.py b/tests/ext/test_matproj.py index a44fda5c37b..2d4961518c9 100644 --- a/tests/ext/test_matproj.py +++ b/tests/ext/test_matproj.py @@ -26,7 +26,7 @@ from pymatgen.util.testing import TEST_FILES_DIR, PymatgenTest try: - skip_mprester_tests = requests.get("https://materialsproject.org").status_code != 200 + skip_mprester_tests = requests.get("https://materialsproject.org", timeout=60).status_code != 200 except (ModuleNotFoundError, ImportError, requests.exceptions.ConnectionError): # Skip all MPRester tests if some downstream problem on the website, mp-api or whatever. @@ -76,7 +76,7 @@ def test_get_data(self): "total_magnetization", } mp_id = "mp-1143" - vals = requests.get(f"http://legacy.materialsproject.org/materials/{mp_id}/json/") + vals = requests.get(f"http://legacy.materialsproject.org/materials/{mp_id}/json/", timeout=60) expected_vals = vals.json() for prop in props: @@ -570,7 +570,7 @@ def test_get_all_materials_ids_doc(self): # "total_magnetization", # } # mp_id = "mp-1143" - # vals = requests.get(f"http://legacy.materialsproject.org/materials/{mp_id}/json/") + # vals = requests.get(f"http://legacy.materialsproject.org/materials/{mp_id}/json/", timeout=60) # expected_vals = vals.json() # # for prop in props: diff --git a/tests/ext/test_optimade.py b/tests/ext/test_optimade.py index 9e09625a2b8..564694b48f0 100644 --- a/tests/ext/test_optimade.py +++ b/tests/ext/test_optimade.py @@ -8,22 +8,30 @@ try: # 403 is returned when server detects bot-like behavior - website_down = requests.get(OptimadeRester.aliases["mp"]).status_code not in (200, 403) + website_down = requests.get(OptimadeRester.aliases["mp"], timeout=60).status_code not in (200, 403) except requests.exceptions.ConnectionError: website_down = True try: - optimade_providers_down = requests.get("https://providers.optimade.org").status_code not in (200, 403) + optimade_providers_down = requests.get("https://providers.optimade.org", timeout=60).status_code not in (200, 403) except requests.exceptions.ConnectionError: optimade_providers_down = True try: - mc3d_down = requests.get(OptimadeRester.aliases["mcloud.mc3d"] + "/v1/info").status_code not in (200, 403, 301) + mc3d_down = requests.get(OptimadeRester.aliases["mcloud.mc3d"] + "/v1/info", timeout=60).status_code not in ( + 200, + 403, + 301, + ) except requests.exceptions.ConnectionError: mc3d_down = True try: - mc2d_down = requests.get(OptimadeRester.aliases["mcloud.mc2d"] + "/v1/info").status_code not in (200, 403, 301) + mc2d_down = requests.get(OptimadeRester.aliases["mcloud.mc2d"] + "/v1/info", timeout=60).status_code not in ( + 200, + 403, + 301, + ) except requests.exceptions.ConnectionError: mc2d_down = True