From 40100e97f25a071f9d74cee7e28b8fa3c6dd52d0 Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel) YANG" Date: Thu, 14 Nov 2024 01:48:17 +0800 Subject: [PATCH] Improve element mismatch handling with POTCAR for `Poscar.from_file/str` (#4143) * insert warning for element mismatch * try value error and see if there's any breakage * fix typo in comment * revise message to cover cases where default_name is given as arg * NEED DISCUSSION: re-raise ValueError * add todo tag * clean up POTCAR element check * recover code logic * add comment * fix mismatch in unit test * improve glob logic * add some test, pmg test cannot be parametrized * fix warning and exception logic * remove dummy POSCAR * fix test_from_str_default_names * fix test_from_file_potcar_overwrite_elements * suppress many expected warnings * fix condition * clean up comment * add superset * also test elements * clean up overwrite logic for readability * recover vasp5or6_symbols tag * WIP: halfed done logic cleanup * clean up comment * fix test across OS owing to rounding * finish VASP 4 overwrite test * reduce code repetition * enhance test a tiny bit --------- Signed-off-by: Shyue Ping Ong Co-authored-by: Shyue Ping Ong --- src/pymatgen/io/vasp/inputs.py | 52 ++++++++---- tests/io/vasp/test_inputs.py | 142 ++++++++++++++++++++++++++++++--- 2 files changed, 168 insertions(+), 26 deletions(-) diff --git a/src/pymatgen/io/vasp/inputs.py b/src/pymatgen/io/vasp/inputs.py index 1ec54342217..07bff38a14a 100644 --- a/src/pymatgen/io/vasp/inputs.py +++ b/src/pymatgen/io/vasp/inputs.py @@ -275,7 +275,10 @@ def from_file( and (potcars := glob(f"{dirname}/*POTCAR*")) ): try: - potcar = Potcar.from_file(min(potcars)) + # Make sure exact match has highest priority + potcar_path = next((f for f in potcars if f == f"{dirname}/POTCAR"), min(potcars)) + + potcar = Potcar.from_file(potcar_path) names = [sym.split("_")[0] for sym in potcar.symbols] map(get_el_sp, names) # ensure valid names except Exception: @@ -316,7 +319,8 @@ def from_str( Args: data (str): String containing Poscar data. default_names ([str]): Default symbols for the POSCAR file, - usually coming from a POTCAR in the same directory. + usually coming from a POTCAR in the same directory. This + could be used to convert a VASP 4 POSCAR to POSCAR 5/6 format. read_velocities (bool): Whether to read or not velocities if they are present in the POSCAR. Default is True. @@ -332,8 +336,9 @@ def from_str( except IndexError: raise ValueError("Empty POSCAR") - # Parse positions lines: list[str] = list(clean_lines(chunks[0].split("\n"), remove_empty_lines=False)) + + # Parse comment/scale/lattice comment: str = lines[0] scale: float = float(lines[1]) lattice: np.ndarray = np.array([[float(i) for i in line.split()] for line in lines[2:5]]) @@ -345,15 +350,26 @@ def from_str( else: lattice *= scale - vasp5_symbols: bool = False + # "atomic_symbols" is the "fully expanded" list of symbols, + # while "symbols" is the list as shown in POSCAR. + # For example with a POSCAR: + # ... (comment/scale/lattice) + # H O + # 2 1 + # ... + # atomic_symbols is ["H", "H", "O"] + # symbols is ["H", "O"] atomic_symbols: list[str] = [] try: + symbols: list[str] | None = None # would be None for VASP 4 + n_atoms: list[int] = [int(i) for i in lines[5].split()] + vasp5or6_symbols: bool = False # VASP 4 tag ipos: int = 6 except ValueError: - vasp5_symbols = True + vasp5or6_symbols = True # In VASP 6.x.x, part of the POTCAR hash is written to POSCAR-style strings # In VASP 6.4.2 and up, the POTCAR symbol is also written, ex.: @@ -365,7 +381,7 @@ def from_str( # Mg_pv/f474ac0d Si/79d9987ad87``` # whereas older VASP 5.x.x POSCAR strings would just have `Mg Si` on the last line - symbols: list[str] = [symbol.split("/")[0].split("_")[0] for symbol in lines[5].split()] + symbols = [symbol.split("/")[0].split("_")[0] for symbol in lines[5].split()] # Atoms and number of atoms in POSCAR written with VASP appear on # multiple lines when atoms of the same type are not grouped together @@ -415,19 +431,25 @@ def from_str( cart: bool = pos_type[0] in "cCkK" n_sites: int = sum(n_atoms) - # If default_names is specified (usually coming from a POTCAR), use - # them. This is in line with VASP's parsing order that the POTCAR + # If default_names is specified (usually coming from a POTCAR), use them. + # This is in line with VASP's parsing order that the POTCAR # specified is the default used. if default_names is not None: - try: + # Use len(n_atoms) over len(symbols) as symbols is None for VASP 4 format + if len(n_atoms) > len(default_names): + raise ValueError(f"{default_names=} (likely from POTCAR) has fewer elements than POSCAR {symbols}") + + if symbols is None or default_names[: len(symbols)] != symbols: + # After this VASP 4.x POSCAR would be converted to VASP 5/6 format + vasp5or6_symbols = True + atomic_symbols = [] for idx, n_atom in enumerate(n_atoms): atomic_symbols.extend([default_names[idx]] * n_atom) - vasp5_symbols = True - except IndexError: - pass - if not vasp5_symbols: + warnings.warn(f"Elements in POSCAR would be overwritten by {default_names=}", stacklevel=2) + + if not vasp5or6_symbols: ind: Literal[3, 6] = 6 if has_selective_dynamics else 3 try: # Check if names are appended at the end of the coordinates @@ -435,7 +457,7 @@ def from_str( # Ensure symbols are valid elements if not all(Element.is_valid_symbol(sym) for sym in atomic_symbols): raise ValueError("Non-valid symbols detected.") - vasp5_symbols = True + vasp5or6_symbols = True except (ValueError, IndexError): # Defaulting to false names @@ -533,7 +555,7 @@ def from_str( struct, comment, selective_dynamics, - vasp5_symbols, + vasp5or6_symbols, velocities=velocities, predictor_corrector=predictor_corrector, predictor_corrector_preamble=predictor_corrector_preamble, diff --git a/tests/io/vasp/test_inputs.py b/tests/io/vasp/test_inputs.py index 3b415eda9fe..859c987d0ff 100644 --- a/tests/io/vasp/test_inputs.py +++ b/tests/io/vasp/test_inputs.py @@ -5,6 +5,7 @@ import os import pickle import re +import warnings from shutil import copyfile from unittest import TestCase from unittest.mock import patch @@ -39,6 +40,13 @@ ) from pymatgen.util.testing import FAKE_POTCAR_DIR, TEST_FILES_DIR, VASP_IN_DIR, VASP_OUT_DIR, PymatgenTest +# Filter some expected warnings +warnings.filterwarnings( + "ignore", message=r"POTCAR data with symbol .* is not known to pymatgen", category=UnknownPotcarWarning +) +warnings.filterwarnings("ignore", message=r"missing .* POTCAR directory", category=UserWarning) + + # make sure _gen_potcar_summary_stats runs and works with all tests in this file _SUMM_STATS = _gen_potcar_summary_stats(append=False, vasp_psp_dir=str(FAKE_POTCAR_DIR), summary_stats_filename=None) @@ -61,6 +69,9 @@ def _mock_complete_potcar_summary_stats(monkeypatch: pytest.MonkeyPatch) -> None _SUMM_STATS[func] = _SUMM_STATS["LDA_64"].copy() +@pytest.mark.filterwarnings( + "ignore:POTCAR data with symbol .* is not known to pymatgen:pymatgen.io.vasp.inputs.UnknownPotcarWarning" +) class TestPoscar(PymatgenTest): def test_init(self): comp = Structure.from_file(f"{VASP_IN_DIR}/POSCAR").composition @@ -95,8 +106,10 @@ def test_init(self): 0.000000 0.000000 0.000000 0.750000 0.500000 0.750000 """ - poscar = Poscar.from_str(poscar_str) + with pytest.warns(BadPoscarWarning, match="Elements in POSCAR cannot be determined"): + poscar = Poscar.from_str(poscar_str) assert poscar.structure.composition == Composition("HHe") + # VASP 4 style file with default names, i.e. no element symbol found. poscar_str = """Test3 1.0 @@ -175,6 +188,95 @@ def test_from_file(self): ] assert [site.specie.symbol for site in poscar.structure] == ordered_expected_elements + def test_from_file_potcar_overwrite_elements(self): + # Make sure overwriting elements triggers warning + # The following POTCAR has elements as ["Fe", "P", "O"] + copyfile(f"{VASP_IN_DIR}/POSCAR_Fe3O4", tmp_poscar_path := f"{self.tmp_path}/POSCAR") + copyfile(f"{VASP_IN_DIR}/fake_potcars/POTCAR.gz", f"{self.tmp_path}/POTCAR.gz") + + with pytest.warns(UserWarning, match="Elements in POSCAR would be overwritten"): + _poscar = Poscar.from_file(tmp_poscar_path) + + # Make sure ValueError is raised when POTCAR has fewer elements + copyfile(f"{VASP_IN_DIR}/POSCAR_Fe3O4", tmp_poscar_path := f"{self.tmp_path}/POSCAR") + copyfile(f"{VASP_IN_DIR}/POTCAR_C2.gz", f"{self.tmp_path}/POTCAR.gz") + + with pytest.raises(ValueError, match="has fewer elements than POSCAR"): + _poscar = Poscar.from_file(tmp_poscar_path) + + # Make sure no warning/error is triggered for compatible elements + copyfile(f"{VASP_IN_DIR}/POSCAR_C2", tmp_poscar_path := f"{self.tmp_path}/POSCAR") + copyfile(f"{VASP_IN_DIR}/POTCAR_C2.gz", f"{self.tmp_path}/POTCAR.gz") + + with warnings.catch_warnings(record=True) as record: + _poscar = Poscar.from_file(tmp_poscar_path) + assert not any("Elements in POSCAR would be overwritten" in str(warning.message) for warning in record) + + def test_from_str_default_names(self): + """Similar to test_from_file_bad_potcar, ensure "default_names" + (likely read from POTCAR) triggers correct warning/error. + """ + poscar_str = """bad default names +1.1 +3.840198 0.000000 0.000000 +1.920099 3.325710 0.000000 +0.000000 -2.217138 3.135509 +Si F +1 1 +cart +0.000000 0.00000000 0.00000000 +3.840198 1.50000000 2.35163175 +""" + # ValueError if default_names shorter than POSCAR + with pytest.raises(ValueError, match=re.escape("default_names=['Si'] (likely from POTCAR) has fewer elements")): + _poscar = Poscar.from_str(poscar_str, default_names=["Si"]) + + # Warning if overwrite triggered + with pytest.warns(UserWarning, match="Elements in POSCAR would be overwritten"): + poscar = Poscar.from_str(poscar_str, default_names=["Si", "O"]) + assert poscar.site_symbols == ["Si", "O"] + + # Assert no warning if using the same elements (or superset) + with warnings.catch_warnings(record=True) as record: + poscar = Poscar.from_str(poscar_str, default_names=["Si", "F"]) + assert poscar.site_symbols == ["Si", "F"] + + poscar = Poscar.from_str(poscar_str, default_names=["Si", "F", "O"]) + assert poscar.site_symbols == ["Si", "F"] + assert not any("Elements in POSCAR would be overwritten" in str(warning.message) for warning in record) + + # Make sure it could be bypassed (by using None, when not check_for_potcar) + with warnings.catch_warnings(record=True) as record: + _poscar = Poscar.from_str(poscar_str, default_names=None) + assert not any("Elements in POSCAR would be overwritten" in str(warning.message) for warning in record) + + def test_from_str_default_names_vasp4(self): + """Poscar.from_str with default_names given could also be used to + convert a VASP 4 formatted POSCAR to VASP 5/6, by inserting + elements to the 5th (0-indexed) line. + """ + poscar_str_vasp4 = """vasp 4 +1.1 +3.840198 0.000000 0.000000 +1.920099 3.325710 0.000000 +0.000000 -2.217138 3.135509 +1 1 +cart +0.000000 0.00000000 0.00000000 +3.840198 1.50000000 2.35163175 +""" + # Test overwrite warning + with pytest.warns(UserWarning, match="Elements in POSCAR would be overwritten"): + poscar_vasp4 = Poscar.from_str(poscar_str_vasp4, default_names=["H", "He"]) + assert poscar_vasp4.site_symbols == ["H", "He"] + + # Test default names longer than need but should work the same + poscar_vasp4 = Poscar.from_str(poscar_str_vasp4, default_names=["H", "He", "Li"]) + assert poscar_vasp4.site_symbols == ["H", "He"] + + with pytest.raises(ValueError, match=re.escape("default_names=['H'] (likely from POTCAR) has fewer elements")): + _poscar_vasp4 = Poscar.from_str(poscar_str_vasp4, default_names=["H"]) + def test_as_from_dict(self): poscar_str = """Test3 1.0 @@ -419,6 +521,7 @@ def test_write(self): poscar = Poscar.from_file(tmp_file) assert_allclose(poscar.structure.lattice.abc, poscar.structure.lattice.abc, 5) + @pytest.mark.filterwarnings("ignore:Elements in POSCAR would be overwritten") def test_selective_dynamics(self): # Previously, this test relied on the existence of a file named POTCAR # that was sorted to the top of a list of POTCARs for the test to work. @@ -471,9 +574,12 @@ def test_invalid_selective_dynamics(self): 0.000000 0.00000000 0.00000000 Si T T F 3.840198 1.50000000 2.35163175 F T T F """ - with pytest.warns( - BadPoscarWarning, - match="Selective dynamics values must be either 'T' or 'F'.", + with ( + pytest.warns( + BadPoscarWarning, + match="Selective dynamics values must be either 'T' or 'F'.", + ), + pytest.warns(BadPoscarWarning, match="Selective dynamics toggled with Fluorine"), ): Poscar.from_str(invalid_poscar_str) @@ -494,12 +600,15 @@ def test_selective_dynamics_with_fluorine(self): 0.000000 0.00000000 0.00000000 Si T T F 3.840198 1.50000000 2.35163175 F T T F """ - with pytest.warns( - BadPoscarWarning, - match=( - "Selective dynamics toggled with Fluorine element detected. " - "Make sure the 4th-6th entry each position line is selective dynamics info." + with ( + pytest.warns( + BadPoscarWarning, + match=( + "Selective dynamics toggled with Fluorine element detected. " + "Make sure the 4th-6th entry each position line is selective dynamics info." + ), ), + pytest.warns(BadPoscarWarning, match="Selective dynamics values must be either"), ): Poscar.from_str(poscar_str_with_fluorine) @@ -931,9 +1040,11 @@ class TestKpoints: def test_init(self): # Automatic KPOINT grid filepath = f"{VASP_IN_DIR}/KPOINTS_auto" - kpoints = Kpoints.from_file(filepath) + with pytest.warns(DeprecationWarning, match="Please use INCAR KSPACING tag"): + kpoints = Kpoints.from_file(filepath) assert kpoints.comment == "Automatic mesh" assert kpoints.kpts == [(10,)], "Wrong kpoint lattice read" + filepath = f"{VASP_IN_DIR}/KPOINTS_cartesian" kpoints = Kpoints.from_file(filepath) assert kpoints.kpts == [ @@ -1183,6 +1294,9 @@ def test_automatic_monkhorst_vs_gamma_style_selection(self): assert kpoints.style == Kpoints.supported_modes.Gamma +@pytest.mark.filterwarnings( + "ignore:POTCAR data with symbol .* is not known to pymatgen:pymatgen.io.vasp.inputs.UnknownPotcarWarning" +) class TestPotcarSingle(TestCase): def setUp(self): self.psingle_Mn_pv = PotcarSingle.from_file(f"{FAKE_POTCAR_DIR}/POT_GGA_PAW_PBE/POTCAR.Mn_pv.gz") @@ -1420,6 +1534,9 @@ def test_copy(self): assert psingle is not self.psingle_Mn_pv +@pytest.mark.filterwarnings( + "ignore:POTCAR data with symbol .* is not known to pymatgen:pymatgen.io.vasp.inputs.UnknownPotcarWarning" +) class TestPotcar(PymatgenTest): def setUp(self): SETTINGS.setdefault("PMG_VASP_PSP_DIR", str(TEST_FILES_DIR)) @@ -1495,7 +1612,10 @@ def test_pickle(self): pickle.dumps(self.potcar) -@pytest.mark.filterwarnings("ignore:Please use INCAR KSPACING tag") +@pytest.mark.filterwarnings("ignore:Please use INCAR KSPACING tag:DeprecationWarning") +@pytest.mark.filterwarnings( + "ignore:POTCAR data with symbol .* is not known to pymatgen:pymatgen.io.vasp.inputs.UnknownPotcarWarning" +) class TestVaspInput(PymatgenTest): def setUp(self): filepath = f"{VASP_IN_DIR}/INCAR"