Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

replace check for potcar hash with check for potcar summary stats #966

Merged
merged 15 commits into from
Mar 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions emmet-builders/emmet/builders/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
from itertools import chain, combinations
from pymatgen.core import Structure
from pymatgen.analysis.diffusion.neb.full_path_mapper import MigrationGraph
from pymatgen.io.vasp.inputs import PotcarSingle

from emmet.builders.settings import EmmetBuildSettings


def maximal_spanning_non_intersecting_subsets(sets) -> Set[Set]:
Expand Down Expand Up @@ -211,3 +214,31 @@ def __enter__(self):
def __exit__(self, exc_type, exc_val, exc_tb):
sys.stdout.close()
sys.stdout = self._original_stdout


def get_potcar_stats():
    """Build reference POTCAR summary stats for every default VASP input set.

    Iterates over ``VASP_DEFAULT_INPUT_SETS`` from a freshly constructed
    ``EmmetBuildSettings``, instantiates each input set, and loads a
    ``PotcarSingle`` for every POTCAR symbol listed in the set's config using
    the set's POTCAR functional.

    Returns:
        Nested dictionary mapping calculation type -> POTCAR symbol -> a copy
        of that POTCAR's summary stats, extended with the ``"titel"`` and
        ``"hash"`` entries.
    """
    build_settings = EmmetBuildSettings()

    stats: dict[str, dict] = {}  # type: ignore

    for calc_type, input_set in build_settings.VASP_DEFAULT_INPUT_SETS.items():
        input_instance = input_set()

        stats_by_symbol = stats[calc_type] = {}
        functional = input_instance._config_dict["POTCAR_FUNCTIONAL"]

        for potcar_symbol in input_instance.CONFIG["POTCAR"].values():
            potcar = PotcarSingle.from_symbol_and_functional(
                symbol=potcar_symbol, functional=functional
            )
            # Copy the summary stats so the extra keys below never leak back
            # into the PotcarSingle object.
            #
            # "titel" and "hash" are the fallback method for validation - use
            # header hash and symbol.
            # Note that the potcar_spec assigns PotcarSingle.symbol to "titel".
            stats_by_symbol[potcar_symbol] = {
                **potcar._summary_stats,
                "titel": potcar.TITEL,
                "hash": potcar.md5_header_hash,
            }

    return stats
35 changes: 10 additions & 25 deletions emmet-builders/emmet/builders/vasp/task_validator.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from typing import Dict, Optional
from collections import defaultdict

from maggma.builders import MapBuilder
from maggma.core import Store

from emmet.builders.settings import EmmetBuildSettings
from emmet.core.vasp.task_valid import TaskDocument
from emmet.builders.utils import get_potcar_stats
from emmet.core.vasp.calc_types.enums import CalcType
from emmet.core.vasp.task_valid import TaskDocument
from emmet.core.vasp.validation import DeprecationMessage, ValidationDoc


Expand All @@ -15,7 +15,7 @@ def __init__(
self,
tasks: Store,
task_validation: Store,
potcar_hashes: Optional[Dict[CalcType, Dict[str, str]]] = None,
potcar_stats: Optional[Dict[CalcType, Dict[str, str]]] = None,
settings: Optional[EmmetBuildSettings] = None,
query: Optional[Dict] = None,
**kwargs,
Expand All @@ -26,37 +26,22 @@ def __init__(
Args:
tasks: Store of task documents
task_validation: Store of task_types for tasks
potcar_hashes: Optional dictionary of potcar hash data.
potcar_stats: Optional dictionary of potcar summary stats data.
Mapping is calculation type -> potcar symbol -> summary stats.
"""
self.tasks = tasks
self.task_validation = task_validation
self.settings = EmmetBuildSettings.autoload(settings)
self.query = query
self.kwargs = kwargs
self.potcar_hashes = potcar_hashes
self.potcar_stats = potcar_stats

# Set up potcar cache if appropriate
if self.settings.VASP_VALIDATE_POTCAR_HASHES:
if not self.potcar_hashes:
from pymatgen.io.vasp.inputs import PotcarSingle

hashes = defaultdict(dict) # type: dict

for (
calc_type,
input_set,
) in self.settings.VASP_DEFAULT_INPUT_SETS.items():
functional = input_set.CONFIG["POTCAR_FUNCTIONAL"]
for potcar_symbol in input_set.CONFIG["POTCAR"].values():
potcar = PotcarSingle.from_symbol_and_functional(
symbol=potcar_symbol, functional=functional
)
hashes[calc_type][potcar_symbol] = potcar._summary_stats

self.potcar_hashes = potcar_hashes
if self.settings.VASP_VALIDATE_POTCAR_STATS:
if not self.potcar_stats:
self.potcar_stats = get_potcar_stats()
else:
self.potcar_hashes = None
self.potcar_stats = None

super().__init__(
source=tasks,
Expand Down Expand Up @@ -88,7 +73,7 @@ def unary_function(self, item):
input_sets=self.settings.VASP_DEFAULT_INPUT_SETS,
LDAU_fields=self.settings.VASP_CHECKED_LDAU_FIELDS,
max_allowed_scf_gradient=self.settings.VASP_MAX_SCF_GRADIENT,
potcar_hashes=self.potcar_hashes,
potcar_stats=self.potcar_stats,
)

bad_tags = list(set(task_doc.tags).intersection(self.settings.DEPRECATED_TAGS))
Expand Down
2 changes: 1 addition & 1 deletion emmet-builders/tests/test_materials.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def tasks_store(test_dir):

@pytest.fixture(scope="session")
def validation_store(tasks_store):
settings = EmmetBuildSettings(VASP_VALIDATE_POTCAR_HASHES=False)
settings = EmmetBuildSettings(VASP_VALIDATE_POTCAR_STATS=False)
validation_store = MemoryStore()
builder = TaskValidator(
tasks=tasks_store, task_validation=validation_store, settings=settings
Expand Down
27 changes: 27 additions & 0 deletions emmet-builders/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@
chemsys_permutations,
maximal_spanning_non_intersecting_subsets,
get_hop_cutoff,
get_potcar_stats,
)
from pymatgen.analysis.diffusion.neb.full_path_mapper import MigrationGraph
from numpy.testing import assert_almost_equal
from monty.serialization import loadfn
from emmet.core.settings import EmmetSettings


def test_maximal_spanning_non_intersecting_subsets():
Expand Down Expand Up @@ -55,3 +57,28 @@ def test_get_hop_cutoff(test_dir):
check_mg = MigrationGraph.with_distance(nasicon_mg.structure, "Mg", d)
assert_almost_equal(d, 4.59, decimal=2)
assert len(check_mg.unique_hops) == 6


def test_get_potcar_stats():
    """Check that get_potcar_stats covers all default input sets and fields.

    The test returns early (effectively skipping) when no POTCAR library is
    available, which is detected from the "No POTCAR for" error message.
    """
    default_input_sets = EmmetSettings().VASP_DEFAULT_INPUT_SETS

    try:
        potcar_stats = get_potcar_stats()
    except Exception as exc:
        if "No POTCAR for" not in str(exc):
            raise
        # No Potcar library available, skip test
        return

    # ensure that all calc types are included in potcar_stats
    assert potcar_stats.keys() == default_input_sets.keys()

    # ensure that each entry has needed fields for both
    # legacy and modern potcar validation
    required_fields = {"hash", "keywords", "titel", "stats"}
    for stats_by_symbol in potcar_stats.values():
        for symbol_stats in stats_by_symbol.values():
            assert set(symbol_stats) == required_fields
2 changes: 1 addition & 1 deletion emmet-builders/tests/test_vasp.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def validation_store():


def test_validator(tasks_store, validation_store):
settings = EmmetBuildSettings(VASP_VALIDATE_POTCAR_HASHES=False)
settings = EmmetBuildSettings(VASP_VALIDATE_POTCAR_STATS=False)
builder = TaskValidator(
tasks=tasks_store, task_validation=validation_store, settings=settings
)
Expand Down
4 changes: 2 additions & 2 deletions emmet-core/emmet/core/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,8 +149,8 @@ class EmmetSettings(BaseSettings):
description="Default input sets for task validation",
)

VASP_VALIDATE_POTCAR_HASHES: bool = Field(
True, description="Whether to validate POTCAR hash values."
VASP_VALIDATE_POTCAR_STATS: bool = Field(
True, description="Whether to validate POTCAR stat values."
)

VASP_CHECKED_LDAU_FIELDS: List[str] = Field(
Expand Down
71 changes: 50 additions & 21 deletions emmet-core/emmet/core/vasp/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@

class DeprecationMessage(DocEnum):
MANUAL = "M", "Manual deprecation"
SYMMETRY = (
"S001",
"Could not determine crystalline space group, needed for input set check.",
)
KPTS = "C001", "Too few KPoints"
KSPACING = "C002", "KSpacing not high enough"
ENCUT = "C002", "ENCUT too low"
Expand Down Expand Up @@ -66,7 +70,7 @@ def from_task_doc(
input_sets: Dict[str, ImportString] = SETTINGS.VASP_DEFAULT_INPUT_SETS,
LDAU_fields: List[str] = SETTINGS.VASP_CHECKED_LDAU_FIELDS,
max_allowed_scf_gradient: float = SETTINGS.VASP_MAX_SCF_GRADIENT,
potcar_hashes: Optional[Dict[CalcType, Dict[str, str]]] = None,
potcar_stats: Optional[Dict[CalcType, Dict[str, str]]] = None,
) -> "ValidationDoc":
"""
Determines if a calculation is valid based on expected input parameters from a pymatgen inputset
Expand All @@ -80,7 +84,7 @@ def from_task_doc(
LDAU_fields: LDAU fields to check for consistency
max_allowed_scf_gradient: maximum uphill gradient allowed for SCF steps after the
initial equilibration period
potcar_hashes: Dictionary of potcar hash data. Mapping is calculation type -> potcar symbol -> hash value.
potcar_stats: Dictionary of potcar summary stats data. Mapping is calculation type -> potcar symbol -> summary stats.
"""

bandgap = task_doc.output.bandgap
Expand Down Expand Up @@ -110,10 +114,19 @@ def from_task_doc(
reasons.append(DeprecationMessage.SET)
valid_input_set = None

try:
# Sometimes spglib can't determine space group with the default
# `symprec` and `angle_tolerance`. In these cases,
# `Structure.get_space_group_info()` fails
valid_input_set.structure.get_space_group_info()
except Exception:
reasons.append(DeprecationMessage.SYMMETRY)
valid_input_set = None

if valid_input_set:
# Checking POTCAR summary_stats if a directory is supplied
if potcar_hashes:
if _potcar_hash_check(task_doc, potcar_hashes):
if potcar_stats:
if _potcar_stats_check(task_doc, potcar_stats):
if task_type in [
TaskType.NSCF_Line,
TaskType.NSCF_Uniform,
Expand All @@ -130,6 +143,7 @@ def from_task_doc(
if task_type != task_type.NSCF_Line:
# Not validating k-point data for line-mode calculations as constructing
# the k-path is too costly for the builder and the uniform input set is used.

if valid_input_set.kpoints is not None:
if _kpoint_check(
valid_input_set,
Expand Down Expand Up @@ -311,7 +325,7 @@ def _kspacing_warnings(input_set, inputs, data, warnings, kspacing_tolerance):
)


def _potcar_hash_check(task_doc, potcar_hashes):
def _potcar_stats_check(task_doc, potcar_stats: dict):
"""
Checks to make sure the POTCAR summary stats is equal to the correct
value from the pymatgen input set.
Expand All @@ -325,32 +339,47 @@ def _potcar_hash_check(task_doc, potcar_hashes):
# Assume it is an old calculation without potcar_spec data and treat it as passing POTCAR hash check
return False

use_legacy_hash_check = False
if any(len(entry.get("summary_stats", {})) == 0 for entry in potcar_details):
# potcar_spec doesn't include summary_stats kwarg needed to check potcars
# fall back to header hash checking
use_legacy_hash_check = True

all_match = True
for entry in potcar_details:
symbol = entry["titel"].split(" ")[1]
ref_summ_stats = potcar_hashes[str(task_doc.calc_type)].get(symbol, None)
ref_summ_stats = potcar_stats[str(task_doc.calc_type)].get(symbol, None)

if not ref_summ_stats:
# Symbol differs from reference set - deprecate
all_match = False
break

key_match = all(
set(ref_summ_stats["keywords"][key])
== set(entry["summary_stats"]["keywords"][key])
for key in ["header", "data"]
)
if use_legacy_hash_check:
all_match = all(
entry[key] == ref_summ_stats[key]
for key in (
"hash",
"titel",
)
)

data_match = all(
abs(
ref_summ_stats["stats"][key][stat]
- entry["summary_stats"]["stats"][key][stat]
else:
all_match = all(
set(ref_summ_stats["keywords"][key])
== set(entry["summary_stats"]["keywords"][key])
for key in ["header", "data"]
) and all(
abs(
ref_summ_stats["stats"][key][stat]
- entry["summary_stats"]["stats"][key][stat]
)
< data_tol
for stat in ["MEAN", "ABSMEAN", "VAR", "MIN", "MAX"]
for key in ["header", "data"]
)
< data_tol
for stat in ["MEAN", "ABSMEAN", "VAR", "MIN", "MAX"]
for key in ["header", "data"]
)

if (not key_match) or (not data_match):
all_match = False
if not all_match:
break

return not all_match
Expand Down
Loading
Loading