Merge branch 'main' into robocrys

materialsproject · Jul 12, 2021 · f651d54
2 parents ac1f9f3 + 8a629ff
commit f651d54
Show file tree

Hide file tree

Showing 18 changed files with 11,241 additions and 86 deletions.
diff --git a/.gitignore b/.gitignore
@@ -106,7 +106,6 @@ ENV/
 
 # PyCharm
 .idea
-<<<<<<< HEAD
 
 # Pytest
 .pytest_cache

diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
@@ -1,5 +1,21 @@
 # Changelog
 
+## [v0.5.0](https://github.com/materialsproject/emmet/tree/v0.5.0) (2021-07-02)
+
+[Full Changelog](https://github.com/materialsproject/emmet/compare/v0.4.1...v0.5.0)
+
+**Merged pull requests:**
+
+- Fix Validation [\#215](https://github.com/materialsproject/emmet/pull/215) ([shyamd](https://github.com/shyamd))
+
+## [v0.4.1](https://github.com/materialsproject/emmet/tree/v0.4.1) (2021-06-27)
+
+[Full Changelog](https://github.com/materialsproject/emmet/compare/v0.4.0...v0.4.1)
+
+**Merged pull requests:**
+
+- Fix oxidation doc composition [\#217](https://github.com/materialsproject/emmet/pull/217) ([shyamd](https://github.com/shyamd))
+
 ## [v0.4.0](https://github.com/materialsproject/emmet/tree/v0.4.0) (2021-06-24)
 
 [Full Changelog](https://github.com/materialsproject/emmet/compare/v0.3.6...v0.4.0)

diff --git a/emmet-builders/emmet/builders/materials/oxidation_states.py b/emmet-builders/emmet/builders/materials/oxidation_states.py
@@ -4,6 +4,7 @@
 from pymatgen.core import __version__ as pymatgen_version
 
 from emmet.core.oxidation_states import OxidationStateDoc
+from emmet.core.utils import jsanitize
 
 
 class OxidationStatesBuilder(MapBuilder):
@@ -37,14 +38,11 @@ def __init__(
 
     def unary_function(self, item):
         structure = Structure.from_dict(item["structure"])
-        oxi_doc = OxidationStateDoc.from_structure(structure)
-        doc = oxi_doc.dict()
-
-        doc.update(
-            {
-                "pymatgen_version": pymatgen_version,
-                "successful": True,
-            }
+        mpid = item["material_id"]
+
+        oxi_doc = OxidationStateDoc.from_structure(
+            structure=structure, material_id=mpid
         )
+        doc = jsanitize(oxi_doc.dict(), allow_bson=True)
 
         return doc
diff --git a/emmet-builders/emmet/builders/materials/provenance.py b/emmet-builders/emmet/builders/materials/provenance.py
@@ -36,7 +36,7 @@ def __init__(
         self.provenance = provenance
         self.source_snls = source_snls
         self.settings = EmmetBuildSettings.autoload(settings)
-        self.query = query
+        self.query = query or {}
         self.kwargs = kwargs
 
         materials.key = "material_id"
@@ -194,7 +194,7 @@ def process_item(self, item) -> List[Dict]:
                 doc.history.append(self.settings.DEFAULT_HISTORY)
                 doc.references.append(self.settings.DEFAULT_REFERENCE)
 
-                snl_docs.append(doc.dict())
+                snl_docs.append(doc.dict(exclude_unset=True))
 
         return snl_docs
 
@@ -211,25 +211,29 @@ def match(self, snls, mat):
         m_strucs = [Structure.from_dict(mat["structure"])] + [
             Structure.from_dict(init_struc) for init_struc in mat["initial_structures"]
         ]
-        snl_strucs = [StructureNL.from_dict(snl) for snl in snls]
+        snl_strucs = []
+        for snl in snls:
+            struc = Structure.from_dict(snl)
+            struc.snl = snl
+            snl_strucs.append(struc)
 
         groups = group_structures(
             m_strucs + snl_strucs,
             ltol=self.settings.LTOL,
             stol=self.settings.STOL,
             angle_tol=self.settings.ANGLE_TOL,
-            comparator=OrderDisorderElementComparator(),
+            # comparator=OrderDisorderElementComparator(),
         )
         matched_groups = [
             group
             for group in groups
-            if any(isinstance(struc, Structure) for struc in group)
+            if any(not hasattr(struc, "snl") for struc in group)
         ]
         snls = [
-            struc
+            struc.snl
             for group in matched_groups
             for struc in group
-            if isinstance(struc, StructureNL)
+            if hasattr(struc, "snl")
         ]
 
         self.logger.debug(f"Found {len(snls)} SNLs for {mat['material_id']}")

diff --git a/emmet-builders/emmet/builders/vasp/materials.py b/emmet-builders/emmet/builders/vasp/materials.py
@@ -176,13 +176,18 @@ def get_items(self) -> Iterator[List[Dict]]:
             "task_id",
             "formula_pretty",
             "output.energy_per_atom",
-            "output.energy",
             "output.structure",
+            "input.parameters",
+            # needed for run_type and task_type
             "calcs_reversed.input.parameters",
+            "calcs_reversed.input.incar",
+            "orig_inputs",
+            # needed for entry from task_doc
+            "output.energy",
             "input.is_hubbard",
             "input.hubbards",
             "input.potcar_spec",
-            "orig_inputs",
+            # misc info for materials doc
             "input.structure",
             "tags",
         ]
@@ -225,6 +230,8 @@ def process_item(self, items: List[Dict]) -> List[Dict]:
                     f"No valid ids found among ids {failed_ids}. This can be the case if the required "
                     "calculation types are missing from your tasks database."
                 )
+                materials.append(MaterialsDoc.construct_deprecated_material(tasks))
+
         self.logger.debug(f"Produced {len(materials)} materials for {formula}")
 
         return jsanitize([mat.dict() for mat in materials], allow_bson=True)

diff --git a/emmet-builders/emmet/builders/vasp/task_validator.py b/emmet-builders/emmet/builders/vasp/task_validator.py
@@ -35,11 +35,14 @@ def __init__(
             target=task_validation,
             projection=[
                 "orig_inputs",
+                "input.hubbards",
                 "output.structure",
                 "output.bandgap",
-                "calcs_reversed.input.parameters",
                 "calcs_reversed.output.ionic_steps.electronic_steps.e_fr_energy",
                 "tags",
+                # Need these two for proper run_type determination
+                "calcs_reversed.input.parameters",
+                "calcs_reversed.input.incar",
             ],
             query=query,
             **kwargs,

diff --git a/emmet-builders/emmet/builders/vasp/thermo.py b/emmet-builders/emmet/builders/vasp/thermo.py
@@ -115,7 +115,9 @@ def get_items(self) -> Iterator[List[Dict]]:
 
         # Yield the chemical systems in order of increasing size
         # Will build them in a similar manner to fast Pourbaix
-        for chemsys in sorted(to_process_chemsys, key=lambda x: len(x.split("-"))):
+        for chemsys in sorted(
+            to_process_chemsys, key=lambda x: len(x.split("-")), reverse=True
+        ):
             entries = self.get_entries(chemsys)
             yield entries
 

diff --git a/emmet-core/emmet/core/provenance.py b/emmet-core/emmet/core/provenance.py
@@ -1,10 +1,12 @@
 """ Core definition of a Provenance Document """
 import warnings
-from datetime import datetime
+from datetime import date, datetime
 from typing import ClassVar, Dict, List, Optional
 
+from monty.json import MontyDecoder
 from pybtex.database import BibliographyData, parse_string
-from pydantic import BaseModel, EmailStr, Field, validator
+from pybtex.errors import set_strict_mode
+from pydantic import BaseModel, Field, root_validator, validator
 
 from emmet.core.material_property import PropertyDoc
 from emmet.core.mpid import MPID
@@ -27,7 +29,7 @@ class Author(BaseModel):
     """
 
     name: str = Field(None)
-    email: EmailStr = Field(None)
+    email: str = Field(None)
 
 
 class History(BaseModel):
@@ -41,6 +43,12 @@ class History(BaseModel):
         None, description="Dictionary of exra data for this history node"
     )
 
+    @root_validator(pre=True)
+    def str_to_dict(cls, values):
+        if isinstance(values.get("description"), str):
+            values["description"] = {"string": values.get("description")}
+        return values
+
 
 class ProvenanceDoc(PropertyDoc):
     """
@@ -95,33 +103,41 @@ def from_SNLs(
         Converts legacy Pymatgen SNLs into a single provenance document
         """
 
+        assert (
+            len(snls) > 0
+        ), "Error must provide a non-zero list of SNLs to convert from SNLs"
+
+        decoder = MontyDecoder()
         # Choose earliest created_at
         created_at = sorted(
-            [
-                snl.get("about", {}).get("created_at", {}).get("string", datetime.max)
-                for snl in snls
-            ]
+            decoder.process_decoded(
+                [snl.get("about", {}).get("created_at", datetime.max) for snl in snls]
+            )
         )[0]
 
         # Choose earliest history
         history = sorted(
             snls,
-            key=lambda snl: snl.get("about", {})
-            .get("created_at", {})
-            .get("string", datetime.max),
+            key=lambda snl: decoder.process_decoded(
+                snl.get("about", {}).get("created_at", datetime.max)
+            ),
         )[0]["about"]["history"]
 
         # Aggregate all references into one dict to remove duplicates
         refs = {}
         for snl in snls:
             try:
+                set_strict_mode(False)
                 entries = parse_string(snl["about"]["references"], bib_format="bibtex")
                 refs.update(entries.entries)
-            except Exception:
-                warnings.warn(f"Failed parsing bibtex: {snl['about']['references']}")
+            except Exception as e:
+                warnings.warn(
+                    f"Failed parsing bibtex: {snl['about']['references']} due to {e}"
+                )
 
         bib_data = BibliographyData(entries=refs)
-        references = [ref.to_string("bibtex") for ref in bib_data.entries]
+
+        references = [ref.to_string("bibtex") for ref in bib_data.entries.values()]
 
         # TODO: Maybe we should combine this robocrystallographer?
         # TODO: Refine these tags / remarks
@@ -143,11 +159,11 @@ def from_SNLs(
         ]
 
         # Check if this entry is experimental
-        if any(
-            snl.get("about", {}).get("history", [{}])[0].get("experimental", False)
+        experimental = any(
+            history.get("experimental", False)
             for snl in snls
-        ):
-            experimental = True
+            for history in snl.get("about", {}).get("history", [{}])
+        )
 
         # Aggregate all the database IDs
         snl_ids = [snl.get("snl_id", "") for snl in snls]
@@ -160,12 +176,6 @@ def from_SNLs(
         db_ids = {k: list(filter(None, v)) for k, v in db_ids.items()}
         db_ids = {k: v for k, v in db_ids.items() if len(v) > 0}
 
-        # Get experimental bool
-        experimental = any(
-            snl.get("about", {}).get("history", [{}])[0].get("experimental", False)
-            for snl in snls
-        )
-
         snl_fields = {
             "created_at": created_at,
             "references": references,

diff --git a/emmet-core/emmet/core/settings.py b/emmet-core/emmet/core/settings.py
@@ -66,6 +66,8 @@ class EmmetSettings(BaseSettings):
         {
             "GGA Structure Optimization": "pymatgen.io.vasp.sets.MPRelaxSet",
             "GGA+U Structure Optimization": "pymatgen.io.vasp.sets.MPRelaxSet",
+            "GGA Static": "pymatgen.io.vasp.sets.MPStaticSet",
+            "GGA+U Static": "pymatgen.io.vasp.sets.MPStaticSet",
         },
         description="Default input sets for task validation",
     )

diff --git a/emmet-core/emmet/core/structure.py b/emmet-core/emmet/core/structure.py
@@ -89,13 +89,15 @@ def from_composition(
             if fields is None
             else fields
         )
+        composition = composition.remove_charges()
+
         elsyms = sorted(set([e.symbol for e in composition.elements]))
 
         data = {
             "elements": elsyms,
             "nelements": len(elsyms),
             "composition": composition,
-            "composition_reduced": composition.reduced_composition,
+            "composition_reduced": composition.reduced_composition.remove_charges(),
             "formula_pretty": composition.reduced_formula,
             "formula_anonymous": composition.anonymized_formula,
             "chemsys": "-".join(elsyms),
@@ -130,7 +132,7 @@ def from_structure(
             if fields is None
             else fields
         )
-        comp = structure.composition
+        comp = structure.composition.remove_charges()
         elsyms = sorted(set([e.symbol for e in comp.elements]))
         symmetry = SymmetryData.from_structure(structure)
 

diff --git a/emmet-core/emmet/core/utils.py b/emmet-core/emmet/core/utils.py
@@ -1,7 +1,7 @@
 import datetime
 from enum import Enum
 from itertools import groupby
-from typing import Iterator, List
+from typing import Any, Iterator, List
 
 import bson
 import numpy as np
@@ -139,6 +139,17 @@ class ValueEnum(Enum):
     def __str__(self):
         return str(self.value)
 
+    def __eq__(self, o: object) -> bool:
+        """Special Equals to enable converting strings back to the enum"""
+        if isinstance(o, str):
+            return super().__eq__(self.__class__(o))
+        elif isinstance(o, self.__class__):
+            return super().__eq__(o)
+        return False
+
+    def __hash__(self) -> Any:
+        return super().__hash__()
+
 
 class DocEnum(ValueEnum):
     """