Skip to content

Commit

Permalink
Merge branch 'main' into robocrys
Browse files (browse the repository at this point in the history)
  • Loading branch information
shyamd committed Jul 12, 2021
2 parents ac1f9f3 + 8a629ff commit f651d54
Show file tree
Hide file tree
Showing 18 changed files with 11,241 additions and 86 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,6 @@ ENV/

# PyCharm
.idea
<<<<<<< HEAD

# Pytest
.pytest_cache
Expand Down
16 changes: 16 additions & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,21 @@
# Changelog

## [v0.5.0](https://github.com/materialsproject/emmet/tree/v0.5.0) (2021-07-02)

[Full Changelog](https://github.com/materialsproject/emmet/compare/v0.4.1...v0.5.0)

**Merged pull requests:**

- Fix Validation [\#215](https://github.com/materialsproject/emmet/pull/215) ([shyamd](https://github.com/shyamd))

## [v0.4.1](https://github.com/materialsproject/emmet/tree/v0.4.1) (2021-06-27)

[Full Changelog](https://github.com/materialsproject/emmet/compare/v0.4.0...v0.4.1)

**Merged pull requests:**

- Fix oxidation doc composition [\#217](https://github.com/materialsproject/emmet/pull/217) ([shyamd](https://github.com/shyamd))

## [v0.4.0](https://github.com/materialsproject/emmet/tree/v0.4.0) (2021-06-24)

[Full Changelog](https://github.com/materialsproject/emmet/compare/v0.3.6...v0.4.0)
Expand Down
14 changes: 6 additions & 8 deletions emmet-builders/emmet/builders/materials/oxidation_states.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from pymatgen.core import __version__ as pymatgen_version

from emmet.core.oxidation_states import OxidationStateDoc
from emmet.core.utils import jsanitize


class OxidationStatesBuilder(MapBuilder):
Expand Down Expand Up @@ -37,14 +38,11 @@ def __init__(

def unary_function(self, item):
structure = Structure.from_dict(item["structure"])
oxi_doc = OxidationStateDoc.from_structure(structure)
doc = oxi_doc.dict()

doc.update(
{
"pymatgen_version": pymatgen_version,
"successful": True,
}
mpid = item["material_id"]

oxi_doc = OxidationStateDoc.from_structure(
structure=structure, material_id=mpid
)
doc = jsanitize(oxi_doc.dict(), allow_bson=True)

return doc
18 changes: 11 additions & 7 deletions emmet-builders/emmet/builders/materials/provenance.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def __init__(
self.provenance = provenance
self.source_snls = source_snls
self.settings = EmmetBuildSettings.autoload(settings)
self.query = query
self.query = query or {}
self.kwargs = kwargs

materials.key = "material_id"
Expand Down Expand Up @@ -194,7 +194,7 @@ def process_item(self, item) -> List[Dict]:
doc.history.append(self.settings.DEFAULT_HISTORY)
doc.references.append(self.settings.DEFAULT_REFERENCE)

snl_docs.append(doc.dict())
snl_docs.append(doc.dict(exclude_unset=True))

return snl_docs

Expand All @@ -211,25 +211,29 @@ def match(self, snls, mat):
m_strucs = [Structure.from_dict(mat["structure"])] + [
Structure.from_dict(init_struc) for init_struc in mat["initial_structures"]
]
snl_strucs = [StructureNL.from_dict(snl) for snl in snls]
snl_strucs = []
for snl in snls:
struc = Structure.from_dict(snl)
struc.snl = snl
snl_strucs.append(struc)

groups = group_structures(
m_strucs + snl_strucs,
ltol=self.settings.LTOL,
stol=self.settings.STOL,
angle_tol=self.settings.ANGLE_TOL,
comparator=OrderDisorderElementComparator(),
# comparator=OrderDisorderElementComparator(),
)
matched_groups = [
group
for group in groups
if any(isinstance(struc, Structure) for struc in group)
if any(not hasattr(struc, "snl") for struc in group)
]
snls = [
struc
struc.snl
for group in matched_groups
for struc in group
if isinstance(struc, StructureNL)
if hasattr(struc, "snl")
]

self.logger.debug(f"Found {len(snls)} SNLs for {mat['material_id']}")
Expand Down
11 changes: 9 additions & 2 deletions emmet-builders/emmet/builders/vasp/materials.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,13 +176,18 @@ def get_items(self) -> Iterator[List[Dict]]:
"task_id",
"formula_pretty",
"output.energy_per_atom",
"output.energy",
"output.structure",
"input.parameters",
# needed for run_type and task_type
"calcs_reversed.input.parameters",
"calcs_reversed.input.incar",
"orig_inputs",
# needed for entry from task_doc
"output.energy",
"input.is_hubbard",
"input.hubbards",
"input.potcar_spec",
"orig_inputs",
# misc info for materials doc
"input.structure",
"tags",
]
Expand Down Expand Up @@ -225,6 +230,8 @@ def process_item(self, items: List[Dict]) -> List[Dict]:
f"No valid ids found among ids {failed_ids}. This can be the case if the required "
"calculation types are missing from your tasks database."
)
materials.append(MaterialsDoc.construct_deprecated_material(tasks))

self.logger.debug(f"Produced {len(materials)} materials for {formula}")

return jsanitize([mat.dict() for mat in materials], allow_bson=True)
Expand Down
5 changes: 4 additions & 1 deletion emmet-builders/emmet/builders/vasp/task_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,14 @@ def __init__(
target=task_validation,
projection=[
"orig_inputs",
"input.hubbards",
"output.structure",
"output.bandgap",
"calcs_reversed.input.parameters",
"calcs_reversed.output.ionic_steps.electronic_steps.e_fr_energy",
"tags",
# Need these two for proper run_type determination
"calcs_reversed.input.parameters",
"calcs_reversed.input.incar",
],
query=query,
**kwargs,
Expand Down
4 changes: 3 additions & 1 deletion emmet-builders/emmet/builders/vasp/thermo.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,9 @@ def get_items(self) -> Iterator[List[Dict]]:

# Yield the chemical systems in order of decreasing size (largest first)
# Will build them in a similar manner to fast Pourbaix
for chemsys in sorted(to_process_chemsys, key=lambda x: len(x.split("-"))):
for chemsys in sorted(
to_process_chemsys, key=lambda x: len(x.split("-")), reverse=True
):
entries = self.get_entries(chemsys)
yield entries

Expand Down
56 changes: 33 additions & 23 deletions emmet-core/emmet/core/provenance.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
""" Core definition of a Provenance Document """
import warnings
from datetime import datetime
from datetime import date, datetime
from typing import ClassVar, Dict, List, Optional

from monty.json import MontyDecoder
from pybtex.database import BibliographyData, parse_string
from pydantic import BaseModel, EmailStr, Field, validator
from pybtex.errors import set_strict_mode
from pydantic import BaseModel, Field, root_validator, validator

from emmet.core.material_property import PropertyDoc
from emmet.core.mpid import MPID
Expand All @@ -27,7 +29,7 @@ class Author(BaseModel):
"""

name: str = Field(None)
email: EmailStr = Field(None)
email: str = Field(None)


class History(BaseModel):
Expand All @@ -41,6 +43,12 @@ class History(BaseModel):
None, description="Dictionary of exra data for this history node"
)

@root_validator(pre=True)
def str_to_dict(cls, values):
if isinstance(values.get("description"), str):
values["description"] = {"string": values.get("description")}
return values


class ProvenanceDoc(PropertyDoc):
"""
Expand Down Expand Up @@ -95,33 +103,41 @@ def from_SNLs(
Converts legacy Pymatgen SNLs into a single provenance document
"""

assert (
len(snls) > 0
), "Error must provide a non-zero list of SNLs to convert from SNLs"

decoder = MontyDecoder()
# Choose earliest created_at
created_at = sorted(
[
snl.get("about", {}).get("created_at", {}).get("string", datetime.max)
for snl in snls
]
decoder.process_decoded(
[snl.get("about", {}).get("created_at", datetime.max) for snl in snls]
)
)[0]

# Choose earliest history
history = sorted(
snls,
key=lambda snl: snl.get("about", {})
.get("created_at", {})
.get("string", datetime.max),
key=lambda snl: decoder.process_decoded(
snl.get("about", {}).get("created_at", datetime.max)
),
)[0]["about"]["history"]

# Aggregate all references into one dict to remove duplicates
refs = {}
for snl in snls:
try:
set_strict_mode(False)
entries = parse_string(snl["about"]["references"], bib_format="bibtex")
refs.update(entries.entries)
except Exception:
warnings.warn(f"Failed parsing bibtex: {snl['about']['references']}")
except Exception as e:
warnings.warn(
f"Failed parsing bibtex: {snl['about']['references']} due to {e}"
)

bib_data = BibliographyData(entries=refs)
references = [ref.to_string("bibtex") for ref in bib_data.entries]

references = [ref.to_string("bibtex") for ref in bib_data.entries.values()]

# TODO: Maybe we should combine this with robocrystallographer?
# TODO: Refine these tags / remarks
Expand All @@ -143,11 +159,11 @@ def from_SNLs(
]

# Check if this entry is experimental
if any(
snl.get("about", {}).get("history", [{}])[0].get("experimental", False)
experimental = any(
history.get("experimental", False)
for snl in snls
):
experimental = True
for history in snl.get("about", {}).get("history", [{}])
)

# Aggregate all the database IDs
snl_ids = [snl.get("snl_id", "") for snl in snls]
Expand All @@ -160,12 +176,6 @@ def from_SNLs(
db_ids = {k: list(filter(None, v)) for k, v in db_ids.items()}
db_ids = {k: v for k, v in db_ids.items() if len(v) > 0}

# Get experimental bool
experimental = any(
snl.get("about", {}).get("history", [{}])[0].get("experimental", False)
for snl in snls
)

snl_fields = {
"created_at": created_at,
"references": references,
Expand Down
2 changes: 2 additions & 0 deletions emmet-core/emmet/core/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ class EmmetSettings(BaseSettings):
{
"GGA Structure Optimization": "pymatgen.io.vasp.sets.MPRelaxSet",
"GGA+U Structure Optimization": "pymatgen.io.vasp.sets.MPRelaxSet",
"GGA Static": "pymatgen.io.vasp.sets.MPStaticSet",
"GGA+U Static": "pymatgen.io.vasp.sets.MPStaticSet",
},
description="Default input sets for task validation",
)
Expand Down
6 changes: 4 additions & 2 deletions emmet-core/emmet/core/structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,13 +89,15 @@ def from_composition(
if fields is None
else fields
)
composition = composition.remove_charges()

elsyms = sorted(set([e.symbol for e in composition.elements]))

data = {
"elements": elsyms,
"nelements": len(elsyms),
"composition": composition,
"composition_reduced": composition.reduced_composition,
"composition_reduced": composition.reduced_composition.remove_charges(),
"formula_pretty": composition.reduced_formula,
"formula_anonymous": composition.anonymized_formula,
"chemsys": "-".join(elsyms),
Expand Down Expand Up @@ -130,7 +132,7 @@ def from_structure(
if fields is None
else fields
)
comp = structure.composition
comp = structure.composition.remove_charges()
elsyms = sorted(set([e.symbol for e in comp.elements]))
symmetry = SymmetryData.from_structure(structure)

Expand Down
13 changes: 12 additions & 1 deletion emmet-core/emmet/core/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import datetime
from enum import Enum
from itertools import groupby
from typing import Iterator, List
from typing import Any, Iterator, List

import bson
import numpy as np
Expand Down Expand Up @@ -139,6 +139,17 @@ class ValueEnum(Enum):
def __str__(self):
return str(self.value)

def __eq__(self, o: object) -> bool:
"""Special Equals to enable converting strings back to the enum"""
if isinstance(o, str):
return super().__eq__(self.__class__(o))
elif isinstance(o, self.__class__):
return super().__eq__(o)
return False

def __hash__(self) -> Any:
return super().__hash__()


class DocEnum(ValueEnum):
"""
Expand Down
Loading

0 comments on commit f651d54

Please sign in to comment.