Skip to content

Commit

Permalink
Merge pull request #214 from materialsproject/r2scan
Browse files Browse the repository at this point in the history
Add R2SCAN to run_types
  • Loading branch information
shyamd authored Jun 24, 2021
2 parents 7b9627f + ccfe9f7 commit 1fc2aa5
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 793 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -117,3 +117,6 @@ emmet/scripts/token.json
.vscode

Error_Record

# emmet autogenerated enums
emmet-core/emmet/core/vasp/calc_types/enums.py
50 changes: 22 additions & 28 deletions emmet-builders/emmet/builders/vasp/materials.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,20 +94,18 @@ def prechunk(self, number_splits: int) -> Iterable[Dict]:
temp_query["tags"] = {"$in": self.settings.BUILD_TAGS}

self.logger.info("Finding tasks to process")
- all_tasks = {
- doc[self.tasks.key]
- for doc in self.tasks.query(temp_query, [self.tasks.key])
- }
- processed_tasks = {
- t_id
- for d in self.materials.query({}, ["task_ids"])
- for t_id in d.get("task_ids", [])
- }
- to_process_tasks = all_tasks - processed_tasks
- to_process_forms = self.tasks.distinct(
- "formula_pretty", {self.tasks.key: {"$in": list(to_process_tasks)}}
+ all_tasks = list(
+ self.tasks.query(temp_query, [self.tasks.key, "formula_pretty"])
  )

+ processed_tasks = set(self.materials.distinct("task_ids"))
+ to_process_tasks = {d[self.tasks.key] for d in all_tasks} - processed_tasks
+ to_process_forms = {
+ d["formula_pretty"]
+ for d in all_tasks
+ if d[self.tasks.key] in to_process_tasks
+ }
+
for formula_chunk in grouper(to_process_forms, number_splits):
yield {"formula_pretty": {"$in": list(formula_chunk)}}

Expand Down Expand Up @@ -144,19 +142,18 @@ def get_items(self) -> Iterator[List[Dict]]:
temp_query["tags"] = {"$in": self.settings.BUILD_TAGS}

self.logger.info("Finding tasks to process")
- all_tasks = {
- doc[self.tasks.key]
- for doc in self.tasks.query(temp_query, [self.tasks.key])
- }
- processed_tasks = {
- t_id
- for d in self.materials.query({}, ["task_ids"])
- for t_id in d.get("task_ids", [])
- }
- to_process_tasks = all_tasks - processed_tasks
- to_process_forms = self.tasks.distinct(
- "formula_pretty", {self.tasks.key: {"$in": list(to_process_tasks)}}
+ all_tasks = list(
+ self.tasks.query(temp_query, [self.tasks.key, "formula_pretty"])
  )
+
+ processed_tasks = set(self.materials.distinct("task_ids"))
+ to_process_tasks = {d[self.tasks.key] for d in all_tasks} - processed_tasks
+ to_process_forms = {
+ d["formula_pretty"]
+ for d in all_tasks
+ if d[self.tasks.key] in to_process_tasks
+ }
+
self.logger.info(f"Found {len(to_process_tasks)} unprocessed tasks")
self.logger.info(f"Found {len(to_process_forms)} unprocessed formulas")

Expand Down Expand Up @@ -197,10 +194,7 @@ def get_items(self) -> Iterator[List[Dict]]:
self.tasks.query(criteria=tasks_query, properties=projected_fields)
)
for t in tasks:
- if t[self.tasks.key] in invalid_ids:
- t["is_valid"] = False
- else:
- t["is_valid"] = True
+ t["is_valid"] = t[self.tasks.key] not in invalid_ids

yield tasks

Expand Down
Loading

0 comments on commit 1fc2aa5

Please sign in to comment.