-
Notifications
You must be signed in to change notification settings - Fork 25
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add ALIGNN FF (aborted) #47
Changes from 8 commits
3c7e186
7f515cd
2d5c6df
abc1c2d
755a7a2
3673059
0bdd45a
2e6ecc1
ae833bd
0aadbb2
cbf3fce
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,15 +11,14 @@ authors: | |
- name: Brian DeCost | ||
affiliation: National Institute of Standards and Technology | ||
orcid: https://orcid.org/0000-0002-3459-5888 | ||
email: [email protected] | ||
- name: Philipp Benner | ||
affiliation: Bundesanstalt für Materialforschung und -prüfung BAM | ||
orcid: https://orcid.org/0000-0002-0912-8137 | ||
github: https://github.com/pbenner | ||
repo: https://github.com/usnistgov/alignn | ||
url: https://jarvis.nist.gov/jalignn | ||
doi: https://doi.org/10.1038/s41524-021-00650-1 | ||
preprint: https://arxiv.org/abs/2209.05554 | ||
preprint: https://arxiv.org/abs/2106.01829 | ||
requirements: | ||
ase: 3.22.0 | ||
dgl-cu111: 0.6.1 | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
diff --git a/alignn/ff/ff.py b/alignn/ff/ff.py | ||
index 2dc916f..a569184 100644 | ||
--- a/alignn/ff/ff.py | ||
+++ b/alignn/ff/ff.py | ||
@@ -46,6 +46,8 @@ from jarvis.analysis.defects.surface import Surface | ||
# from jarvis.core.kpoints import Kpoints3D as Kpoints | ||
# from jarvis.core.atoms import get_supercell_dims | ||
|
||
+import torch | ||
+ | ||
try: | ||
from gpaw import GPAW, PW | ||
except Exception: | ||
@@ -62,7 +64,6 @@ __author__ = "Kamal Choudhary, Brian DeCost, Keith Butler, Lily Major" | ||
def default_path(): | ||
"""Get default model path.""" | ||
dpath = os.path.abspath(str(os.path.join(os.path.dirname(__file__), "."))) | ||
- print("model_path", dpath) | ||
return dpath | ||
|
||
|
||
@@ -138,8 +139,6 @@ class AlignnAtomwiseCalculator(ase.calculators.calculator.Calculator): | ||
|
||
config.model.output_features = 1 | ||
|
||
- import torch | ||
- | ||
if self.device is None: | ||
self.device = torch.device( | ||
"cuda" if torch.cuda.is_available() else "cpu" | ||
@@ -193,6 +192,7 @@ class ForceField(object): | ||
logfile="alignn_ff.log", | ||
dyn=None, | ||
communicator=None, | ||
+ device="cuda" if torch.cuda.is_available() else "cpu", | ||
): | ||
"""Initialize class.""" | ||
self.jarvis_atoms = jarvis_atoms | ||
@@ -225,12 +225,13 @@ class ForceField(object): | ||
# print ('STRUCTURE PROVIDED:') | ||
# print (ase_to_atoms(self.atoms)) | ||
# print () | ||
+ import torch | ||
self.atoms.set_calculator( | ||
AlignnAtomwiseCalculator( | ||
path=self.model_path, | ||
include_stress=self.include_stress, | ||
model_filename=self.model_filename, | ||
- # device="cuda" if torch.cuda.is_available() else "cpu", | ||
+ device=device, | ||
) | ||
) | ||
|
||
@@ -238,6 +239,7 @@ class ForceField(object): | ||
"""Print info.""" | ||
if isinstance(self.atoms, ExpCellFilter): | ||
self.atoms = self.atoms.atoms | ||
+ return | ||
line = "" | ||
try: | ||
line = f"time={self.dyn.get_time() / units.fs: 5.0f} fs " | ||
@@ -297,9 +299,9 @@ class ForceField(object): | ||
raise ValueError("Check optimizer", optimizer) | ||
if optimize_lattice: | ||
self.atoms = ExpCellFilter(self.atoms) | ||
- print("OPTIMIZATION") | ||
+ | ||
self.dyn = optimizer( | ||
- self.atoms, trajectory="opt.traj", logfile="opt.log" | ||
+ self.atoms, trajectory=trajectory, logfile=logfile | ||
) | ||
self.dyn.attach(self.print_format, interval=interval) | ||
self.dyn.run(fmax=fmax, steps=steps) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
# %% | ||
from __future__ import annotations | ||
|
||
import os | ||
|
||
import numpy as np | ||
import pandas as pd | ||
from pqdm.processes import pqdm | ||
from pymatgen.core import Structure | ||
from pymatgen.io.jarvis import JarvisAtomsAdaptor | ||
|
||
from matbench_discovery import DEBUG, today | ||
from matbench_discovery.data import DATA_FILES, df_wbm | ||
|
||
__author__ = "Janosh Riebesell, Philipp Benner" | ||
__date__ = "2023-07-11" | ||
|
||
|
||
# %% read environment variables
# TASK_ID selects which of the n_splits batches this process relaxes
# (0 = relax the full dataset in a single job, see split logic below)
batch = int(os.getenv("TASK_ID", default="0"))
out_dir = os.getenv("SBATCH_OUTPUT", default=f"{today}-alignn-wbm-IS2RE")


# %%
n_splits = 100  # number of batches the WBM test set is split into
n_processes_per_task = 10  # worker processes per compute node
module_dir = os.path.dirname(__file__)
# model_name = "mp_e_form_alignn"  # pre-trained by NIST
model_name = f"{out_dir}/best-model.pth"
task_type = "IS2RE"  # "IS2RE" = initial structures, "RS2RE" = relaxed structures
target_col = "e_form_per_atom_mp2020_corrected"
input_col = "initial_structure"
id_col = "material_id"
job_name = f"{model_name}-wbm-{task_type}{'-debug' if DEBUG else ''}"
# batch 0 writes a single combined file; batches 1..n_splits write per-batch files
out_path = (
    f"{out_dir}/{'alignn-relaxed-structs' if batch == 0 else f'{batch=}'}.json.gz"
)

if batch < 0 or batch > n_splits:
    raise SystemExit(f"Invalid task_id={batch}")
if batch > 0 and not os.path.exists(out_dir):
    os.mkdir(out_dir)
if os.path.isfile(out_path):
    # refuse to overwrite existing results
    raise SystemExit(f"{out_path = } already exists, exiting")
|
||
|
||
# %% Load data
# Pick the input file and structure column matching the task type.
data_path = {
    "IS2RE": DATA_FILES.wbm_initial_structures,
    "RS2RE": DATA_FILES.wbm_computed_structure_entries,
}[task_type]
input_col = {"IS2RE": "initial_structure", "RS2RE": "relaxed_structure"}[task_type]

df_in = pd.read_json(data_path).set_index(id_col)

df_in[target_col] = df_wbm[target_col]
if task_type == "RS2RE":
    # computed structure entries wrap the structure in a "structure" key
    df_in[input_col] = [x["structure"] for x in df_in.computed_structure_entry]
assert input_col in df_in, f"{input_col=} not in {list(df_in)}"

# Split data into n_splits parts and process only one batch.
# FIX: use n_splits instead of the hard-coded 100 so changing n_splits
# (as the accompanying readme instructs) actually takes effect here.
if batch != 0:
    df_in = np.array_split(df_in, n_splits)[batch - 1]
    print(f"Relaxing materials in range {df_in.index[0]} - {df_in.index[-1]}")
else:
    print("Relaxing full range of materials")
|
||
|
||
# %% Relax structures | ||
def alignn_relax(structure: dict) -> Structure:
    """Relax structure using Alignn FF.

    Args:
        structure (dict): pymatgen Structure in dict form (the JSON-loaded
            WBM rows store structures as dicts — it is passed straight to
            Structure.from_dict below, so a dict, not a Structure, is expected).

    Returns:
        Structure: Relaxed structure.
    """
    # Cuda must be only initialized in child processes
    import torch
    from alignn.ff.ff import ForceField, default_path

    ff = ForceField(
        jarvis_atoms=JarvisAtomsAdaptor.get_atoms(Structure.from_dict(structure)),
        model_path=default_path(),
        # batch % 4 spreads tasks across GPUs — assumes 4 GPUs per node
        # (matches the readme's "48 CPU cores with 4 GPUs"); TODO confirm
        device=f"cuda:{batch % 4}" if torch.cuda.is_available() else "cpu",
        logfile="/dev/null",
    )
    # Relax structure; discard trajectory/log to avoid the
    # "Too many open files" issue patched in alignn-ff-2023.07.05.patch
    opt, _, _ = ff.optimize_atoms(trajectory=None, logfile="/dev/null")

    return JarvisAtomsAdaptor.get_structure(opt)
|
||
|
||
# FIX: read input_col instead of hard-coded "initial_structure" so the
# RS2RE task type (input_col == "relaxed_structure") feeds the correct
# structures to alignn_relax instead of silently re-relaxing initial ones.
structures = [df_in.loc[material_id][input_col] for material_id in df_in.index]
# pqdm fans alignn_relax out over n_processes_per_task child processes
df_relaxed = pqdm(structures, alignn_relax, n_jobs=n_processes_per_task)

df_in = df_in.assign(relaxed_structure=df_relaxed)


# %% save results
df_in.to_json(out_path)
|
||
# Examples of materials that take ages to converge: | ||
# task_id = 75, df_in.iloc[856]: wbm-3-76848 | ||
# task_id = 75, df_in.iloc[986]: wbm-3-76978 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
model_name: ALIGNN FF
model_version: 2023.07.01
matbench_discovery_version: 1.0
date_added: "2023-07-11"
date_published: "2022-09-16"
authors:
  - name: Kamal Choudhary
    affiliation: National Institute of Standards and Technology
    email: [email protected]
    orcid: https://orcid.org/0000-0001-9737-8074
  - name: Brian DeCost
    affiliation: National Institute of Standards and Technology
    orcid: https://orcid.org/0000-0002-3459-5888
  - name: Lily Major
    affiliation: Aberystwyth University, UK
    orcid: https://orcid.org/0000-0002-5783-8432
  - name: Keith Butler
    affiliation: Rutherford Appleton Laboratory, UK
    orcid: https://orcid.org/0000-0001-5432-5597
  - name: Jeyan Thiyagalingam
    affiliation: Rutherford Appleton Laboratory, UK
    orcid: https://orcid.org/0000-0002-2167-1343
  - name: Francesca Tavazza
    affiliation: National Institute of Standards and Technology
    orcid: https://orcid.org/0000-0002-5602-180X
  - name: Philipp Benner
    affiliation: Bundesanstalt für Materialforschung und -prüfung BAM
    orcid: https://orcid.org/0000-0002-0912-8137
    github: https://github.com/pbenner
repo: https://github.com/usnistgov/alignn
url: https://jarvis.nist.gov/jalignn
doi: https://doi.org/10.1039/D2DD00096B
preprint: https://arxiv.org/abs/2209.05554
requirements:
  # ase corrected 3.22.0 -> 3.22.1 per submitter's confirmation in PR review
  ase: 3.22.1
  dgl-cu111: 0.6.1
  numpy: 1.24.3
  pandas: 2.0.1
  scikit-learn: 1.2.2
  torch: 1.9.0+cu111
trained_for_benchmark: false

# hyperparams: see align-config.json
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# ALIGNN-FF formation energy predictions on WBM test set after ML relaxation | ||
|
||
The patch `alignn-ff-2023.07.05.patch` fixes the following issue: | ||
|
||
```bash | ||
Traceback (most recent call last): | ||
File "alignn_relax.py", line 96, in <module> | ||
File "alignn_relax.py", line 88, in alignn_relax | ||
File "../alignn/ff/ff.py", line 310, in optimize_atoms | ||
File "../alignn/lib/python3.9/site-packages/ase/optimize/optimize.py", line 269, in run | ||
File "../alignn/lib/python3.9/site-packages/ase/optimize/optimize.py", line 156, in run | ||
File "../alignn/lib/python3.9/site-packages/ase/optimize/optimize.py", line 129, in irun | ||
File "../alignn/lib/python3.9/site-packages/ase/optimize/optimize.py", line 108, in call_observers | ||
File "../alignn/lib/python3.9/site-packages/ase/io/trajectory.py", line 132, in write | ||
File "../alignn/lib/python3.9/site-packages/ase/io/trajectory.py", line 156, in _write_atoms | ||
File "../alignn/lib/python3.9/site-packages/ase/io/trajectory.py", line 381, in write_atoms | ||
File "../alignn/lib/python3.9/site-packages/ase/io/ulm.py", line 400, in write | ||
File "../alignn/lib/python3.9/site-packages/ase/io/ulm.py", line 325, in fill | ||
OSError: [Errno 24] Too many open files | ||
``` | ||
|
||
To reproduce the ALIGNN relaxed predictions, run the following scripts: | ||
|
||
1. `alignn_relax.py`: Set the variable `n_splits` to the number of GPU compute nodes. On each compute node, set the environment variable `TASK_ID` to a value in the range 1-`n_splits`. Set the variable `n_processes_per_task` to the number of processes on a single node. For 48 CPU cores with 4 GPUs a good setting is to use 10 processes. | ||
2. `test_alignn_relaxed.py`: Read the relaxed structures and compute predictions. Set the variable `n_splits` accordingly. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we work around this new dependency? I'm not sure how it differs from `tqdm`? Is `tqdm` lacking parallel process support?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, you can just use tqdm. For parallel processing pqdm has a bit nicer output, that's all.