Add ALIGNN FF (aborted) #47

Merged (11 commits, Aug 9, 2023)
7 changes: 7 additions & 0 deletions .github/workflows/test.yml
@@ -7,6 +7,13 @@ on:
branches: [main]
release:
types: [published]
workflow_dispatch:
inputs:
task:
type: choice
options: [tests, release]
default: tests
description: Only run tests or release a new version of pymatgen to PyPI after tests pass.

jobs:
tests:
3 changes: 1 addition & 2 deletions models/alignn/metadata.yml
@@ -11,15 +11,14 @@ authors:
- name: Brian DeCost
affiliation: National Institute of Standards and Technology
orcid: https://orcid.org/0000-0002-3459-5888
email: [email protected]
- name: Philipp Benner
affiliation: Bundesanstalt für Materialforschung und -prüfung BAM
orcid: https://orcid.org/0000-0002-0912-8137
github: https://github.com/pbenner
repo: https://github.com/usnistgov/alignn
url: https://jarvis.nist.gov/jalignn
doi: https://nature.com/articles/s41524-021-00650-1
preprint: https://arxiv.org/abs/2209.05554
preprint: https://arxiv.org/abs/2106.01829
requirements:
ase: 3.22.0
dgl-cu111: 0.6.1
5 changes: 2 additions & 3 deletions models/alignn/readme.md
@@ -20,6 +20,5 @@ Replace `/path/to/` with the actual path to the patch file.

The directory contains the following files, which must be executed in the given order to reproduce the results:

1. `train_data.py`: Export Matbench Discovery training data to ALIGNN compatible format. This script outputs training data in the directory `data_train`. In addition, a small test data set is set apart and stored in the directory `data_test`
1. `train_alignn.py`: Train an ALIGNN model on previously exported data. The resulting model is stored in the directory `data-train-result`
1. `test_alignn.py`: Test a trained ALIGNN model on the WBM data. Generates `2023-06-03-mp-e-form-alignn-wbm-IS2RE.csv.gz`.
1. `train_alignn.py`: Train an ALIGNN model on all 154k MP computed structure entries. The resulting model checkpoint is saved to the `out_dir` variable in that script and also uploaded to `wandb` from where it is publicly available for 3rd party reproducibility.
1. `test_alignn.py`: Test a trained ALIGNN model on the WBM data. Generates `2023-06-03-mp-e-form-alignn-wbm-IS2RE.csv.gz`.
1 change: 1 addition & 0 deletions models/alignn/test_alignn.py
@@ -30,6 +30,7 @@

# %%
model_name = "mp_e_form_alignn" # pre-trained by NIST
# TODO fix this to load checkpoint from figshare
# model_name = f"{module_dir}/data-train-result/best-model.pth"
task_type = "IS2RE"
target_col = "e_form_per_atom_mp2020_corrected"
Binary file not shown.
73 changes: 73 additions & 0 deletions models/alignn_ff/alignn-ff-2023.07.05.patch
@@ -0,0 +1,73 @@
diff --git a/alignn/ff/ff.py b/alignn/ff/ff.py
index 2dc916f..a569184 100644
--- a/alignn/ff/ff.py
+++ b/alignn/ff/ff.py
@@ -46,6 +46,8 @@ from jarvis.analysis.defects.surface import Surface
# from jarvis.core.kpoints import Kpoints3D as Kpoints
# from jarvis.core.atoms import get_supercell_dims

+import torch
+
try:
from gpaw import GPAW, PW
except Exception:
@@ -62,7 +64,6 @@ __author__ = "Kamal Choudhary, Brian DeCost, Keith Butler, Lily Major"
def default_path():
"""Get default model path."""
dpath = os.path.abspath(str(os.path.join(os.path.dirname(__file__), ".")))
- print("model_path", dpath)
return dpath


@@ -138,8 +139,6 @@ class AlignnAtomwiseCalculator(ase.calculators.calculator.Calculator):

config.model.output_features = 1

- import torch
-
if self.device is None:
self.device = torch.device(
"cuda" if torch.cuda.is_available() else "cpu"
@@ -193,6 +192,7 @@ class ForceField(object):
logfile="alignn_ff.log",
dyn=None,
communicator=None,
+ device="cuda" if torch.cuda.is_available() else "cpu",
):
"""Initialize class."""
self.jarvis_atoms = jarvis_atoms
@@ -225,12 +225,13 @@ class ForceField(object):
# print ('STRUCTURE PROVIDED:')
# print (ase_to_atoms(self.atoms))
# print ()
+ import torch
self.atoms.set_calculator(
AlignnAtomwiseCalculator(
path=self.model_path,
include_stress=self.include_stress,
model_filename=self.model_filename,
- # device="cuda" if torch.cuda.is_available() else "cpu",
+ device=device,
)
)

@@ -238,6 +239,7 @@ class ForceField(object):
"""Print info."""
if isinstance(self.atoms, ExpCellFilter):
self.atoms = self.atoms.atoms
+ return
line = ""
try:
line = f"time={self.dyn.get_time() / units.fs: 5.0f} fs "
@@ -297,9 +299,9 @@ class ForceField(object):
raise ValueError("Check optimizer", optimizer)
if optimize_lattice:
self.atoms = ExpCellFilter(self.atoms)
- print("OPTIMIZATION")
+
self.dyn = optimizer(
- self.atoms, trajectory="opt.traj", logfile="opt.log"
+ self.atoms, trajectory=trajectory, logfile=logfile
)
self.dyn.attach(self.print_format, interval=interval)
self.dyn.run(fmax=fmax, steps=steps)
108 changes: 108 additions & 0 deletions models/alignn_ff/alignn_ff_relax.py
@@ -0,0 +1,108 @@
# %%
from __future__ import annotations

import os

import numpy as np
import pandas as pd
from pqdm.processes import pqdm
Owner Author:

Can we work around this new dependency? I'm not sure how it differs from tqdm? Is tqdm lacking parallel process support?

Collaborator:

Yes, you can just use tqdm. For parallel processing, pqdm has slightly nicer output, that's all.
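As a hedged sketch of that swap (the `square` stand-in and `parallel_map` helper are hypothetical, not part of this PR), a `tqdm`-wrapped process pool could replace `pqdm` like so:

```python
# Minimal sketch: replacing pqdm with tqdm + ProcessPoolExecutor.
# `square` is a hypothetical stand-in for the real `alignn_relax` function.
from concurrent.futures import ProcessPoolExecutor

from tqdm import tqdm


def square(x: int) -> int:
    return x * x


def parallel_map(fn, items, n_jobs: int = 4) -> list:
    """Map fn over items in worker processes, showing a tqdm progress bar.

    Executor.map preserves input order, so results line up with items.
    """
    items = list(items)
    with ProcessPoolExecutor(max_workers=n_jobs) as pool:
        return list(tqdm(pool.map(fn, items), total=len(items)))


if __name__ == "__main__":
    print(parallel_map(square, range(5)))  # [0, 1, 4, 9, 16]
```

With such a helper, the `pqdm(structures, alignn_relax, n_jobs=n_processes_per_task)` call in `alignn_ff_relax.py` would become `parallel_map(alignn_relax, structures, n_jobs=n_processes_per_task)`.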

from pymatgen.core import Structure
from pymatgen.io.jarvis import JarvisAtomsAdaptor

from matbench_discovery import DEBUG, today
from matbench_discovery.data import DATA_FILES, df_wbm

__author__ = "Janosh Riebesell, Philipp Benner"
__date__ = "2023-07-11"


# %% read environment variables
batch = int(os.getenv("TASK_ID", default="0"))
out_dir = os.getenv("SBATCH_OUTPUT", default=f"{today}-alignn-wbm-IS2RE")


# %%
n_splits = 100
n_processes_per_task = 10
module_dir = os.path.dirname(__file__)
# model_name = "mp_e_form_alignn" # pre-trained by NIST
model_name = f"{out_dir}/best-model.pth"
task_type = "IS2RE"
target_col = "e_form_per_atom_mp2020_corrected"
input_col = "initial_structure"
id_col = "material_id"
job_name = f"{model_name}-wbm-{task_type}{'-debug' if DEBUG else ''}"
out_path = (
f"{out_dir}/{'alignn-relaxed-structs' if batch == 0 else f'{batch=}'}.json.gz"
)

if batch < 0 or batch > n_splits:
raise SystemExit(f"Invalid task_id={batch}")
if batch > 0 and not os.path.exists(out_dir):
os.mkdir(out_dir)
if os.path.isfile(out_path):
raise SystemExit(f"{out_path = } already exists, exiting")


# %% Load data
data_path = {
"IS2RE": DATA_FILES.wbm_initial_structures,
"RS2RE": DATA_FILES.wbm_computed_structure_entries,
}[task_type]
input_col = {"IS2RE": "initial_structure", "RS2RE": "relaxed_structure"}[task_type]

df_in = pd.read_json(data_path).set_index(id_col)

df_in[target_col] = df_wbm[target_col]
if task_type == "RS2RE":
df_in[input_col] = [x["structure"] for x in df_in.computed_structure_entry]
assert input_col in df_in, f"{input_col=} not in {list(df_in)}"

# Split data into parts and process only one batch
if batch != 0:
df_in = np.array_split(df_in, 100)[batch - 1]
print(f"Relaxing materials in range {df_in.index[0]} - {df_in.index[-1]}")
else:
print("Relaxing full range of materials")


# %% Relax structures
def alignn_relax(structure: Structure) -> Structure:
"""Relax structure using Alignn FF.

Args:
structure (Structure): pymatgen object to relax.

Returns:
Structure: Relaxed structure.
"""
# CUDA must only be initialized in child processes
import torch
from alignn.ff.ff import ForceField, default_path

ff = ForceField(
jarvis_atoms=JarvisAtomsAdaptor.get_atoms(Structure.from_dict(structure)),
model_path=default_path(),
device=f"cuda:{batch % 4}" if torch.cuda.is_available() else "cpu",
logfile="/dev/null",
)
# Relax structure
opt, _, _ = ff.optimize_atoms(trajectory=None, logfile="/dev/null")

return JarvisAtomsAdaptor.get_structure(opt)


structures = [
df_in.loc[material_id]["initial_structure"] for material_id in df_in.index
]
df_relaxed = pqdm(structures, alignn_relax, n_jobs=n_processes_per_task)

df_in = df_in.assign(relaxed_structure=df_relaxed)


# %% save results
df_in.to_json(out_path)

# Examples of materials that take ages to converge:
# task_id = 75, df_in.iloc[856]: wbm-3-76848
# task_id = 75, df_in.iloc[986]: wbm-3-76978
42 changes: 42 additions & 0 deletions models/alignn_ff/metadata.yml
@@ -0,0 +1,42 @@
model_name: ALIGNN FF
model_version: 2023.07.01
matbench_discovery_version: 1.0
date_added: "2023-07-11"
date_published: "2022-09-16"
authors:
- name: Kamal Choudhary
affiliation: National Institute of Standards and Technology
email: [email protected]
orcid: https://orcid.org/0000-0001-9737-8074
- name: Brian DeCost
affiliation: National Institute of Standards and Technology
orcid: https://orcid.org/0000-0002-3459-5888
- name: Lily Major
affiliation: Aberystwyth University, UK
orcid: https://orcid.org/0000-0002-5783-8432
- name: Keith Butler
affiliation: Rutherford Appleton Laboratory, UK
orcid: https://orcid.org/0000-0001-5432-5597
- name: Jeyan Thiyagalingam
affiliation: Rutherford Appleton Laboratory, UK
orcid: https://orcid.org/0000-0002-2167-1343
- name: Francesca Tavazza
affiliation: National Institute of Standards and Technology
orcid: https://orcid.org/0000-0002-5602-180X
- name: Philipp Benner
affiliation: Bundesanstalt für Materialforschung und -prüfung BAM
orcid: https://orcid.org/0000-0002-0912-8137
github: https://github.com/pbenner
repo: https://github.com/usnistgov/alignn
url: https://jarvis.nist.gov/jalignn
doi: https://doi.org/10.1039/D2DD00096B
preprint: https://arxiv.org/abs/2209.05554
requirements:
ase: 3.22.0
Owner Author (@janosh, Jul 12, 2023):

@pbenner Could you check if these package version numbers are correct (i.e. match the ones you were using for this submission)?

Collaborator:

Yes, the versions are correct, except for ase -> 3.22.1

dgl-cu111: 0.6.1
numpy: 1.24.3
pandas: 2.0.1
scikit-learn: 1.2.2
torch: 1.9.0+cu111
trained_for_benchmark: false
Owner Author:

@pbenner Just to clarify, you used alignnff_wt10?

Collaborator:

Exactly, it was called best_model.pt before:
alignn/ff/best_model.pt → alignn/ff/alignnff_wt10/best_model.pt

Collaborator:

Not sure if it makes sense to test some of the newly added models. I can give it a try and at least check if the convergence is better.

# hyperparams: see align-config.json
25 changes: 25 additions & 0 deletions models/alignn_ff/readme.md
@@ -0,0 +1,25 @@
# ALIGNN-FF formation energy predictions on WBM test set after ML relaxation

The patch `alignn-ff-2023.07.05.patch` fixes the following issue:

```bash
Traceback (most recent call last):
File "alignn_relax.py", line 96, in <module>
File "alignn_relax.py", line 88, in alignn_relax
File "../alignn/ff/ff.py", line 310, in optimize_atoms
File "../alignn/lib/python3.9/site-packages/ase/optimize/optimize.py", line 269, in run
File "../alignn/lib/python3.9/site-packages/ase/optimize/optimize.py", line 156, in run
File "../alignn/lib/python3.9/site-packages/ase/optimize/optimize.py", line 129, in irun
File "../alignn/lib/python3.9/site-packages/ase/optimize/optimize.py", line 108, in call_observers
File "../alignn/lib/python3.9/site-packages/ase/io/trajectory.py", line 132, in write
File "../alignn/lib/python3.9/site-packages/ase/io/trajectory.py", line 156, in _write_atoms
File "../alignn/lib/python3.9/site-packages/ase/io/trajectory.py", line 381, in write_atoms
File "../alignn/lib/python3.9/site-packages/ase/io/ulm.py", line 400, in write
File "../alignn/lib/python3.9/site-packages/ase/io/ulm.py", line 325, in fill
OSError: [Errno 24] Too many open files
```

To reproduce the ALIGNN relaxed predictions, run the following scripts:

1. `alignn_relax.py`: Set the variable `n_splits` to the number of GPU compute nodes. On each compute node, set the environment variable `TASK_ID` to a value in the range 1-`n_splits`. Set the variable `n_processes_per_task` to the number of processes on a single node. For a node with 48 CPU cores and 4 GPUs, 10 processes per task is a good setting.
2. `test_alignn_relaxed.py`: Read the relaxed structures and compute predictions. Set the variable `n_splits` accordingly.
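As a hedged illustration of step 1 (the scheduler command is hypothetical; adapt it to your cluster), launching all batches might be dry-run like this:

```shell
# Dry-run sketch: print one launch command per batch instead of submitting.
# Swap `echo` for your scheduler's submit command (e.g. `sbatch`) on a real cluster.
n_splits=4 # number of GPU compute nodes (alignn_relax.py uses 100)
for task_id in $(seq 1 "$n_splits"); do
  echo "TASK_ID=$task_id python alignn_relax.py"
done
```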