diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index fbecc133..902a148d 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -7,6 +7,13 @@ on:
     branches: [main]
   release:
     types: [published]
+  workflow_dispatch:
+    inputs:
+      task:
+        type: choice
+        options: [tests, release]
+        default: tests
+        description: Only run tests or release a new version of matbench-discovery to PyPI after tests pass.
 
 jobs:
   tests:
diff --git a/matbench_discovery/plots.py b/matbench_discovery/plots.py
index 5f0f011a..6b8d5ae3 100644
--- a/matbench_discovery/plots.py
+++ b/matbench_discovery/plots.py
@@ -65,6 +65,7 @@ def unit(text: str) -> str:
 )
 model_labels = dict(
     alignn="ALIGNN",
+    alignn_ff="ALIGNN FF",
     alignn_pretrained="ALIGNN Pretrained",
     bowsr_megnet="BOWSR",
     chgnet="CHGNet",
diff --git a/matbench_discovery/preds.py b/matbench_discovery/preds.py
index 097e1094..3d70663c 100644
--- a/matbench_discovery/preds.py
+++ b/matbench_discovery/preds.py
@@ -65,6 +65,7 @@ class PredFiles(Files):
     alignn = "alignn/2023-06-02-alignn-wbm-IS2RE.csv.gz"
     # alignn_pretrained = "alignn/2023-06-03-mp-e-form-alignn-wbm-IS2RE.csv.gz"
+    alignn_ff = "alignn_ff/2023-07-11-alignn-ff-wbm-IS2RE.csv.gz"
 
     # model_labels remaps model keys to pretty plot labels (see Files)
diff --git a/models/alignn/metadata.yml b/models/alignn/metadata.yml
index f7ed783a..520609ad 100644
--- a/models/alignn/metadata.yml
+++ b/models/alignn/metadata.yml
@@ -11,7 +11,6 @@ authors:
   - name: Brian DeCost
     affiliation: National Institute of Standards and Technology
     orcid: https://orcid.org/0000-0002-3459-5888
-    email: zhongpc@berkeley.edu
   - name: Philipp Benner
     affiliation: Bundesanstalt für Materialforschung und -prüfung BAM
     orcid: https://orcid.org/0000-0002-0912-8137
@@ -19,7 +18,7 @@ authors:
 repo: https://github.com/usnistgov/alignn
 url: https://jarvis.nist.gov/jalignn
 doi: https://nature.com/articles/s41524-021-00650-1
-preprint: https://arxiv.org/abs/2209.05554
+preprint: https://arxiv.org/abs/2106.01829
 requirements:
   ase: 3.22.0
   dgl-cu111: 0.6.1
diff --git a/models/alignn/readme.md b/models/alignn/readme.md
index 85a59d01..34330676 100644
--- a/models/alignn/readme.md
+++ b/models/alignn/readme.md
@@ -20,6 +20,5 @@ Replace `/path/to/` with the actual path to the patch file.
 
 The directory contains the following files, which must be executed in the given order to reproduce the results:
 
-1. `train_data.py`: Export Matbench Discovery training data to ALIGNN compatible format. This script outputs training data in the directory `data_train`. In addition, a small test data set is set apart and stored in the directory `data_test`
-1. `train_alignn.py`: Train an ALIGNN model on previously exported data. The resulting model is stored in the directory `data-train-result`
-1. `test_alignn.py`: Test a trained ALIGNN model on the WBM data. Generates `2023-06-03-mp-e-form-alignn-wbm-IS2RE.csv.gz`.
+1. `train_alignn.py`: Train an ALIGNN model on all 154k MP computed structure entries. The resulting model checkpoint is saved to the directory set by the `out_dir` variable in that script and also uploaded to `wandb`, from where it is publicly available for 3rd-party reproducibility (see the download sketch below).
+1. `test_alignn.py`: Test a trained ALIGNN model on the WBM data. Generates `2023-06-03-mp-e-form-alignn-wbm-IS2RE.csv.gz`.
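+
+The trained checkpoint on `wandb` can be pulled down with the public API for 3rd-party reproduction. A minimal sketch, assuming a hypothetical run path (substitute the actual entity/project/run ID from the training logs; the checkpoint file name follows the commented `best-model.pth` path in `test_alignn.py`):
+
+```py
+import wandb
+
+api = wandb.Api()
+# hypothetical run path, replace with the real entity/project/run ID
+run = api.run("<entity>/matbench-discovery/<run-id>")
+run.file("best-model.pth").download(root=".")  # saves ./best-model.pth
+```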
diff --git a/models/alignn/test_alignn.py b/models/alignn/test_alignn.py
index 30a703a2..eb3abce1 100644
--- a/models/alignn/test_alignn.py
+++ b/models/alignn/test_alignn.py
@@ -30,6 +30,7 @@
 
 # %%
 model_name = "mp_e_form_alignn"  # pre-trained by NIST
+# TODO fix this to load checkpoint from figshare
 # model_name = f"{module_dir}/data-train-result/best-model.pth"
 task_type = "IS2RE"
 target_col = "e_form_per_atom_mp2020_corrected"
diff --git a/models/alignn_ff/2023-07-11-alignn-ff-wbm-IS2RE.csv.gz b/models/alignn_ff/2023-07-11-alignn-ff-wbm-IS2RE.csv.gz
new file mode 100644
index 00000000..e1da1929
Binary files /dev/null and b/models/alignn_ff/2023-07-11-alignn-ff-wbm-IS2RE.csv.gz differ
diff --git a/models/alignn_ff/alignn-ff-2023.07.05.patch b/models/alignn_ff/alignn-ff-2023.07.05.patch
new file mode 100644
index 00000000..4e16c8ad
--- /dev/null
+++ b/models/alignn_ff/alignn-ff-2023.07.05.patch
@@ -0,0 +1,73 @@
+diff --git a/alignn/ff/ff.py b/alignn/ff/ff.py
+index 2dc916f..a569184 100644
+--- a/alignn/ff/ff.py
++++ b/alignn/ff/ff.py
+@@ -46,6 +46,8 @@ from jarvis.analysis.defects.surface import Surface
+ # from jarvis.core.kpoints import Kpoints3D as Kpoints
+ # from jarvis.core.atoms import get_supercell_dims
+ 
++import torch
++
+ try:
+     from gpaw import GPAW, PW
+ except Exception:
+@@ -62,7 +64,6 @@ __author__ = "Kamal Choudhary, Brian DeCost, Keith Butler, Lily Major"
+ def default_path():
+     """Get default model path."""
+     dpath = os.path.abspath(str(os.path.join(os.path.dirname(__file__), ".")))
+-    print("model_path", dpath)
+     return dpath
+ 
+ 
+@@ -138,8 +139,6 @@ class AlignnAtomwiseCalculator(ase.calculators.calculator.Calculator):
+ 
+         config.model.output_features = 1
+ 
+-        import torch
+-
+         if self.device is None:
+             self.device = torch.device(
+                 "cuda" if torch.cuda.is_available() else "cpu"
+@@ -193,6 +192,7 @@ class ForceField(object):
+         logfile="alignn_ff.log",
+         dyn=None,
+         communicator=None,
++        device="cuda" if torch.cuda.is_available() else "cpu",
+     ):
+         """Initialize class."""
+         self.jarvis_atoms = jarvis_atoms
+@@ -225,12 +225,13 @@ class ForceField(object):
+         # print ('STRUCTURE PROVIDED:')
+         # print (ase_to_atoms(self.atoms))
+         # print ()
++        import torch
+         self.atoms.set_calculator(
+             AlignnAtomwiseCalculator(
+                 path=self.model_path,
+                 include_stress=self.include_stress,
+                 model_filename=self.model_filename,
+-                # device="cuda" if torch.cuda.is_available() else "cpu",
++                device=device,
+             )
+         )
+ 
+@@ -238,6 +239,7 @@
+         """Print info."""
+         if isinstance(self.atoms, ExpCellFilter):
+             self.atoms = self.atoms.atoms
++        return
+         line = ""
+         try:
+             line = f"time={self.dyn.get_time() / units.fs: 5.0f} fs "
+@@ -297,9 +299,9 @@
+             raise ValueError("Check optimizer", optimizer)
+         if optimize_lattice:
+             self.atoms = ExpCellFilter(self.atoms)
+-        print("OPTIMIZATION")
++
+         self.dyn = optimizer(
+-            self.atoms, trajectory="opt.traj", logfile="opt.log"
++            self.atoms, trajectory=trajectory, logfile=logfile
+         )
+         self.dyn.attach(self.print_format, interval=interval)
+         self.dyn.run(fmax=fmax, steps=steps)
diff --git a/models/alignn_ff/alignn_ff_relax.py b/models/alignn_ff/alignn_ff_relax.py
new file mode 100644
index 00000000..cb84c59d
--- /dev/null
+++ b/models/alignn_ff/alignn_ff_relax.py
@@ -0,0 +1,108 @@
+# %%
+from __future__ import annotations
+
+import os
+
+import numpy as np
+import pandas as pd
+from pymatgen.core import Structure
+from pymatgen.io.jarvis import JarvisAtomsAdaptor
+from tqdm.contrib.concurrent import process_map
+
+from matbench_discovery import DEBUG, today
+from matbench_discovery.data import DATA_FILES, df_wbm
+
+__author__ = "Janosh Riebesell, Philipp Benner"
+__date__ = "2023-07-11"
+
+
+# %% read environment variables
+batch = int(os.getenv("TASK_ID", default="0"))
+out_dir = os.getenv("SBATCH_OUTPUT", default=f"{today}-alignn-wbm-IS2RE")
+
+
+# %%
+n_splits = 100
+n_processes_per_task = 10
+module_dir = os.path.dirname(__file__)
+# model_name = "mp_e_form_alignn"  # pre-trained by NIST
+model_name = f"{out_dir}/best-model.pth"
+task_type = "IS2RE"
+target_col = "e_form_per_atom_mp2020_corrected"
+input_col = "initial_structure"
+id_col = "material_id"
+job_name = f"{model_name}-wbm-{task_type}{'-debug' if DEBUG else ''}"
+out_path = (
+    f"{out_dir}/{'alignn-relaxed-structs' if batch == 0 else f'{batch=}'}.json.gz"
+)
+
+if batch < 0 or batch > n_splits:
+    raise SystemExit(f"Invalid task_id={batch}")
+if batch > 0 and not os.path.exists(out_dir):
+    os.mkdir(out_dir)
+if os.path.isfile(out_path):
+    raise SystemExit(f"{out_path = } already exists, exiting")
+
+
+# %% Load data
+data_path = {
+    "IS2RE": DATA_FILES.wbm_initial_structures,
+    "RS2RE": DATA_FILES.wbm_computed_structure_entries,
+}[task_type]
+input_col = {"IS2RE": "initial_structure", "RS2RE": "relaxed_structure"}[task_type]
+
+df_in = pd.read_json(data_path).set_index(id_col)
+
+df_in[target_col] = df_wbm[target_col]
+if task_type == "RS2RE":
+    df_in[input_col] = [x["structure"] for x in df_in.computed_structure_entry]
+assert input_col in df_in, f"{input_col=} not in {list(df_in)}"
+
+# Split data into parts and process only one batch
+if batch != 0:
+    df_in = np.array_split(df_in, n_splits)[batch - 1]
+    print(f"Relaxing materials in range {df_in.index[0]} - {df_in.index[-1]}")
+else:
+    print("Relaxing full range of materials")
+
+
+# %% Relax structures
+def alignn_relax(structure: dict) -> Structure:
+    """Relax structure using ALIGNN FF.
+
+    Args:
+        structure (dict): pymatgen Structure as dict to relax.
+
+    Returns:
+        Structure: Relaxed structure.
+ """ + # Cuda must be only initialized in child processes + import torch + from alignn.ff.ff import ForceField, default_path + + ff = ForceField( + jarvis_atoms=JarvisAtomsAdaptor.get_atoms(Structure.from_dict(structure)), + model_path=default_path(), + device=f"cuda:{batch % 4}" if torch.cuda.is_available() else "cpu", + logfile="/dev/null", + ) + # Relax structure + opt, _, _ = ff.optimize_atoms(trajectory=None, logfile="/dev/null") + + return JarvisAtomsAdaptor.get_structure(opt) + + +structures = [ + df_in.loc[material_id]["initial_structure"] for material_id in df_in.index +] +df_relaxed = tqdm(structures, alignn_relax, n_jobs=n_processes_per_task) + +df_in = df_in.assign(relaxed_structure=df_relaxed) + + +# %% save results +df_in.to_json(out_path) + +# Examples of materials that take ages to converge: +# task_id = 75, df_in.iloc[856]: wbm-3-76848 +# task_id = 75, df_in.iloc[986]: wbm-3-76978 diff --git a/models/alignn_ff/metadata_aborted.yml b/models/alignn_ff/metadata_aborted.yml new file mode 100644 index 00000000..0b0f3ed0 --- /dev/null +++ b/models/alignn_ff/metadata_aborted.yml @@ -0,0 +1,42 @@ +model_name: ALIGNN FF +model_version: 2023.07.01 +matbench_discovery_version: 1.0 +date_added: "2023-07-11" +date_published: "2022-09-16" +authors: + - name: Kamal Choudhary + affiliation: National Institute of Standards and Technology + email: kamal.choudhary@nist.gov + orcid: https://orcid.org/0000-0001-9737-8074 + - name: Brian DeCost + affiliation: National Institute of Standards and Technology + orcid: https://orcid.org/0000-0002-3459-5888 + - name: Lily Major + affiliation: Aberystwyth University, UK + orcid: https://orcid.org/0000-0002-5783-8432 + - name: Keith Butler + affiliation: Rutherford Appleton Laboratory, UK + orcid: https://orcid.org/0000-0001-5432-5597 + - name: Jeyan Thiyagalingam + affiliation: Rutherford Appleton Laboratory, UK + orcid: https://orcid.org/0000-0002-2167-1343 + - name: Francesca Tavazza + affiliation: National Institute of Standards and Technology + orcid: https://orcid.org/0000-0002-5602-180X + - name: Philipp Benner + affiliation: Bundesanstalt für Materialforschung und -prüfung BAM + orcid: https://orcid.org/0000-0002-0912-8137 + github: https://github.com/pbenner +repo: https://github.com/usnistgov/alignn +url: https://jarvis.nist.gov/jalignn +doi: https://doi.org/10.1039/D2DD00096B +preprint: https://arxiv.org/abs/2209.05554 +requirements: + ase: 3.22.0 + dgl-cu111: 0.6.1 + numpy: 1.24.3 + pandas: 2.0.1 + scikit-learn: 1.2.2 + torch: 1.9.0+cu111 +trained_for_benchmark: false +# hyperparams: see align-config.json diff --git a/models/alignn_ff/readme.md b/models/alignn_ff/readme.md new file mode 100644 index 00000000..1b6c7979 --- /dev/null +++ b/models/alignn_ff/readme.md @@ -0,0 +1,39 @@ +# ALIGNN-FF (aborted) + +The [ALIGNN FF model submission](https://github.com/janosh/matbench-discovery/pull/47) intended to get a complete set of formation energy predictions for the WBM test set post-ALIGNN-FF structure relaxation (i.e. the WBM IS2RE task). + +This effort was aborted for the following reasons: + +1. **Incompatibility issues**: ALIGNN-FF was pre-trained on the JARVIS data, which among other differences uses the OptB88vdW functional and is incompatible with the WBM test set generated using Materials Project workflows. +1. **Training difficulties**: ALIGNN-FF proved to be very resource-hungry. [12 GB of MPtrj training data](https://figshare.com/articles/dataset/23713842) turned into 600 GB of ALIGNN graph data. 
+1. **Ineffectiveness of fine-tuning**: Efforts to fine-tune the ALIGNN-FF WT10 model on the CHGNet data suffered a high initial loss, worse even than that of the untrained model, indicating significant dataset incompatibility.
+
+The decision to abort adding ALIGNN FF to Matbench Discovery v1 was made after weeks of work due to ongoing technical challenges and resource limitations. See the [PR discussion](https://github.com/janosh/matbench-discovery/pull/47) for further details.
+
+## Fine-tuning
+
+We attempted fine-tuning the [`alignnff_wt10` checkpoint](https://github.com/usnistgov/alignn/blob/461b35fe6e5ed7ade7cbf9b345773e941371ecfc/alignn/ff/alignnff_wt10/best_model.pt).
+
+The patch `alignn-ff-2023.07.05.patch` fixes the following issue:
+
+```bash
+Traceback (most recent call last):
+  File "alignn_relax.py", line 96, in <module>
+  File "alignn_relax.py", line 88, in alignn_relax
+  File "../alignn/ff/ff.py", line 310, in optimize_atoms
+  File "../alignn/lib/python3.9/site-packages/ase/optimize/optimize.py", line 269, in run
+  File "../alignn/lib/python3.9/site-packages/ase/optimize/optimize.py", line 156, in run
+  File "../alignn/lib/python3.9/site-packages/ase/optimize/optimize.py", line 129, in irun
+  File "../alignn/lib/python3.9/site-packages/ase/optimize/optimize.py", line 108, in call_observers
+  File "../alignn/lib/python3.9/site-packages/ase/io/trajectory.py", line 132, in write
+  File "../alignn/lib/python3.9/site-packages/ase/io/trajectory.py", line 156, in _write_atoms
+  File "../alignn/lib/python3.9/site-packages/ase/io/trajectory.py", line 381, in write_atoms
+  File "../alignn/lib/python3.9/site-packages/ase/io/ulm.py", line 400, in write
+  File "../alignn/lib/python3.9/site-packages/ase/io/ulm.py", line 325, in fill
+OSError: [Errno 24] Too many open files
+```
+
+## Scripts
+
+1. `alignn_ff_relax.py`: Relax WBM test set structures (see the batching sketch below). Set the variable `n_splits` to the number of GPU compute nodes. On each compute node, set the environment variable `TASK_ID` to a value in the range 1 to `n_splits`. Set the variable `n_processes_per_task` to the number of processes on a single node. For 48 CPU cores with 4 GPUs, a good setting is 10 processes.
+2. `test_alignn_ff.py`: Read the relaxed structures from `alignn_ff_relax.py` and make formation energy predictions. Set the variable `n_splits` accordingly.
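+
+As a rough scheduling aid, here is a minimal sketch (not part of the original submission; the structure count is a hypothetical stand-in) of how `TASK_ID` maps to a slice of the test set in `alignn_ff_relax.py`: batch 0 relaxes everything, while batches 1 to `n_splits` each relax one chunk produced by `np.array_split`:
+
+```py
+import numpy as np
+import pandas as pd
+
+n_splits = 100  # must match the value set in alignn_ff_relax.py
+# hypothetical stand-in for the WBM index (roughly 257k IDs in the real test set)
+df_in = pd.DataFrame(index=[f"wbm-{idx}" for idx in range(1_000)])
+
+for task_id in (0, 1, n_splits):  # example values of the TASK_ID env var
+    # batch 0 processes the full set; batches 1..n_splits each get one chunk
+    df_batch = df_in if task_id == 0 else np.array_split(df_in, n_splits)[task_id - 1]
+    print(f"{task_id=}: {len(df_batch)} structures, first ID {df_batch.index[0]}")
+```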
diff --git a/models/alignn_ff/test_alignn_ff.py b/models/alignn_ff/test_alignn_ff.py
new file mode 100644
index 00000000..6fdf658c
--- /dev/null
+++ b/models/alignn_ff/test_alignn_ff.py
@@ -0,0 +1,128 @@
+# %%
+from __future__ import annotations
+
+import json
+import os
+from glob import glob
+from importlib.metadata import version
+
+import pandas as pd
+import torch
+import wandb
+from alignn.config import TrainingConfig
+from alignn.models.alignn import ALIGNN
+from alignn.pretrained import all_models, get_figshare_model
+from jarvis.core.graphs import Graph
+from pymatgen.core import Structure
+from pymatgen.io.jarvis import JarvisAtomsAdaptor
+from sklearn.metrics import r2_score
+from tqdm import tqdm
+
+from matbench_discovery import DEBUG, today
+from matbench_discovery.data import DATA_FILES, df_wbm
+from matbench_discovery.plots import wandb_scatter
+
+__author__ = "Philipp Benner, Janosh Riebesell"
+__date__ = "2023-07-11"
+
+module_dir = os.path.dirname(__file__)
+
+
+# %%
+n_splits = 100
+# model_name = "mp_e_form_alignn"  # pre-trained by NIST
+task_type = "IS2RE"
+target_col = "e_form_per_atom_mp2020_corrected"
+input_col = "initial_structure"
+id_col = "material_id"
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model_name = f"alignn-ff-wbm-{task_type}"
+job_name = f"{model_name}-relaxed-wbm-{task_type}{'-debug' if DEBUG else ''}"
+out_dir = os.getenv("SBATCH_OUTPUT", f"{module_dir}/{today}-{job_name}")
+in_dir = os.getenv("SBATCH_OUTPUT", f"{module_dir}/{today}-{job_name}")
+
+
+if model_name in all_models:  # load pre-trained model
+    model = get_figshare_model(model_name)
+    pred_col = "e_form_per_atom_alignn_pretrained"
+elif os.path.isfile(model_name):
+    pred_col = "e_form_per_atom_alignn"
+    with open(f"{module_dir}/alignn-config.json") as file:
+        config = TrainingConfig(**json.load(file))
+
+    model = ALIGNN(config.model)
+    # load trained ALIGNN model
+    state_dict = torch.load(model_name, map_location=device)
+    model.load_state_dict(state_dict)
+    model = model.to(device)
+else:
+    raise ValueError(
+        f"{model_name=} not found, train a model or use pre-trained {list(all_models)}"
+    )
+
+
+# %% Load data
+data_path = {
+    "IS2RE": DATA_FILES.wbm_initial_structures,
+    "RS2RE": DATA_FILES.wbm_computed_structure_entries,
+}[task_type]
+input_col = "relaxed_structure"
+# load ALIGNN-FF relaxed structures (TODO fix directory we're loading from)
+df_in = pd.concat(map(pd.read_json, glob(f"{module_dir}/data-train-result/*.json.gz")))
+
+
+# %%
+run_params = dict(
+    data_path=data_path,
+    **{f"{dep}_version": version(dep) for dep in ("alignn", "numpy")},
+    model_name=model_name,
+    task_type=task_type,
+    target_col=target_col,
+    df=dict(shape=str(df_in.shape), columns=", ".join(df_in)),
+)
+
+wandb.init(project="matbench-discovery", name=job_name, config=run_params)
+
+
+# %% Predict
+model.eval()
+e_form_preds: dict[str, float] = {}
+with torch.no_grad():  # get predictions
+    for material_id, structure in tqdm(
+        df_in[input_col].items(),
+        total=len(df_in),
+        desc=f"Predicting {target_col=} {task_type}",
+    ):
+        atoms = JarvisAtomsAdaptor.get_atoms(Structure.from_dict(structure))
+
+        atom_graph, line_graph = Graph.atom_dgl_multigraph(atoms)
+        e_form = model([atom_graph.to(device), line_graph.to(device)]).item()
+
+        e_form_preds[material_id] = e_form
+
+# wrap in a Series to align predictions with df_wbm's material_id index
+df_wbm[pred_col] = pd.Series(e_form_preds)
+
+# swap legacy MP energy corrections for the MP2020 scheme
+df_wbm[pred_col] -= df_wbm.e_correction_per_atom_mp_legacy
+df_wbm[pred_col] += df_wbm.e_correction_per_atom_mp2020
+
+if model_name in all_models:
+    df_wbm[pred_col].round(4).to_csv(
f"{module_dir}/{today}-{model_name}-relaxed-wbm-IS2RE.csv.gz" + ) +else: + df_wbm[pred_col].round(4).to_csv( + f"{module_dir}/{today}-alignn-relaxed-wbm-IS2RE.csv.gz" + ) + + +# %% +df_wbm = df_wbm.dropna() + +table = wandb.Table(dataframe=df_wbm[[target_col, pred_col]].reset_index()) + +MAE = (df_wbm[target_col] - df_wbm[pred_col]).abs().mean() +R2 = r2_score(df_wbm[target_col], df_wbm[pred_col]) +title = f"{model_name} {task_type} {MAE=:.4} {R2=:.4}" +print(title) + +wandb_scatter(table, fields=dict(x=target_col, y=pred_col), title=title)