Commit
add git submodule paper to prepare journal submission in separate repo
https://github.com/janosh/matbench-discovery-paper
fix per-elem EACH errors ptable heatmap color scale when normalizing by test set std

rename /paper route to /preprint
janosh committed Jun 20, 2023
1 parent 01e88f0 commit 3ac72e6
Showing 23 changed files with 113 additions and 52 deletions.
1 change: 0 additions & 1 deletion .gitignore
@@ -23,5 +23,4 @@ wandb/
site/src/routes/api/*.md

# temporary ignore rules
notes
2023-02-05-ens=10-perturb=5
3 changes: 3 additions & 0 deletions .gitmodules
@@ -0,0 +1,3 @@
+[submodule "paper"]
+path = paper
+url = https://github.com/janosh/matbench-discovery-paper
4 changes: 2 additions & 2 deletions matbench_discovery/__init__.py
@@ -6,9 +6,9 @@

ROOT = os.path.dirname(os.path.dirname(__file__)) # repo root directory
FIGS = f"{ROOT}/site/src/figs" # directory to store interactive figures
-STATIC = f"{ROOT}/site/static/figs" # directory to store static figures, is symlinked
-# into site/src/routes/paper/figs dir
+STATIC = f"{ROOT}/site/static/figs" # directory to store static figures
MODELS = f"{ROOT}/site/src/routes/models" # directory to write model analysis

# whether a currently running slurm job is in debug mode
DEBUG = "DEBUG" in os.environ or (
"slurm-submit" not in sys.argv and "SLURM_JOB_ID" not in os.environ
1 change: 1 addition & 0 deletions paper
Submodule paper added at 3ea614
4 changes: 2 additions & 2 deletions readme.md
@@ -17,12 +17,12 @@ Matbench Discovery
Matbench Discovery is an [interactive leaderboard](https://janosh.github.io/matbench-discovery) and associated [PyPI package](https://pypi.org/project/matbench-discovery) which together make it easy to benchmark ML energy models on a task designed to closely simulate a high-throughput discovery campaign for new stable inorganic crystals.

-In version 1 of this benchmark, we explore 8 models covering multiple methodologies ranging from random forests to graph neural networks, from one-shot predictors to iterative Bayesian optimizers and interatomic potential-based relaxers. We find [CHGNet](https://github.com/CederGroupHub/chgnet) ([paper](https://doi.org/10.48550/arXiv.2302.14231)) to achieve the highest F1 score of 0.59, $R^2$ of 0.61 and a discovery acceleration factor (DAF) of 3.06 (meaning a 3x higher rate of stable structures compared to dummy selection in our already enriched search space). See the [**full results**](https://janosh.github.io/matbench-discovery/paper#results) in our interactive dashboard which provides valuable insights for maintainers of large-scale materials databases. We show these models have become powerful enough to warrant deploying them as triaging steps to more effectively allocate compute in high-throughput DFT relaxations.
+In version 1 of this benchmark, we explore 8 models covering multiple methodologies ranging from random forests to graph neural networks, from one-shot predictors to iterative Bayesian optimizers and interatomic potential-based relaxers. We find [CHGNet](https://github.com/CederGroupHub/chgnet) ([paper](https://doi.org/10.48550/arXiv.2302.14231)) to achieve the highest F1 score of 0.59, $R^2$ of 0.61 and a discovery acceleration factor (DAF) of 3.06 (meaning a 3x higher rate of stable structures compared to dummy selection in our already enriched search space). See the [**full results**](https://janosh.github.io/matbench-discovery/preprint#results) in our interactive dashboard which provides valuable insights for maintainers of large-scale materials databases. We show these models have become powerful enough to warrant deploying them as triaging steps to more effectively allocate compute in high-throughput DFT relaxations.

<slot name="metrics-table" />

We welcome contributions that add new models to the leaderboard through [GitHub PRs](https://github.com/janosh/matbench-discovery/pulls). See the [usage and contributing guide](https://janosh.github.io/matbench-discovery/contribute) for details.

For a version 2 release of this benchmark, we plan to merge the current training and test sets into the new training set and acquire a much larger test set (potentially at meta-GGA level of theory) compared to the v1 test set of 257k structures. Anyone interested in joining this effort please [open a GitHub discussion](https://github.com/janosh/matbench-discovery/discussions) or [reach out privately](mailto:[email protected]?subject=Matbench%20Discovery).

-For detailed results and analysis, check out the [paper](https://janosh.github.io/matbench-discovery/paper) and [supplementary material](https://janosh.github.io/matbench-discovery/si).
+For detailed results and analysis, check out the [preprint](https://janosh.github.io/matbench-discovery/preprint) and [supplementary material](https://janosh.github.io/matbench-discovery/si).
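The discovery acceleration factor quoted in the readme above can be read as the model's precision divided by the test set's prevalence of stable structures. A minimal sketch with hypothetical counts (the numbers below are made up for illustration and are not the benchmark's actual statistics):

```python
# Hypothetical screening-campaign counts (not the real benchmark numbers)
n_pred_stable = 1_000  # structures the model flags as stable
n_true_pos = 300       # of those, actually stable per DFT
n_test = 50_000        # total test-set size
n_stable = 5_000       # truly stable structures in the test set

precision = n_true_pos / n_pred_stable  # model's hit rate
prevalence = n_stable / n_test          # hit rate of dummy (random) selection
daf = precision / prevalence            # how much the model enriches stable hits
```

With these toy counts, `daf` comes out to 3, mirroring the "3x higher rate of stable structures compared to dummy selection" phrasing above.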
2 changes: 1 addition & 1 deletion scripts/analyze_element_errors.py
@@ -112,7 +112,7 @@


# %%
-test_set_std_col = "Test set standard deviation (eV/atom)"
+test_set_std_col = "Test set standard deviation"
df_elem_err[test_set_std_col] = (
df_frac_comp.where(pd.isna, 1) * df_wbm[each_true_col].values[:, None]
).std()
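The `df_frac_comp.where(pd.isna, 1)` idiom in the snippet above turns a fractional-composition matrix into an indicator mask (NaN where an element is absent, 1 where present), so that broadcasting each structure's energy value across its elements and taking the column-wise std yields a per-element spread. A self-contained toy example with made-up values:

```python
import numpy as np
import pandas as pd

# toy fractional-composition matrix: rows = structures, columns = elements,
# NaN where a structure does not contain that element (made-up values)
df_frac_comp = pd.DataFrame({"Fe": [0.5, np.nan, 0.25], "O": [0.5, 1.0, np.nan]})
each_true = pd.Series([0.1, -0.2, 0.3])  # hypothetical energy-above-hull values

# .where(pd.isna, 1) keeps NaN where the condition (is-NaN) holds and replaces
# every present-element fraction with 1, i.e. builds an indicator mask
indicator = df_frac_comp.where(pd.isna, 1)

# broadcasting each structure's value across its elements ((n, 1) column
# vector against the (n, n_elems) frame), then taking the column-wise std,
# gives each element's spread over the structures that contain it
per_elem_std = (indicator * each_true.values[:, None]).std()
```

Here `per_elem_std["Fe"]` is the sample std of `[0.1, 0.3]` (the two Fe-containing structures) and `per_elem_std["O"]` that of `[0.1, -0.2]`, with NaN rows skipped.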
65 changes: 54 additions & 11 deletions scripts/compile_metrics.py
@@ -29,6 +29,13 @@
__author__ = "Janosh Riebesell"
__date__ = "2022-11-28"

+try:
+    # pdfkit used to export pandas Styler to PDF, requires:
+    # pip install pdfkit && brew install homebrew/cask/wkhtmltopdf
+    import pdfkit
+except ImportError:
+    pdfkit = None


# %%
train_run_filters: dict[str, tuple[int, str, str, str]] = {
@@ -137,16 +144,34 @@


# %%
-time_cols = list(df_stats.filter(like=time_col))
+ontology = {
+    "CHGNet": ("S2E", "IS2RE-SR", "UIP-GNN"),
+    "M3GNet": ("S2E", "IS2RE-SR", "UIP-GNN"),
+    "MEGNet": ("RS2RE", "IS2RE", "GNN"),
+    "CGCNN": ("RS2RE", "IS2RE", "GNN"),
+    "CGCNN+P": ("S2RE", "IS2RE", "GNN"),
+    "Wrenformer": ("RP2RE", "IP2RE", "Transformer"),
+    "BOWSR + MEGNet": ("RS2RE", "IS2RE-BO", "BO+GNN"),
+    "Voronoi RF": ("RS2RE", "IS2RE", "Fingerprint+RF"),
+    "dummy": ("", "", "dummy clf"),
+}
+
+df_table = pd.concat(
+    [df_metrics, pd.DataFrame(ontology, index=["trained", "deployed", "model class"])]
+)
+
+
+# %%
+# time_cols = list(df_stats.filter(like=time_col))
# for col in time_cols: # uncomment to include run times in metrics table
-# df_metrics.loc[col] = df_stats[col]
-higher_is_better = {"DAF", "R²", "Precision", "F1", "Accuracy", "TPR", "TNR"}
+# df_ont.loc[col] = df_stats[col]
+higher_is_better = {"DAF", "R<sup>2</sup>", "Precision", "F1", "Accuracy", "TPR", "TNR"}
lower_is_better = {"MAE", "RMSE", "FNR", "FPR"}
-df_metrics = df_metrics.rename(index={"R2": "R²"})
-idx_set = set(df_metrics.index)
+df_table = df_table.rename(index={"R2": "R<sup>2</sup>"})
+idx_set = set(df_table.index)

styler = (
-df_metrics.T
+df_table.T
# append arrow up/down to table headers to indicate higher/lower metric is better
# .rename(columns=lambda x: x + " ↑" if x in higher_is_better else x + " ↓")
.style.format(precision=2)
@@ -167,22 +192,40 @@
styler.set_table_styles([dict(selector=sel, props=styles[sel]) for sel in styles])
styler.set_uuid("")
# hide redundant metrics (TPR = Recall, FPR = 1 - TNR, FNR = 1 - TPR)
-styler.hide(["Recall", "FPR", "FNR"], axis=1)
+styler.hide(["Recall", "FPR", "FNR", "trained", "deployed"], axis=1)


# %% export model metrics as styled HTML table and Svelte component
styler.to_html(f"{ROOT}/tmp/figures/model-metrics.html")
# draw dotted line between classification and regression metrics
-styles = "#T_ :is(td, th):nth-last-child(3) { border-left: 1px dotted white; }"
df_to_svelte_table(
-    styler, f"{FIGS}/metrics-table.svelte", inline_props="class='roomy'", styles=styles
+    styler,
+    f"{FIGS}/metrics-table.svelte",
+    inline_props="class='roomy'",
+    styles="#T_ :is(td, th):nth-last-child(3) { border-left: 1px dotted white; }",
)


+# %%
+if pdfkit is not None:
+    pdfkit.from_string(
+        styler.to_html(),
+        f"{ROOT}/paper/figures/metrics-table.pdf",
+        options={
+            "margin-top": "0",
+            "margin-right": "0",
+            "margin-bottom": "0",
+            "margin-left": "0",
+            # fit page size to content
+            "page-width": f"{(len(styler.columns) + 1) * 10}",
+            "page-height": f"{(len(styler.index) + 1) * 6}",
+        },
+    )


# %%
# hide_rows = list(set(df_metrics) - set(df_metrics.T.F1.nlargest(6).index))
# styler.hide(hide_rows) # show only the best models by F1 score
-png_metrics = f"{ROOT}/tmp/figures/model-metrics.png"
+png_metrics = f"{ROOT}/tmp/figures/metrics-table.png"
dfi.export(styler, png_metrics, dpi=300)
print(f"{png_metrics=}")

2 changes: 1 addition & 1 deletion scripts/cumulative_clf_metrics.py
@@ -92,4 +92,4 @@
img_name = "cumulative-clf-metrics"
save_fig(fig, f"{FIGS}/{img_name}.svelte")
# save_fig(fig, f"{STATIC}/{img_name}.webp", scale=3)
-save_fig(fig, f"{ROOT}/tmp/figures/{img_name}.pdf", width=720, height=370)
+save_fig(fig, f"{ROOT}/paper/figures/{img_name}.pdf", width=720, height=370)
2 changes: 1 addition & 1 deletion scripts/rolling_mae_vs_hull_dist_all_models.py
@@ -56,4 +56,4 @@
img_name = "rolling-mae-vs-hull-dist-models"
save_fig(fig, f"{FIGS}/{img_name}.svelte")
# save_fig(fig, f"{STATIC}/{img_name}.webp", scale=3)
-save_fig(fig, f"{ROOT}/tmp/figures/{img_name}.pdf", width=520, height=350)
+save_fig(fig, f"{ROOT}/paper/figures/{img_name}.pdf", width=520, height=350)
4 changes: 2 additions & 2 deletions scripts/scatter_e_above_hull_models.py
@@ -204,7 +204,7 @@
axis_titles = dict(xref="paper", yref="paper", showarrow=False)
fig.add_annotation( # x-axis title
x=0.5,
-y=-0.05,
+y=-0.06,
text=x_title,
**axis_titles,
)
@@ -224,7 +224,7 @@

# %%
save_fig(fig, f"{FIGS}/each-scatter-models.svelte")
-save_fig(fig, f"{ROOT}/tmp/figures/each-scatter-models.pdf", width=600, height=700)
+save_fig(fig, f"{ROOT}/paper/figures/each-scatter-models.pdf", width=600, height=700)
# save_fig(fig, f"{STATIC}/each-scatter-models.webp", scale=4, width=700, height=800)


8 changes: 4 additions & 4 deletions site/package.json
@@ -18,11 +18,11 @@
"devDependencies": {
"@iconify/svelte": "^3.1.1",
"@rollup/plugin-yaml": "^4.0.1",
-"@sveltejs/adapter-static": "^2.0.1",
-"@sveltejs/kit": "^1.15.2",
+"@sveltejs/adapter-static": "^2.0.2",
+"@sveltejs/kit": "^1.15.4",
"@sveltejs/vite-plugin-svelte": "^2.0.4",
-"@typescript-eslint/eslint-plugin": "^5.57.1",
-"@typescript-eslint/parser": "^5.57.1",
+"@typescript-eslint/eslint-plugin": "^5.58.0",
+"@typescript-eslint/parser": "^5.58.0",
"elementari": "^0.1.6",
"eslint": "^8.38.0",
"eslint-plugin-svelte3": "^4.0.0",
14 changes: 12 additions & 2 deletions site/src/figs/metrics-table.svelte


4 changes: 2 additions & 2 deletions site/src/routes/+layout.svelte
@@ -24,8 +24,8 @@
'/api': `API docs for the Matbench Discovery PyPI package.`,
'/contribute': `Steps for contributing a new model to the benchmark.`,
'/models': `Details on each model sortable by metrics.`,
-'/paper': `The paper released with the Matbench Discovery benchmark.`,
-'/paper/iclr-ml4mat': `Extended abstract submitted to the ICLR ML4Materials workshop.`,
+'/preprint': `The preprint released with the Matbench Discovery benchmark.`,
+'/preprint/iclr-ml4mat': `Extended abstract submitted to the ICLR ML4Materials workshop.`,
'/si': `Supplementary information including interesting but non-essential plots.`,
}[url ?? ``]
if (url && !description) console.warn(`No description for url=${url}`)
8 changes: 6 additions & 2 deletions site/src/routes/models/element-errors-ptable-heatmap.svelte
@@ -11,7 +11,7 @@
export let models: string[] = Object.keys(per_elem_errors)
export let current_model: string[] = [models[1]]
export let manual_cbar_max: boolean = false
-export let normalized: boolean = false
+export let normalized: boolean = true
export let cbar_max: number | null = 0.03
const test_set_std_key = Object.keys(per_elem_errors).find((key) =>
@@ -20,7 +20,11 @@
const test_set_std = Object.values(per_elem_errors[test_set_std_key]) as number[]
$: heatmap_values = (Object.values(per_elem_errors[current_model[0]]) as number[]).map(
-    (val, idx) => (normalized ? val / test_set_std[idx] || null : val)
+    (val, idx) => {
+      const denom = normalized ? test_set_std[idx] : 1
+      if (denom) return val / denom
+      return null
+    }
)
$: current_data_max = Math.max(...heatmap_values)
$: cs_range = [0, manual_cbar_max ? cbar_max : current_data_max]
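The rewritten arrow function above guards against a zero or missing test-set std, which would otherwise put `Infinity`/`NaN` into the heatmap values and break the color scale. The same logic as a small Python sketch (hypothetical values; the site code itself is Svelte/TypeScript):

```python
def normalize_error(val, std, normalized=True):
    """Divide a per-element error by the test-set std, returning None when the
    denominator is missing or zero (mirrors the Svelte guard above)."""
    denom = std if normalized else 1
    if denom:  # a falsy denom (0 or None) would blow up the color scale
        return val / denom
    return None

errors = [0.05, 0.02, 0.04]   # hypothetical per-element model errors
stds = [0.10, 0.0, None]      # hypothetical test-set stds per element
normalized_vals = [normalize_error(v, s) for v, s in zip(errors, stds)]
```

Elements with no usable std map to `None` and can simply be left uncolored in the periodic-table heatmap.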
File renamed without changes.
File renamed without changes.