Skip to content

Commit

Permalink
fix trainable params in test_m3gnet.py
Browse files Browse the repository at this point in the history
update site to sveltekit v2, vite v5
ruff unignore NPY002 and fix violations
  • Loading branch information
janosh committed Dec 28, 2023
1 parent e42d70c commit e203f8f
Show file tree
Hide file tree
Showing 13 changed files with 57 additions and 55 deletions.
8 changes: 4 additions & 4 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ default_install_hook_types: [pre-commit, commit-msg]

repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.1.7
rev: v0.1.9
hooks:
- id: ruff
args: [--fix]
Expand All @@ -30,7 +30,7 @@ repos:
- id: trailing-whitespace

- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.7.1
rev: v1.8.0
hooks:
- id: mypy
additional_dependencies: [types-pyyaml, types-requests]
Expand All @@ -45,7 +45,7 @@ repos:
args: [--ignore-words-list, "nd,te,fpr", --check-filenames]

- repo: https://github.com/pre-commit/mirrors-prettier
rev: v4.0.0-alpha.3
rev: v4.0.0-alpha.8
hooks:
- id: prettier
args: [--write] # edit files in-place
Expand All @@ -56,7 +56,7 @@ repos:
exclude: ^(site/src/figs/.+\.svelte|data/wbm/20.+\..+|site/src/routes/.+\.(yaml|json)|changelog.md)$

- repo: https://github.com/pre-commit/mirrors-eslint
rev: v8.55.0
rev: v8.56.0
hooks:
- id: eslint
types: [file]
Expand Down
8 changes: 4 additions & 4 deletions matbench_discovery/structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
__author__ = "Janosh Riebesell"
__date__ = "2022-12-02"

np.random.seed(0) # ensure reproducible structure perturbations
rng = np.random.default_rng(0) # ensure reproducible structure perturbations


def perturb_structure(struct: Structure, gamma: float = 1.5) -> Structure:
Expand All @@ -29,8 +29,8 @@ def perturb_structure(struct: Structure, gamma: float = 1.5) -> Structure:
"""
perturbed = struct.copy()
for site in perturbed:
magnitude = np.random.weibull(gamma)
vec = np.random.randn(3) # TODO maybe make func recursive to deal with 0-vector
# draw the displacement length from a Weibull distribution with shape parameter gamma
magnitude = rng.weibull(gamma)
# sample a random 3-component direction; size=3 must be passed by keyword because
# the first positional argument of Generator.normal is loc (the mean), so
# rng.normal(3) would return a single scalar instead of a 3-vector
vec = rng.normal(size=3)  # TODO maybe make func recursive to deal with 0-vector
vec /= np.linalg.norm(vec)  # unit vector
site.coords += vec * magnitude
site.to_unit_cell(in_place=True)
Expand All @@ -42,7 +42,7 @@ def perturb_structure(struct: Structure, gamma: float = 1.5) -> Structure:
import matplotlib.pyplot as plt

gamma = 1.5
samples = np.array([np.random.weibull(gamma) for _ in range(10000)])
samples = np.array([rng.weibull(gamma) for _ in range(10_000)])
mean = samples.mean()

# reproduces the dist in https://www.nature.com/articles/s41524-022-00891-8#Fig5
Expand Down
4 changes: 3 additions & 1 deletion models/cgcnn/plot_structure_perturbation.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@
__author__ = "Janosh Riebesell"
__date__ = "2022-12-02"

rng = np.random.default_rng(0)


# %%
ax = pd.Series(np.random.weibull(1.5, 100000)).hist(bins=100)
ax = pd.Series(rng.weibull(1.5, 100_000)).hist(bins=100)
title = "Distribution of perturbation magnitudes"
ax.set(xlabel="magnitude of perturbation", ylabel="count", title=title)

Expand Down
2 changes: 1 addition & 1 deletion models/chgnet/join_chgnet_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
# %%
module_dir = os.path.dirname(__file__)
task_type = "IS2RE"
date = "2023-10-23"
date = "2023-12-21"
glob_pattern = f"{date}-chgnet-*-wbm-{task_type}*/*.json.gz"
file_paths = sorted(glob(f"{module_dir}/{glob_pattern}"))
print(f"Found {len(file_paths):,} files for {glob_pattern = }")
Expand Down
19 changes: 12 additions & 7 deletions models/m3gnet/test_m3gnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,18 +32,17 @@
# direct: DIRECT cluster sampling, ms: manual sampling
model_type: Literal["orig", "direct", "manual-sampling"] = "orig"
# set large job array size for smaller data splits and faster testing/debugging
slurm_array_task_count = 100
slurm_array_task_count = 50
job_name = f"m3gnet-{model_type}-wbm-{task_type}"
out_dir = os.getenv("SBATCH_OUTPUT", f"{module_dir}/{today}-{job_name}")

slurm_vars = slurm_submit(
job_name=job_name,
out_dir=out_dir,
partition="icelake-himem",
account="LEE-SL3-CPU",
time="3:0:0",
account="matgen",
time="11:55:0",
array=f"1-{slurm_array_task_count}",
slurm_flags=("--mem", "12G"),
slurm_flags="--qos shared --constraint cpu --mem 16G",
# TF_CPP_MIN_LOG_LEVEL=2 means INFO and WARNING logs are not printed
# https://stackoverflow.com/a/40982782
pre_cmd="TF_CPP_MIN_LOG_LEVEL=2",
Expand Down Expand Up @@ -88,7 +87,13 @@
task_type=task_type,
df=dict(shape=str(df_in.shape), columns=", ".join(df_in)),
slurm_vars=slurm_vars,
trainable_params=sum(param.numel() for param in m3gnet.parameters()),
trainable_params=sum(
[np.prod(weight.shape) for weight in m3gnet.potential.model.trainable_weights]
),
checkpoint=checkpoint,
model_type=model_type,
out_path=out_path,
job_name=job_name,
)

run_name = f"{job_name}-{slurm_array_task_id}"
Expand All @@ -103,7 +108,7 @@

structures = df_in[input_col].map(Structure.from_dict).to_dict()

for material_id in tqdm(structures, desc="Relaxing", disable=None):
for material_id in tqdm(structures, desc="Relaxing"):
if material_id in relax_results:
continue
try:
Expand Down
6 changes: 2 additions & 4 deletions models/mace/join_mace_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
module_dir = os.path.dirname(__file__)
task_type = "IS2RE"
e_form_mace_col = "e_form_per_atom_mace"

date = "2023-12-11"
glob_pattern = f"{date}-mace-wbm-{task_type}*/*.json.gz"
file_paths = sorted(glob(f"{module_dir}/{glob_pattern}"))
Expand Down Expand Up @@ -92,16 +91,15 @@


# %%
bad_mask = (df_wbm[e_form_mace_col] - df_wbm[e_form_col]) < -3
df_wbm[bad_mask].to_csv(f"{module_dir}/mace-underpredictions<-3.csv")
bad_mask = (df_wbm[e_form_mace_col] - df_wbm[e_form_col]) < -5
print(f"{sum(bad_mask)=}")
ax = density_scatter(df=df_wbm[~bad_mask], x=e_form_col, y=e_form_mace_col)


# %%
out_path = file_paths[0].rsplit("/", 1)[0]
df_mace = df_mace.round(4)
df_mace[~bad_mask].select_dtypes("number").to_csv(f"{out_path}.csv.gz")
df_mace.select_dtypes("number").to_csv(f"{out_path}.csv.gz")
df_mace.reset_index().to_json(f"{out_path}.json.gz", default_handler=as_dict_handler)

df_bad = df_mace[bad_mask].drop(columns=[entry_col, struct_col])
Expand Down
17 changes: 10 additions & 7 deletions models/mace/test_mace.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@

import os
from importlib.metadata import version
from typing import Any
from typing import Any, Literal

import numpy as np
import pandas as pd
import torch
import wandb
from ase.filters import FrechetCellFilter
from ase.filters import ExpCellFilter, FrechetCellFilter
from ase.optimize import FIRE, LBFGS
from mace.calculators import mace_mp
from mace.tools import count_parameters
Expand All @@ -31,7 +31,7 @@
task_type = "IS2RE" # "RS2RE"
module_dir = os.path.dirname(__file__)
# set large job array size for smaller data splits and faster testing/debugging
slurm_array_task_count = 20
slurm_array_task_count = 50
ase_optimizer = "FIRE"
job_name = f"mace-wbm-{task_type}-{ase_optimizer}"
out_dir = os.getenv("SBATCH_OUTPUT", f"{module_dir}/{today}-{job_name}")
Expand All @@ -42,15 +42,16 @@
"2023-10-29-mace-16M-pbenner-mptrj-no-conditional-loss",
"https://tinyurl.com/y7uhwpje",
][-1]
ase_filter: Literal["frechet", "exp"] = "frechet"

slurm_vars = slurm_submit(
job_name=job_name,
out_dir=out_dir,
account="matgen",
time="9:55:0",
time="11:55:0",
array=f"1-{slurm_array_task_count}",
slurm_flags="--qos shared --constraint gpu --gpus 1",
# slurm_flags="--qos shared --constraint cpu --mem 16G",
# slurm_flags="--qos shared --constraint gpu --gpus 1",
slurm_flags="--qos shared --constraint cpu --mem 32G",
)


Expand Down Expand Up @@ -98,6 +99,7 @@
trainable_params=count_parameters(mace_calc.models[0]),
model_name=model_name,
dtype=dtype,
ase_filter=ase_filter,
)

run_name = f"{job_name}-{slurm_array_task_id}"
Expand All @@ -112,6 +114,7 @@
df_in[input_col] = [x["structure"] for x in df_in.computed_structure_entry]

structs = df_in[input_col].map(Structure.from_dict).to_dict()
filter_cls = {"frechet": FrechetCellFilter, "exp": ExpCellFilter}[ase_filter]

for material_id in tqdm(structs, desc="Relaxing"):
if material_id in relax_results:
Expand All @@ -121,7 +124,7 @@
atoms = structs[material_id].to_ase_atoms()
atoms.calc = mace_calc
if max_steps > 0:
atoms = FrechetCellFilter(atoms)
atoms = filter_cls(atoms)
optim_cls = {"FIRE": FIRE, "LBFGS": LBFGS}[ase_optimizer]
optimizer = optim_cls(atoms, logfile="/dev/null")

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,6 @@ ignore = [
"FIX002",
"INP001",
"N806", # non-lowercase-variable-in-function
"NPY002",
"PD901", # pandas-df-variable-name
"PERF203", # try-except-in-loop
"PLC0414", # useless-import-alias
Expand All @@ -119,6 +118,7 @@ ignore = [
]
pydocstyle.convention = "google"
isort.known-third-party = ["wandb"]
isort.split-on-trailing-comma = false

[tool.ruff.per-file-ignores]
"tests/*" = ["D", "S101"]
Expand Down
2 changes: 1 addition & 1 deletion scripts/model_figs/make_hull_dist_box_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
# different fill colors for each box
# patch_artist=True,
# notch=True,
# bootstrap=10000,
# bootstrap=10_000,
showmeans=True,
# meanline=True,
)
Expand Down
2 changes: 1 addition & 1 deletion scripts/model_figs/model_run_times.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@
).update_traces(
textinfo="percent+label",
textfont_size=14,
marker=dict(line=dict(color="#000000", width=2)),
marker=dict(line=dict(color="black", width=2)),
hoverinfo="label+percent+name",
texttemplate="%{label}<br>%{percent:.1%}",
hovertemplate="%{label} %{percent:.1%} (%{value:.1f} h)",
Expand Down
32 changes: 16 additions & 16 deletions site/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,38 +17,38 @@
"changelog": "npx auto-changelog --output ../changelog.md --hide-credit --commit-limit false --latest-version x.y.z"
},
"devDependencies": {
"@iconify/svelte": "^3.1.4",
"@iconify/svelte": "^3.1.6",
"@rollup/plugin-yaml": "^4.1.2",
"@sveltejs/adapter-static": "^2.0.3",
"@sveltejs/kit": "^1.27.4",
"@sveltejs/vite-plugin-svelte": "^2.5.1",
"@typescript-eslint/eslint-plugin": "^6.10.0",
"@typescript-eslint/parser": "^6.10.0",
"@sveltejs/adapter-static": "^3.0.1",
"@sveltejs/kit": "^2.0.6",
"@sveltejs/vite-plugin-svelte": "^3.0.1",
"@typescript-eslint/eslint-plugin": "^6.16.0",
"@typescript-eslint/parser": "^6.16.0",
"d3-scale-chromatic": "^3.0.0",
"elementari": "^0.2.2",
"eslint": "^8.53.0",
"eslint-plugin-svelte": "^2.35.0",
"eslint": "^8.56.0",
"eslint-plugin-svelte": "^2.35.1",
"hastscript": "^8.0.0",
"highlight.js": "^11.9.0",
"js-yaml": "^4.1.0",
"katex": "^0.16.9",
"mdsvex": "^0.11.0",
"prettier": "^3.0.3",
"prettier-plugin-svelte": "^3.0.3",
"prettier": "^3.1.1",
"prettier-plugin-svelte": "^3.1.2",
"rehype-autolink-headings": "^7.1.0",
"rehype-katex-svelte": "^1.2.0",
"rehype-slug": "^6.0.0",
"remark-math": "3.0.0",
"svelte": "^4.2.2",
"svelte-check": "^3.5.2",
"svelte": "^4.2.8",
"svelte-check": "^3.6.2",
"svelte-multiselect": "^10.2.0",
"svelte-preprocess": "^5.0.4",
"svelte-preprocess": "^5.1.3",
"svelte-toc": "^0.5.6",
"svelte-zoo": "^0.4.9",
"svelte2tsx": "^0.6.23",
"svelte2tsx": "^0.6.27",
"tslib": "^2.6.2",
"typescript": "5.2.2",
"vite": "^4.5.0"
"typescript": "5.3.3",
"vite": "^5.0.10"
},
"prettier": {
"semi": false,
Expand Down
4 changes: 2 additions & 2 deletions tests/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ def test_stable_metrics() -> None:

# test stable_metrics gives the same result as sklearn.metrics.classification_report
# for random numpy data
np.random.seed(0)
y_true, y_pred = np.random.randn(100, 2).T
rng = np.random.default_rng(0)
y_true, y_pred = rng.normal(size=(2, 100))
metrics = stable_metrics(y_true, y_pred)

from sklearn.metrics import classification_report
Expand Down
6 changes: 0 additions & 6 deletions tests/test_structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,19 @@

from typing import TYPE_CHECKING

import numpy as np

from matbench_discovery.structure import perturb_structure

if TYPE_CHECKING:
from pymatgen.core import Structure


def test_perturb_structure(dummy_struct: Structure) -> None:
np.random.seed(0)
perturbed = perturb_structure(dummy_struct)
assert len(perturbed) == len(dummy_struct)

for site, new in zip(dummy_struct, perturbed):
assert site.specie == new.specie
assert tuple(site.coords) != tuple(new.coords)

# test that the perturbation is reproducible
np.random.seed(0)
assert perturbed == perturb_structure(dummy_struct)
# but different on subsequent calls
assert perturb_structure(dummy_struct) != perturb_structure(dummy_struct)

0 comments on commit e203f8f

Please sign in to comment.