Patch summary: bump pre-commit hook and site dev-dependency versions, replace the
legacy np.random global-state API with np.random.default_rng generators (and drop
the NPY002 ruff ignore), and update the M3GNet/MACE/CHGNet model scripts.

Review note: in matbench_discovery/structure.py the legacy np.random.randn(3)
(a length-3 array) is ported as rng.normal(size=3). A positional rng.normal(3)
would draw a single scalar with mean 3, so every site would be displaced along
the same (1, 1, 1) diagonal. The "index" hash of that file was not recomputed.

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 8483c6d4..62354572 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -7,7 +7,7 @@ default_install_hook_types: [pre-commit, commit-msg]
 
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.1.7
+    rev: v0.1.9
     hooks:
       - id: ruff
         args: [--fix]
@@ -30,7 +30,7 @@ repos:
       - id: trailing-whitespace
 
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.7.1
+    rev: v1.8.0
     hooks:
       - id: mypy
         additional_dependencies: [types-pyyaml, types-requests]
@@ -45,7 +45,7 @@ repos:
         args: [--ignore-words-list, "nd,te,fpr", --check-filenames]
 
   - repo: https://github.com/pre-commit/mirrors-prettier
-    rev: v4.0.0-alpha.3
+    rev: v4.0.0-alpha.8
     hooks:
       - id: prettier
         args: [--write] # edit files in-place
@@ -56,7 +56,7 @@ repos:
         exclude: ^(site/src/figs/.+\.svelte|data/wbm/20.+\..+|site/src/routes/.+\.(yaml|json)|changelog.md)$
 
   - repo: https://github.com/pre-commit/mirrors-eslint
-    rev: v8.55.0
+    rev: v8.56.0
     hooks:
       - id: eslint
         types: [file]
diff --git a/matbench_discovery/structure.py b/matbench_discovery/structure.py
index aa443176..cc7f32d2 100644
--- a/matbench_discovery/structure.py
+++ b/matbench_discovery/structure.py
@@ -10,7 +10,7 @@
 __author__ = "Janosh Riebesell"
 __date__ = "2022-12-02"
 
-np.random.seed(0)  # ensure reproducible structure perturbations
+rng = np.random.default_rng(0)  # ensure reproducible structure perturbations
 
 
 def perturb_structure(struct: Structure, gamma: float = 1.5) -> Structure:
@@ -29,8 +29,8 @@ def perturb_structure(struct: Structure, gamma: float = 1.5) -> Structure:
     """
     perturbed = struct.copy()
     for site in perturbed:
-        magnitude = np.random.weibull(gamma)
-        vec = np.random.randn(3)  # TODO maybe make func recursive to deal with 0-vector
+        magnitude = rng.weibull(gamma)
+        vec = rng.normal(size=3)  # TODO maybe make func recursive to deal with 0-vector
         vec /= np.linalg.norm(vec)  # unit vector
         site.coords += vec * magnitude
         site.to_unit_cell(in_place=True)
@@ -42,7 +42,7 @@ def perturb_structure(struct: Structure, gamma: float = 1.5) -> Structure:
     import matplotlib.pyplot as plt
 
     gamma = 1.5
-    samples = np.array([np.random.weibull(gamma) for _ in range(10000)])
+    samples = np.array([rng.weibull(gamma) for _ in range(10_000)])
     mean = samples.mean()
 
     # reproduces the dist in https://www.nature.com/articles/s41524-022-00891-8#Fig5
diff --git a/models/cgcnn/plot_structure_perturbation.py b/models/cgcnn/plot_structure_perturbation.py
index fe1921e4..98deba13 100644
--- a/models/cgcnn/plot_structure_perturbation.py
+++ b/models/cgcnn/plot_structure_perturbation.py
@@ -10,9 +10,11 @@
 __author__ = "Janosh Riebesell"
 __date__ = "2022-12-02"
 
+rng = np.random.default_rng(0)
+
 
 # %%
-ax = pd.Series(np.random.weibull(1.5, 100000)).hist(bins=100)
+ax = pd.Series(rng.weibull(1.5, 100_000)).hist(bins=100)
 title = "Distribution of perturbation magnitudes"
 ax.set(xlabel="magnitude of perturbation", ylabel="count", title=title)
 
diff --git a/models/chgnet/join_chgnet_results.py b/models/chgnet/join_chgnet_results.py
index 334cd23a..25ba8238 100644
--- a/models/chgnet/join_chgnet_results.py
+++ b/models/chgnet/join_chgnet_results.py
@@ -30,7 +30,7 @@
 # %%
 module_dir = os.path.dirname(__file__)
 task_type = "IS2RE"
-date = "2023-10-23"
+date = "2023-12-21"
 glob_pattern = f"{date}-chgnet-*-wbm-{task_type}*/*.json.gz"
 file_paths = sorted(glob(f"{module_dir}/{glob_pattern}"))
 print(f"Found {len(file_paths):,} files for {glob_pattern = }")
diff --git a/models/m3gnet/test_m3gnet.py b/models/m3gnet/test_m3gnet.py
index 8e3dfa97..fd54117f 100644
--- a/models/m3gnet/test_m3gnet.py
+++ b/models/m3gnet/test_m3gnet.py
@@ -32,18 +32,17 @@
 # direct: DIRECT cluster sampling, ms: manual sampling
 model_type: Literal["orig", "direct", "manual-sampling"] = "orig"
 # set large job array size for smaller data splits and faster testing/debugging
-slurm_array_task_count = 100
+slurm_array_task_count = 50
 job_name = f"m3gnet-{model_type}-wbm-{task_type}"
 out_dir = os.getenv("SBATCH_OUTPUT", f"{module_dir}/{today}-{job_name}")
 
 slurm_vars = slurm_submit(
     job_name=job_name,
     out_dir=out_dir,
-    partition="icelake-himem",
-    account="LEE-SL3-CPU",
-    time="3:0:0",
+    account="matgen",
+    time="11:55:0",
     array=f"1-{slurm_array_task_count}",
-    slurm_flags=("--mem", "12G"),
+    slurm_flags="--qos shared --constraint cpu --mem 16G",
     # TF_CPP_MIN_LOG_LEVEL=2 means INFO and WARNING logs are not printed
     # https://stackoverflow.com/a/40982782
     pre_cmd="TF_CPP_MIN_LOG_LEVEL=2",
@@ -88,7 +87,13 @@
     task_type=task_type,
     df=dict(shape=str(df_in.shape), columns=", ".join(df_in)),
     slurm_vars=slurm_vars,
-    trainable_params=sum(param.numel() for param in m3gnet.parameters()),
+    trainable_params=sum(
+        [np.prod(weight.shape) for weight in m3gnet.potential.model.trainable_weights]
+    ),
+    checkpoint=checkpoint,
+    model_type=model_type,
+    out_path=out_path,
+    job_name=job_name,
 )
 
 run_name = f"{job_name}-{slurm_array_task_id}"
@@ -103,7 +108,7 @@
 
 structures = df_in[input_col].map(Structure.from_dict).to_dict()
 
-for material_id in tqdm(structures, desc="Relaxing", disable=None):
+for material_id in tqdm(structures, desc="Relaxing"):
     if material_id in relax_results:
         continue
     try:
diff --git a/models/mace/join_mace_results.py b/models/mace/join_mace_results.py
index ed59eae5..2498068a 100644
--- a/models/mace/join_mace_results.py
+++ b/models/mace/join_mace_results.py
@@ -29,7 +29,6 @@
 module_dir = os.path.dirname(__file__)
 task_type = "IS2RE"
 e_form_mace_col = "e_form_per_atom_mace"
-
 date = "2023-12-11"
 glob_pattern = f"{date}-mace-wbm-{task_type}*/*.json.gz"
 file_paths = sorted(glob(f"{module_dir}/{glob_pattern}"))
@@ -92,8 +91,7 @@
 
 
 # %%
-bad_mask = (df_wbm[e_form_mace_col] - df_wbm[e_form_col]) < -3
-df_wbm[bad_mask].to_csv(f"{module_dir}/mace-underpredictions<-3.csv")
+bad_mask = (df_wbm[e_form_mace_col] - df_wbm[e_form_col]) < -5
 print(f"{sum(bad_mask)=}")
 ax = density_scatter(df=df_wbm[~bad_mask], x=e_form_col, y=e_form_mace_col)
 
@@ -101,7 +99,7 @@
 # %%
 out_path = file_paths[0].rsplit("/", 1)[0]
 df_mace = df_mace.round(4)
-df_mace[~bad_mask].select_dtypes("number").to_csv(f"{out_path}.csv.gz")
+df_mace.select_dtypes("number").to_csv(f"{out_path}.csv.gz")
 df_mace.reset_index().to_json(f"{out_path}.json.gz", default_handler=as_dict_handler)
 
 df_bad = df_mace[bad_mask].drop(columns=[entry_col, struct_col])
diff --git a/models/mace/test_mace.py b/models/mace/test_mace.py
index e8776f47..a9e3b849 100644
--- a/models/mace/test_mace.py
+++ b/models/mace/test_mace.py
@@ -3,13 +3,13 @@
 
 import os
 from importlib.metadata import version
-from typing import Any
+from typing import Any, Literal
 
 import numpy as np
 import pandas as pd
 import torch
 import wandb
-from ase.filters import FrechetCellFilter
+from ase.filters import ExpCellFilter, FrechetCellFilter
 from ase.optimize import FIRE, LBFGS
 from mace.calculators import mace_mp
 from mace.tools import count_parameters
@@ -31,7 +31,7 @@
 task_type = "IS2RE"  # "RS2RE"
 module_dir = os.path.dirname(__file__)
 # set large job array size for smaller data splits and faster testing/debugging
-slurm_array_task_count = 20
+slurm_array_task_count = 50
 ase_optimizer = "FIRE"
 job_name = f"mace-wbm-{task_type}-{ase_optimizer}"
 out_dir = os.getenv("SBATCH_OUTPUT", f"{module_dir}/{today}-{job_name}")
@@ -42,15 +42,16 @@
     "2023-10-29-mace-16M-pbenner-mptrj-no-conditional-loss",
     "https://tinyurl.com/y7uhwpje",
 ][-1]
+ase_filter: Literal["frechet", "exp"] = "frechet"
 
 slurm_vars = slurm_submit(
     job_name=job_name,
     out_dir=out_dir,
     account="matgen",
-    time="9:55:0",
+    time="11:55:0",
     array=f"1-{slurm_array_task_count}",
-    slurm_flags="--qos shared --constraint gpu --gpus 1",
-    # slurm_flags="--qos shared --constraint cpu --mem 16G",
+    # slurm_flags="--qos shared --constraint gpu --gpus 1",
+    slurm_flags="--qos shared --constraint cpu --mem 32G",
 )
 
 
@@ -98,6 +99,7 @@
     trainable_params=count_parameters(mace_calc.models[0]),
     model_name=model_name,
     dtype=dtype,
+    ase_filter=ase_filter,
 )
 
 run_name = f"{job_name}-{slurm_array_task_id}"
@@ -112,6 +114,7 @@
 df_in[input_col] = [x["structure"] for x in df_in.computed_structure_entry]
 
 structs = df_in[input_col].map(Structure.from_dict).to_dict()
+filter_cls = {"frechet": FrechetCellFilter, "exp": ExpCellFilter}[ase_filter]
 
 for material_id in tqdm(structs, desc="Relaxing"):
     if material_id in relax_results:
@@ -121,7 +124,7 @@
         atoms = structs[material_id].to_ase_atoms()
         atoms.calc = mace_calc
         if max_steps > 0:
-            atoms = FrechetCellFilter(atoms)
+            atoms = filter_cls(atoms)
             optim_cls = {"FIRE": FIRE, "LBFGS": LBFGS}[ase_optimizer]
             optimizer = optim_cls(atoms, logfile="/dev/null")
 
diff --git a/pyproject.toml b/pyproject.toml
index 2f85af72..d9d5621a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -98,7 +98,6 @@ ignore = [
     "FIX002",
     "INP001",
     "N806", # non-lowercase-variable-in-function
-    "NPY002",
     "PD901", # pandas-df-variable-name
     "PERF203", # try-except-in-loop
     "PLC0414", # useless-import-alias
@@ -119,6 +118,7 @@ ignore = [
 ]
 pydocstyle.convention = "google"
 isort.known-third-party = ["wandb"]
+isort.split-on-trailing-comma = false
 
 [tool.ruff.per-file-ignores]
 "tests/*" = ["D", "S101"]
diff --git a/scripts/model_figs/make_hull_dist_box_plot.py b/scripts/model_figs/make_hull_dist_box_plot.py
index bde0870a..ca4475c5 100644
--- a/scripts/model_figs/make_hull_dist_box_plot.py
+++ b/scripts/model_figs/make_hull_dist_box_plot.py
@@ -20,7 +20,7 @@
     # different fill colors for each box
     # patch_artist=True,
     # notch=True,
-    # bootstrap=10000,
+    # bootstrap=10_000,
     showmeans=True,
     # meanline=True,
 )
diff --git a/scripts/model_figs/model_run_times.py b/scripts/model_figs/model_run_times.py
index 29e431ad..9f7a6acf 100644
--- a/scripts/model_figs/model_run_times.py
+++ b/scripts/model_figs/model_run_times.py
@@ -155,7 +155,7 @@
 ).update_traces(
     textinfo="percent+label",
     textfont_size=14,
-    marker=dict(line=dict(color="#000000", width=2)),
+    marker=dict(line=dict(color="black", width=2)),
     hoverinfo="label+percent+name",
     texttemplate="%{label}<br>%{percent:.1%}",
     hovertemplate="%{label} %{percent:.1%} (%{value:.1f} h)",
diff --git a/site/package.json b/site/package.json
index 30774628..24db81f2 100644
--- a/site/package.json
+++ b/site/package.json
@@ -17,38 +17,38 @@
     "changelog": "npx auto-changelog --output ../changelog.md --hide-credit --commit-limit false --latest-version x.y.z"
   },
   "devDependencies": {
-    "@iconify/svelte": "^3.1.4",
+    "@iconify/svelte": "^3.1.6",
     "@rollup/plugin-yaml": "^4.1.2",
-    "@sveltejs/adapter-static": "^2.0.3",
-    "@sveltejs/kit": "^1.27.4",
-    "@sveltejs/vite-plugin-svelte": "^2.5.1",
-    "@typescript-eslint/eslint-plugin": "^6.10.0",
-    "@typescript-eslint/parser": "^6.10.0",
+    "@sveltejs/adapter-static": "^3.0.1",
+    "@sveltejs/kit": "^2.0.6",
+    "@sveltejs/vite-plugin-svelte": "^3.0.1",
+    "@typescript-eslint/eslint-plugin": "^6.16.0",
+    "@typescript-eslint/parser": "^6.16.0",
     "d3-scale-chromatic": "^3.0.0",
     "elementari": "^0.2.2",
-    "eslint": "^8.53.0",
-    "eslint-plugin-svelte": "^2.35.0",
+    "eslint": "^8.56.0",
+    "eslint-plugin-svelte": "^2.35.1",
     "hastscript": "^8.0.0",
     "highlight.js": "^11.9.0",
     "js-yaml": "^4.1.0",
     "katex": "^0.16.9",
     "mdsvex": "^0.11.0",
-    "prettier": "^3.0.3",
-    "prettier-plugin-svelte": "^3.0.3",
+    "prettier": "^3.1.1",
+    "prettier-plugin-svelte": "^3.1.2",
     "rehype-autolink-headings": "^7.1.0",
     "rehype-katex-svelte": "^1.2.0",
     "rehype-slug": "^6.0.0",
     "remark-math": "3.0.0",
-    "svelte": "^4.2.2",
-    "svelte-check": "^3.5.2",
+    "svelte": "^4.2.8",
+    "svelte-check": "^3.6.2",
     "svelte-multiselect": "^10.2.0",
-    "svelte-preprocess": "^5.0.4",
+    "svelte-preprocess": "^5.1.3",
     "svelte-toc": "^0.5.6",
     "svelte-zoo": "^0.4.9",
-    "svelte2tsx": "^0.6.23",
+    "svelte2tsx": "^0.6.27",
     "tslib": "^2.6.2",
-    "typescript": "5.2.2",
-    "vite": "^4.5.0"
+    "typescript": "5.3.3",
+    "vite": "^5.0.10"
   },
   "prettier": {
     "semi": false,
diff --git a/tests/test_metrics.py b/tests/test_metrics.py
index 91c88647..c9faf097 100644
--- a/tests/test_metrics.py
+++ b/tests/test_metrics.py
@@ -55,8 +55,8 @@ def test_stable_metrics() -> None:
 
     # test stable_metrics gives the same result as sklearn.metrics.classification_report
     # for random numpy data
-    np.random.seed(0)
-    y_true, y_pred = np.random.randn(100, 2).T
+    rng = np.random.default_rng(0)
+    y_true, y_pred = rng.normal(size=(2, 100))
     metrics = stable_metrics(y_true, y_pred)
 
     from sklearn.metrics import classification_report
diff --git a/tests/test_structure.py b/tests/test_structure.py
index 0fd632de..fcd39137 100644
--- a/tests/test_structure.py
+++ b/tests/test_structure.py
@@ -2,8 +2,6 @@
 
 from typing import TYPE_CHECKING
 
-import numpy as np
-
 from matbench_discovery.structure import perturb_structure
 
 if TYPE_CHECKING:
@@ -11,7 +9,6 @@
 
 
 def test_perturb_structure(dummy_struct: Structure) -> None:
-    np.random.seed(0)
     perturbed = perturb_structure(dummy_struct)
     assert len(perturbed) == len(dummy_struct)
 
@@ -19,8 +16,5 @@ def test_perturb_structure(dummy_struct: Structure) -> None:
         assert site.specie == new.specie
         assert tuple(site.coords) != tuple(new.coords)
 
-    # test that the perturbation is reproducible
-    np.random.seed(0)
-    assert perturbed == perturb_structure(dummy_struct)
     # but different on subsequent calls
     assert perturb_structure(dummy_struct) != perturb_structure(dummy_struct)