diff --git a/.gitignore b/.gitignore index 95060f27..432bfc1c 100644 --- a/.gitignore +++ b/.gitignore @@ -29,5 +29,4 @@ models/voronoi/*.zip site/static/figures # generated docs -site/src/routes/api/.* site/src/routes/api/*.md diff --git a/matbench_discovery/__init__.py b/matbench_discovery/__init__.py index acd539d7..64b500d7 100644 --- a/matbench_discovery/__init__.py +++ b/matbench_discovery/__init__.py @@ -4,9 +4,12 @@ import sys from datetime import datetime -ROOT = os.path.dirname(os.path.dirname(__file__)) +ROOT = os.path.dirname(os.path.dirname(__file__)) # repository root +# whether running in debug mode, i.e. not submitting or running as a slurm job DEBUG = "slurm-submit" not in sys.argv and "SLURM_JOB_ID" not in os.environ +# directory to store model checkpoints downloaded from wandb cloud storage CHECKPOINT_DIR = f"{ROOT}/wandb/checkpoints" +# wandb <entity>/<project name> to record new runs to WANDB_PATH = "materialsproject/matbench-discovery" timestamp = f"{datetime.now():%Y-%m-%d@%H-%M-%S}" diff --git a/matbench_discovery/data.py b/matbench_discovery/data.py index 54da5648..709f8e55 100644 --- a/matbench_discovery/data.py +++ b/matbench_discovery/data.py @@ -2,7 +2,7 @@ import os import urllib.error -from collections.abc import Generator, Sequence +from collections.abc import Sequence from glob import glob from pathlib import Path from typing import Any, Callable @@ -17,7 +17,9 @@ df_wbm = pd.read_csv(f"{ROOT}/data/wbm/2022-10-19-wbm-summary.csv") df_wbm.index = df_wbm.material_id +# repo URL to raw files on GitHub RAW_REPO_URL = "https://raw.githubusercontent.com/janosh/matbench-discovery" +# directory to cache downloaded data files default_cache_dir = os.path.expanduser("~/.cache/matbench-discovery") DATA_FILENAMES = { @@ -31,10 +33,6 @@ } -def chunks(xs: Sequence[Any], n: int) -> Generator[Sequence[Any], None, None]: return (xs[i : i + n] for i in range(0, len(xs), n)) - - def as_dict_handler(obj: Any) -> dict[str, Any] | None: """Pass this to json.dump(default=) or as 
pandas.to_json(default_handler=) to convert Python classes with a as_dict() method to dictionaries on serialization. @@ -50,7 +48,7 @@ def as_dict_handler(obj: Any) -> dict[str, Any] | None: def load_train_test( data_names: str | Sequence[str] = ("summary",), version: str = "1.0.0", - cache_dir: str | Path | None = default_cache_dir, + cache_dir: str | Path = default_cache_dir, hydrate: bool = False, **kwargs: Any, ) -> pd.DataFrame: diff --git a/site/src/app.css b/site/src/app.css index 699ecdc8..38341672 100644 --- a/site/src/app.css +++ b/site/src/app.css @@ -1,5 +1,5 @@ :root { - --night: #102030; + --night: #061e25; --blue: cornflowerblue; --text-color: rgb(208, 208, 208); diff --git a/site/src/routes/+layout.svelte b/site/src/routes/+layout.svelte index 75d32517..42a19099 100644 --- a/site/src/routes/+layout.svelte +++ b/site/src/routes/+layout.svelte @@ -19,13 +19,13 @@ const routes = Object.keys(import.meta.glob(`./*/+page.{svx,svelte,md}`)).map( (filename) => `/` + filename.split(`/`)[1] ) + + $: headingSelector = `main > :is(${ + $page.url.pathname === `/api` ? 
`h1, ` : `` + }h2, h3, h4):not(.toc-exclude)` - + {#if $page.url.pathname !== `/`} « home diff --git a/site/src/routes/api/+page.svelte b/site/src/routes/api/+page.svelte index c2ff1efd..c29e7f06 100644 --- a/site/src/routes/api/+page.svelte +++ b/site/src/routes/api/+page.svelte @@ -1 +1,28 @@ - + + +{#each Object.values(import.meta.glob(`./*.md`, { eager: true })) as file} + +{/each} + + diff --git a/tests/test_data.py b/tests/test_data.py index fa55cff1..5be7f103 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -3,6 +3,7 @@ import os import urllib.request from pathlib import Path +from random import random from tempfile import TemporaryDirectory from typing import Any from unittest.mock import patch @@ -18,7 +19,6 @@ PRED_FILENAMES, RAW_REPO_URL, as_dict_handler, - chunks, df_wbm, glob_to_df, load_df_wbm_with_preds, @@ -38,29 +38,34 @@ @pytest.mark.parametrize( - "data_names, cache_dir, hydrate", + "data_names, hydrate", [ - (["wbm-summary"], None, True), - (["wbm-initial-structures"], TemporaryDirectory().name, True), - (["wbm-computed-structure-entries"], None, False), - (["wbm-summary", "wbm-initial-structures"], TemporaryDirectory().name, True), - (["mp-elemental-ref-energies"], None, True), - (["mp-energies"], None, True), + (["wbm-summary"], True), + (["wbm-initial-structures"], True), + (["wbm-computed-structure-entries"], False), + (["wbm-summary", "wbm-initial-structures"], True), + (["mp-elemental-ref-energies"], True), + (["mp-energies"], True), ], ) def test_load_train_test( data_names: list[str], - cache_dir: str | None, hydrate: bool, dummy_df_with_structures: pd.DataFrame, capsys: CaptureFixture[str], + tmp_path: Path, ) -> None: # intercept HTTP requests to GitHub raw user content and return dummy df instead with patch("matbench_discovery.data.pd.read_csv") as read_csv, patch( "matbench_discovery.data.pd.read_json" ) as read_json: read_csv.return_value = read_json.return_value = dummy_df_with_structures - out = 
load_train_test(data_names, cache_dir=cache_dir, hydrate=hydrate) + out = load_train_test( + data_names, + hydrate=hydrate, + # test both str and Path cache_dir + cache_dir=TemporaryDirectory().name if random() < 0.5 else tmp_path, + ) stdout, stderr = capsys.readouterr() @@ -152,17 +157,6 @@ def test_load_train_test_no_mock( ) -def test_chunks() -> None: - assert list(chunks([], 1)) == [] - assert list(chunks([1], 1)) == [[1]] - assert list(chunks([1, 2], 1)) == [[1], [2]] - assert list(chunks([1, 2, 3], 1)) == [[1], [2], [3]] - assert list(chunks([1, 2, 3], 2)) == [[1, 2], [3]] - assert list(chunks(range(1, 4), 2)) == [range(1, 3), range(3, 4)] - assert list(chunks(range(1, 5), 2)) == [range(1, 3), range(3, 5)] - assert list(chunks(range(1, 5), 3)) == [range(1, 4), range(4, 5)] - - def test_as_dict_handler() -> None: class C: def as_dict(self) -> dict[str, Any]: