Skip to content

Commit

Permalink
replace figshare wbm-steps-summary.csv imports with data/wbm/2022-10-19-wbm-summary.csv
Browse files Browse the repository at this point in the history
  • Loading branch information
janosh committed Jun 20, 2023
1 parent 5d9a3b2 commit 73655f6
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 15 deletions.
4 changes: 1 addition & 3 deletions data/wbm/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,7 @@ Source: [Predicting stable crystalline compounds using chemical similarity](http
Load with

```py
df_wbm_summary = pd.read_csv( # download wbm-steps-summary.csv (23.31 MB)
"https://figshare.com/files/37570234?private_link=ff0ad14505f9624f0c05"
).set_index("material_id")
df_wbm_summary = pd.read_csv("data/wbm/2022-10-19-wbm-summary.csv").set_index("material_id")
```

## Comprehensive Link Collection for WBM dataset
Expand Down
9 changes: 4 additions & 5 deletions models/bowsr/join_bowsr_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from pymatgen.core import Structure
from tqdm import tqdm

from matbench_discovery import ROOT, as_dict_handler
from matbench_discovery import ROOT

__author__ = "Janosh Riebesell"
__date__ = "2022-09-22"
Expand Down Expand Up @@ -48,9 +48,8 @@


# %% compare against WBM formation energy targets to make sure we got sensible results
df_wbm = pd.read_csv( # download wbm-steps-summary.csv (23.31 MB)
"https://figshare.com/files/37570234?private_link=ff0ad14505f9624f0c05"
).set_index("material_id")
data_path = f"{ROOT}/data/wbm/2022-10-19-wbm-summary.csv"
df_wbm = pd.read_csv(data_path).set_index("material_id")

df_bowsr["e_form_wbm"] = df_wbm.e_form_per_atom

Expand All @@ -71,7 +70,7 @@

# %%
out_path = f"{ROOT}/models/bowsr/{today}-bowsr-megnet-wbm-{task_type}.json.gz"
df_bowsr.reset_index().to_json(out_path, default_handler=as_dict_handler)
df_bowsr.reset_index().to_json(out_path, default_handler=lambda x: x.as_dict())

# out_path = f"{ROOT}/models/bowsr/2022-08-16-bowsr-megnet-wbm-IS2RE.json.gz"
# df_bowsr = pd.read_json(out_path).set_index("material_id")
8 changes: 3 additions & 5 deletions models/cgcnn/use_cgcnn_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,10 @@
df = df.dropna() # two missing initial structures
assert len(df) == old_len - 2

assert all(
df.index == df_wbm.drop(index=no_init_structs).index
), "df and df_wbm must have same index"
df["e_form_per_atom_mp2020_corrected"] = df_wbm.e_form_per_atom_mp2020_corrected
assert all(df.index == df_wbm.drop(index=no_init_structs).index)

target_col = "e_form_per_atom_mp2020_corrected"
df[target_col] = df_wbm[target_col]
input_col = "initial_structure"
assert target_col in df, f"{target_col=} not in {list(df)}"
assert input_col in df, f"{input_col=} not in {list(df)}"
Expand Down Expand Up @@ -84,4 +82,4 @@
data_loader=data_loader,
)

df.round(6).to_csv(f"{module_dir}/{today}-{run_name}-preds.csv")
df.round(6).to_csv(f"{module_dir}/{today}-{run_name}-preds.csv", index=False)
4 changes: 2 additions & 2 deletions models/wrenformer/mp/use_wrenformer_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from aviary.wrenformer.data import df_to_in_mem_dataloader
from aviary.wrenformer.model import Wrenformer

from matbench_discovery import ROOT
from matbench_discovery.slurm import slurm_submit_python

__author__ = "Janosh Riebesell"
Expand Down Expand Up @@ -37,8 +38,7 @@


# %%
# download wbm-steps-summary.csv (23.31 MB)
data_path = "https://figshare.com/files/37570234?private_link=ff0ad14505f9624f0c05"
data_path = f"{ROOT}/data/wbm/2022-10-19-wbm-summary.csv"
df = pd.read_csv(data_path).set_index("material_id")

target_col = "e_form_per_atom"
Expand Down

0 comments on commit 73655f6

Please sign in to comment.