modify ff.py file and add pre-trained folder. #123

Open · wants to merge 2 commits into base: main

614 changes: 296 additions & 318 deletions alignn/ff/ff.py

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion alignn/ff/revised/__init__.py

This file was deleted.

Binary file removed alignn/ff/revised/best_model.pt
Binary file not shown.
Binary file added alignn/pre-trained/ocp2020_all/checkpoint_45.pt
Binary file not shown.
@@ -7,58 +7,52 @@
     "id_tag": "jid",
     "random_seed": 123,
     "classification_threshold": null,
-    "n_val": null,
-    "n_test": null,
-    "n_train": null,
-    "train_ratio": 0.9,
-    "val_ratio": 0.05,
-    "test_ratio": 0.05,
+    "n_val": 24943,
+    "n_test": 24943,
+    "n_train": 460328,
+    "train_ratio": 0.8,
+    "val_ratio": 0.1,
+    "test_ratio": 0.1,
     "target_multiplication_factor": null,
-    "epochs": 100,
-    "batch_size": 16,
+    "epochs": 50,
+    "batch_size": 32,
     "weight_decay": 1e-05,
     "learning_rate": 0.001,
     "filename": "sample",
     "warmup_steps": 2000,
-    "criterion": "l1",
+    "criterion": "mse",
     "optimizer": "adamw",
     "scheduler": "onecycle",
     "pin_memory": false,
     "save_dataloader": false,
     "write_checkpoint": true,
     "write_predictions": true,
-    "store_outputs": false,
+    "store_outputs": true,
     "progress": true,
     "log_tensorboard": false,
     "standard_scalar_and_pca": false,
-    "use_canonize": false,
+    "use_canonize": true,
     "num_workers": 0,
     "cutoff": 8.0,
     "max_neighbors": 12,
-    "keep_data_order": false,
+    "keep_data_order": true,
     "normalize_graph_level_loss": false,
     "distributed": false,
     "n_early_stopping": null,
-    "output_dir": "out_continue",
+    "output_dir": "tempall",
     "model": {
-        "name": "alignn_atomwise",
-        "alignn_layers": 4,
+        "name": "alignn",
+        "alignn_layers": 2,
         "gcn_layers": 4,
         "atom_input_features": 92,
         "edge_input_features": 80,
         "triplet_input_features": 40,
         "embedding_features": 64,
         "hidden_features": 256,
         "output_features": 1,
-        "grad_multiplier": -1,
-        "calculate_gradient": true,
-        "atomwise_output_features": 0,
-        "graphwise_weight": 1.0,
-        "gradwise_weight": 10.0,
-        "stresswise_weight": 0.0,
-        "atomwise_weight": 0.0,
         "link": "identity",
         "zero_inflated": false,
-        "classification": false
+        "classification": false,
+        "num_classes": 2
     }
 }
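
For reference, a minimal sketch (not part of this PR) of how the "model" block above maps onto an ALIGNN instance; the config.json path is assumed, since the file header for this hunk is not rendered above:

from jarvis.db.jsonutils import loadjson
from alignn.models.alignn import ALIGNN, ALIGNNConfig

cfg = loadjson("alignn/pre-trained/ocp2020_all/config.json")  # assumed location
m = cfg["model"]
model = ALIGNN(
    ALIGNNConfig(
        name=m["name"],                        # "alignn"
        alignn_layers=m["alignn_layers"],      # 2
        gcn_layers=m["gcn_layers"],            # 4
        output_features=m["output_features"],  # 1
    )
)
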
1 change: 1 addition & 0 deletions alignn/pre-trained/ocp2020_all/history_train.json
@@ -0,0 +1 @@
{"loss": [1.242798488008342, 1.093458558720021, 1.0134414646332985, 0.951067599713243, 0.8745518742396594, 0.898653789754953, 0.8362365446863051, 0.8224021468760861, 0.7795552550399721, 0.7730100799443865, 0.7515591056221759, 0.7388777779588113, 0.716031103362878, 0.7136948209940911, 0.7261467566041014, 0.6732144214893987, 0.6532027991397289, 0.63671875, 0.6265350734271811, 0.6092405826381647, 0.5846296462243656, 0.5580424012100278, 0.5445762933937262, 0.5313807514337852, 0.4952561872718978, 0.47810256316258254, 0.4577896897810219, 0.4254376710766423, 0.40602780402763294, 0.38824577875173794, 0.36579682340545705, 0.34168839068474105, 0.31409366038842546, 0.2938517973909454, 0.26776810832681613, 0.25274615349104973, 0.22926905807373132, 0.20771340113942474, 0.1936585215176399, 0.17776635003910324, 0.16531178378627912, 0.15266229538690476, 0.14320146854362184, 0.13564101806895204], "mae": [0.7805721427485227, 0.7108072916666667, 0.6873055021506778, 0.6571108821037539, 0.6313307183046576, 0.6484085799009385, 0.6195709642205423, 0.6060081845238096, 0.5954230888295099, 0.5953284535757734, 0.5896742347714633, 0.5823936337765033, 0.573410209202294, 0.5723786577815433, 0.5791733875347584, 0.555680525232447, 0.5490652562347932, 0.5480471804940041, 0.5465672657064651, 0.5360462219217066, 0.5294444286040146, 0.5187364564324818, 0.5157378970064304, 0.5108121456269552, 0.49474927088764337, 0.4873516996328641, 0.47867342962504345, 0.46357377748088285, 0.4568008396332986, 0.44969644079118876, 0.43762644057394856, 0.42328173200165103, 0.4096803242852798, 0.3972902681830031, 0.38103306531326037, 0.37213606159845325, 0.3561765117201077, 0.34038708805830725, 0.33032418344847064, 0.31716869514685436, 0.30663672146550225, 0.2952301184502085, 0.286680054092805, 0.2790509353547532]}
1 change: 1 addition & 0 deletions alignn/pre-trained/ocp2020_all/history_val.json
@@ -0,0 +1 @@
{"loss": [1.2597084290255938, 1.1104618018643695, 1.0429591128700657, 0.9956854647635189, 0.9269066003841062, 0.9477316546654364, 0.9062190514933007, 0.9093923311637516, 0.8623399452924422, 0.8719415823822609, 0.8572578087391688, 0.8553321848172738, 0.8403540477826139, 0.8580992163681402, 0.8637968756268052, 0.8405780522905969, 0.8313192776446165, 0.8366862188101734, 0.8379199281430519, 0.8410041231245988, 0.8346832627968549, 0.8368975305128771, 0.8294504579438784, 0.8397048342977776, 0.8445738570834002, 0.8382498627296614, 0.8487222889421534, 0.8316056492698973, 0.8399800801307766, 0.8383183411976493, 0.8575983206635109, 0.8469434941381178, 0.8415695797647224, 0.8477357759096197, 0.8376467194269496, 0.8512415757381258, 0.8520017337431803, 0.8489415924111441, 0.8490380420611361, 0.838059078897224, 0.8546613465530728, 0.8441100212361602, 0.8518101664042442, 0.848463183343028], "mae": [0.7875224396261232, 0.7159629106827664, 0.6968473892309852, 0.6713935352565389, 0.6488075501193437, 0.6651109100459323, 0.6430156350288831, 0.6328940630265164, 0.6233489167552552, 0.6253252335476172, 0.6230350832271944, 0.6184036587873074, 0.6099239246530006, 0.6156974273407413, 0.6214085629111842, 0.6077242301578546, 0.6040773991595796, 0.6076087804752688, 0.608113397835767, 0.6042435025373074, 0.6057539855408577, 0.6031434202377848, 0.6023897653366697, 0.6057553958525553, 0.6024301159213535, 0.5975024476743019, 0.6010299506328225, 0.596541751180901, 0.5968718816441351, 0.5997773979686497, 0.6019575439766528, 0.5958905789298179, 0.5967279906756459, 0.5944703166995948, 0.5925527628946365, 0.5984745833625842, 0.5966328338113768, 0.5940972500802311, 0.5976680417728056, 0.5939875983457357, 0.599169945380195, 0.5955705165250923, 0.5989375965280006, 0.5971947254969312]}
1 change: 1 addition & 0 deletions alignn/pre-trained/ocp2020_all/ids_train_val_test.json

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions alignn/pre-trained/ocp2020_all/mad
@@ -0,0 +1,3 @@
MAX val:9.999100859999942
MIN val:-9.992999119999922
MAD val:1.750781823274932
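
The three values above look like the extreme target values and the mean absolute deviation (MAD) of the targets, the usual naive baseline to compare the model MAE against; a minimal sketch of that computation (the target array here is only a stand-in for the relaxed-energy values):

import numpy as np

targets = np.array([-1.2, 0.4, 2.7, -0.8])  # stand-in for the actual targets
print("MAX val:" + str(targets.max()))
print("MIN val:" + str(targets.min()))
print("MAD val:" + str(np.abs(targets - targets.mean()).mean()))
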
203 changes: 203 additions & 0 deletions alignn/pre-trained/ocp2020_all/run_100k_pre.py
@@ -0,0 +1,203 @@
# /wrk/knc6/oc/oc2/ocp/data/val_cgcnn_test/pred2.py
# conda activate ocp-models
import json
import zipfile

import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm

from jarvis.core.atoms import Atoms
from jarvis.core.graphs import Graph  # used by atoms_to_graph below
from jarvis.db.figshare import get_request_data
from jarvis.db.jsonutils import loadjson, dumpjson

from alignn.data import get_torch_dataset
from alignn.models.alignn import ALIGNN, ALIGNNConfig

# ocp-models imports from the original pred2.py (unused here):
# from ocpmodels.datasets import SinglePointLmdbDataset
# from ocpmodels.preprocessing import AtomsToGraphs
# from ocpmodels.models import CGCNN
# from ocpmodels.datasets import data_list_collater

# Download the full OCP2020 dataset and split it with the leaderboard train/val/test ids.
dat = get_request_data(js_tag="ocp_all.json", url="https://figshare.com/ndownloader/files/40974599")
df = pd.DataFrame(dat)
path = "../../../../../benchmarks/AI/SinglePropertyPrediction/ocp_all_relaxed_energy.json.zip"
js_tag = "ocp_all_relaxed_energy.json"
id_data = json.loads(zipfile.ZipFile(path).read(js_tag))
train_ids = np.array(list(id_data["train"].keys()))
val_ids = np.array(list(id_data["val"].keys()))
test_ids = np.array(list(id_data["test"].keys()))
train_df = df[df["id"].isin(train_ids)]
val_df = df[df["id"].isin(val_ids)]  # [:take_val]
test_df = df[df["id"].isin(test_ids)]

print(test_df)
# https://github.com/Open-Catalyst-Project/ocp/blob/main/configs/is2re/10k/base.yml

device = "cpu"
if torch.cuda.is_available():
device = torch.device("cuda")

model_path = "/wrk/knc6/Software/alignn_calc/jarvis_leaderboard/jarvis_leaderboard/contributions/alignn_model/OCP/all/tempall/checkpoint_45.pt"
config = "/wrk/knc6/Software/alignn_calc/jarvis_leaderboard/jarvis_leaderboard/contributions/alignn_model/OCP/all/tempall/config.json"
config_params = loadjson(config)

model = ALIGNN(ALIGNNConfig(name="alignn",alignn_layers=2))
# model = ALIGNN(ALIGNNConfig(name="alignn",output_features=1,config_params))
model.load_state_dict(torch.load(model_path, map_location=device)["model"])
model.to(device)
model.eval()
def get_multiple_predictions(
    model="",
    atoms_array=[],
    ids_array=[],
    cutoff=8,
    neighbor_strategy="k-nearest",
    max_neighbors=12,
    use_canonize=True,
    target="prop",
    atom_features="cgcnn",
    line_graph=True,
    workers=0,
    filename="pred_data.json",
    include_atoms=True,
    pin_memory=False,
    output_features=1,
    batch_size=1,
    model_name="jv_formation_energy_peratom_alignn",
    print_freq=100,
):
"""Use pretrained model on a number of structures."""
# import glob
# atoms_array=[]
# for i in glob.glob("alignn/examples/sample_data/*.vasp"):
# atoms=Atoms.from_poscar(i)
# atoms_array.append(atoms)
# get_multiple_predictions(atoms_array=atoms_array)

mem = []
for i, ii in enumerate(atoms_array):
info = {}
info["atoms"] = ii.to_dict()
info["prop"] = -9999 # place-holder only
info["jid"] = str(ids_array[i])
mem.append(info)

# Note cut-off is usually 8 for solids and 5 for molecules
def atoms_to_graph(atoms):
"""Convert structure dict to DGLGraph."""
structure = Atoms.from_dict(atoms)
return Graph.atom_dgl_multigraph(
structure,
cutoff=cutoff,
atom_features="atomic_number",
max_neighbors=max_neighbors,
compute_line_graph=True,
use_canonize=use_canonize,
)

test_data = get_torch_dataset(
dataset=mem,
target="prop",
neighbor_strategy=neighbor_strategy,
atom_features=atom_features,
use_canonize=use_canonize,
line_graph=line_graph,
)

collate_fn = test_data.collate_line_graph
test_loader = DataLoader(
test_data,
batch_size=batch_size,
shuffle=False,
collate_fn=collate_fn,
drop_last=False,
num_workers=workers,
pin_memory=pin_memory,
)

results = []
with torch.no_grad():
ids = test_loader.dataset.ids
for dat, id in zip(test_loader, ids):
g, lg, target = dat
out_data = model([g.to(device), lg.to(device)])
out_data = out_data.cpu().numpy().tolist()
target = target.cpu().numpy().flatten().tolist()
info = {}
info["id"] = id
info["pred"] = out_data
results.append(info)
print_freq = int(print_freq)
if len(results) % print_freq == 0:
print(len(results))
df1 = pd.DataFrame(mem)
df2 = pd.DataFrame(results)
df2["jid"] = df2["id"]
df3 = pd.merge(df1, df2, on="jid")
save = []
for i, ii in df3.iterrows():
info = {}
info["id"] = ii["id"]
info["atoms"] = ii["atoms"]
info["pred"] = ii["pred"]
save.append(info)

dumpjson(data=save, filename=filename)


# f=open('AI-SinglePropertyPrediction-relaxed_energy-ocp100k-test-mae.csv','w')
# f.write('id,target,prediction\n')
# f.write('id,target,scaled_target,prediction\n')
# print('id,actual,scaled,pred')
atoms_array = []
ids_array = []
for ii, i in tqdm(test_df.iterrows()):
    fname = i["id"]
    ids_array.append(fname)
    atoms = Atoms.from_dict(i["atoms"])
    atoms_array.append(atoms)

    # actual=i['relaxed_energy']
    # relaxed_energy = (actual-target_mean)/target_std
    # scaled=relaxed_energy
    # data = a2g.convert(atoms).to(device)
    # batch = data_list_collater([data], otf_graph=False)
    # out = model(batch)
    # pred=(out[0].detach().cpu().numpy().flatten().tolist()[0])*target_std+target_mean
    # line=str(fname)+','+str(actual)+','+str(pred) #+'\n'
    # line=str(i.sid)+','+str(actual)+','+str(scaled)+','+str(pred) #+'\n'
    # f.write(line+'\n')
# f.close()
# atoms_array=atoms_array[0:10]
# ids_array=ids_array[0:10]

# Run batched inference on the test split and write the leaderboard CSV.
get_multiple_predictions(
    model=model, atoms_array=atoms_array, ids_array=ids_array
)

d = loadjson('pred_data.json')
f = open('AI-SinglePropertyPrediction-relaxed_energy-ocp_all-test-mae.csv', 'w')
f.write('id,prediction\n')
for i in d:
    line = i['id'] + ',' + str(i['pred']) + '\n'
    f.write(line)
f.close()
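
The CSV written at the end stores only ids and predictions, so the test MAE has to be computed by joining back onto the reference energies. A minimal sketch under the same assumptions as the script above (ocp_all.json download, "relaxed_energy" assumed to be the target key):

import pandas as pd
from jarvis.db.figshare import get_request_data

preds = pd.read_csv("AI-SinglePropertyPrediction-relaxed_energy-ocp_all-test-mae.csv")
dat = get_request_data(js_tag="ocp_all.json", url="https://figshare.com/ndownloader/files/40974599")
ref = pd.DataFrame(dat)[["id", "relaxed_energy"]]  # target column name assumed
merged = preds.merge(ref, on="id")
print("test MAE:", (merged["prediction"] - merged["relaxed_energy"]).abs().mean())
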

2 changes: 2 additions & 0 deletions alignn/pre-trained/ocp2020_all/test_data_data_range
@@ -0,0 +1,2 @@
Max=9.999100859999942
Min=-9.975282650000054
2 changes: 2 additions & 0 deletions alignn/pre-trained/ocp2020_all/train_data_data_range
@@ -0,0 +1,2 @@
Max=9.963571024999993
Min=-9.992999119999922
2 changes: 2 additions & 0 deletions alignn/pre-trained/ocp2020_all/val_data_data_range
@@ -0,0 +1,2 @@
Max=9.999100859999942
Min=-9.975282650000054