diff --git a/Dockerfile b/Dockerfile index 5f5c68e..ac1779c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,12 +3,10 @@ MAINTAINER ersilia RUN pip install rdkit RUN pip install git+https://github.com/bp-kelley/descriptastorus -RUN pip install tqdm>=4.62.2 RUN pip install typed-argument-parser==1.6.1 RUN pip install scikit-learn RUN pip install torch RUN pip install pandas -RUN pip install tensorboardX==2.0 RUN pip install scipy>=1.4.1 RUN pip install hyperopt diff --git a/README.md b/README.md index 46656b8..290d7c0 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,45 @@ -# Ersilia Model In Progress +# Prediction of hERG Channel Blockers with Directed Message Passing Neural Networks -This model is work in progress. Please edit the [metadata.json](metadata.json) file to complete the information about the model. This README file will be updated automatically based on the information contained in that folder. \ No newline at end of file +This model leverages the ChemProp network (D-MPNN, see original Stokes et al, Cell, 2020 for more information) to build a predictor of hERG-mediated cardiotoxicity. The model has been trained using a dataset published by Cai et al, J Chem Inf Model, 2019, which contains 7889 molecules with several cut-offs for hERG blocking activity. The authors select a 10 uM cut-off. This implementation of the model does not use any specific featurizer, though the authors suggest the moe206 descriptors (closed-source) improve performance even further. 
+ +## Identifiers + +* EOS model ID: `eos30f3` +* Slug: `dmpnn-herg` + +## Characteristics + +* Input: `Compound` +* Input Shape: `Single` +* Task: `Classification` +* Output: `Score` +* Output Type: `Float` +* Output Shape: `Single` +* Interpretation: Probability of blocking hERG (cut-off: 10uM) + +## References + +* [Publication](https://pubs.rsc.org/en/content/articlehtml/2022/ra/d1ra07956e) +* [Source Code](https://github.com/AI-amateur/DMPNN-hERG) +* Ersilia contributor: [russelljeffrey](https://github.com/russelljeffrey) + +## Ersilia model URLs +* [GitHub](https://github.com/ersilia-os/eos30f3) +* [AWS S3](https://ersilia-models-zipped.s3.eu-central-1.amazonaws.com/eos30f3.zip) +* [DockerHub](https://hub.docker.com/r/ersiliaos/eos30f3) (AMD64) + +## Citation + +If you use this model, please cite the [original authors](https://pubs.rsc.org/en/content/articlehtml/2022/ra/d1ra07956e) of the model and the [Ersilia Model Hub](https://github.com/ersilia-os/ersilia/blob/master/CITATION.cff). + +## License + +This package is licensed under a GPL-3.0 license. The model contained within this package is licensed under a None license. + +Notice: Ersilia grants access to these models 'as is' provided by the original authors; please refer to the original code repository and/or publication if you use the model in your research. + +## About Us + +The [Ersilia Open Source Initiative](https://ersilia.io) is a Non Profit Organization ([1192266](https://register-of-charities.charitycommission.gov.uk/charity-search/-/charity-details/5170657/full-print)) with the mission to equip labs, universities and clinics in LMIC with AI/ML tools for infectious disease research. + +[Help us](https://www.ersilia.io/donate) achieve our mission! 
\ No newline at end of file diff --git a/metadata.json b/metadata.json index dca0e13..4bd0e5f 100644 --- a/metadata.json +++ b/metadata.json @@ -3,14 +3,21 @@ "Slug": "dmpnn-herg", "Status": "In progress", "Title": "Prediction of hERG Channel Blockers with Directed Message Passing Neural Networks", - "Description": - "This model leverages the ChemProp network (D-MPNN, see original Stokes et al, Cell, 2020 for more information) to build a predictor of hERG-mediated cardiotoxicity. The model has been trained using a dataset published by Cai et al, J Chem Inf Model, 2019, which contains 7889 molecules with several cut-offs for hERG blocking activity. The authors select a 10 uM cut-off. This implementation of the model does not use any specific featurizer, though the authors suggest the moe206 descriptors (closed-source) improve performance even further.", + "Description": "This model leverages the ChemProp network (D-MPNN, see original Stokes et al, Cell, 2020 for more information) to build a predictor of hERG-mediated cardiotoxicity. The model has been trained using a dataset published by Cai et al, J Chem Inf Model, 2019, which contains 7889 molecules with several cut-offs for hERG blocking activity. The authors select a 10 uM cut-off. 
This implementation of the model does not use any specific featurizer, though the authors suggest the moe206 descriptors (closed-source) improve performance even further.", "Mode": "Pretrained", - "Task": ["Classification"], - "Input": ["Compound"], + "Task": [ + "Classification" + ], + "Input": [ + "Compound" + ], "Input Shape": "Single", - "Output": ["Score"], - "Output Type": ["Float"], + "Output": [ + "Score" + ], + "Output Type": [ + "Float" + ], "Output Shape": "Single", "Interpretation": "Probability of blocking hERG (cut-off: 10uM)", "Tag": [ @@ -21,5 +28,10 @@ ], "Publication": "https://pubs.rsc.org/en/content/articlehtml/2022/ra/d1ra07956e", "Source Code": "https://github.com/AI-amateur/DMPNN-hERG", - "License": "None" + "License": "None", + "S3": "https://ersilia-models-zipped.s3.eu-central-1.amazonaws.com/eos30f3.zip", + "DockerHub": "https://hub.docker.com/r/ersiliaos/eos30f3", + "Docker Architecture": [ + "AMD64" + ] } \ No newline at end of file diff --git a/model/framework/code/chemprop/train/run_training.py b/model/framework/code/chemprop/train/run_training.py index 214b622..e578be7 100644 --- a/model/framework/code/chemprop/train/run_training.py +++ b/model/framework/code/chemprop/train/run_training.py @@ -7,7 +7,7 @@ import warnings warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning) import pandas as pd -from tensorboardX import SummaryWriter +#from tensorboardX import SummaryWriter import torch from tqdm import trange from torch.optim.lr_scheduler import ExponentialLR @@ -243,13 +243,13 @@ def run_training(args: TrainArgs, # Train ensemble of models for model_idx in range(args.ensemble_size): - # Tensorboard writer - save_dir = os.path.join(args.save_dir, f'model_{model_idx}') - makedirs(save_dir) - try: - writer = SummaryWriter(log_dir=save_dir) - except: - writer = SummaryWriter(logdir=save_dir) + # # Tensorboard writer + # save_dir = os.path.join(args.save_dir, f'model_{model_idx}') + # makedirs(save_dir) + # try: + # 
writer = SummaryWriter(log_dir=save_dir) + # except: + # writer = SummaryWriter(logdir=save_dir) # Load/build model if args.checkpoint_paths is not None: diff --git a/model/framework/code/chemprop/train/train.py b/model/framework/code/chemprop/train/train.py index 8901a6e..549c296 100644 --- a/model/framework/code/chemprop/train/train.py +++ b/model/framework/code/chemprop/train/train.py @@ -2,7 +2,7 @@ from typing import Callable import numpy as np -from tensorboardX import SummaryWriter +#from tensorboardX import SummaryWriter import torch import torch.nn as nn from torch.optim import Optimizer @@ -25,7 +25,7 @@ def train( n_iter: int = 0, atom_bond_scaler: AtomBondScaler = None, logger: logging.Logger = None, - writer: SummaryWriter = None, + #writer: SummaryWriter = None, ) -> int: """ Trains a model for an epoch.