Merge branch 'ersilia-os:main' into master

ersilia-os · Dec 15, 2023 · 26eb9ac · 26eb9ac
2 parents 00753ec + 3e98394
commit 26eb9ac
Show file tree

Hide file tree

Showing 5 changed files with 73 additions and 21 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -3,12 +3,10 @@ MAINTAINER ersilia
 
 RUN pip install rdkit
 RUN pip install git+https://github.com/bp-kelley/descriptastorus
-RUN pip install tqdm>=4.62.2
 RUN pip install typed-argument-parser==1.6.1
 RUN pip install scikit-learn
 RUN pip install torch
 RUN pip install pandas
-RUN pip install tensorboardX==2.0
 RUN pip install scipy>=1.4.1
 RUN pip install hyperopt
 

diff --git a/README.md b/README.md
@@ -1,3 +1,45 @@
-# Ersilia Model In Progress
+# Prediction of hERG Channel Blockers with Directed Message Passing Neural Networks
 
-This model is work in progress. Please edit the [metadata.json](metadata.json) file to complete the information about the model. This README file will be updated automatically based on the information contained in that folder.
+This model leverages the ChemProp network (D-MPNN, see original Stokes et al, Cell, 2020 for more information) to build a predictor of hERG-mediated cardiotoxicity. The model has been trained using a dataset published by Cai et al, J Chem Inf Model, 2019, which contains 7889 molecules with several cut-offs for hERG blocking activity. The authors select a 10 uM cut-off. This implementation of the model does not use any specific featurizer, though the authors suggest the moe206 descriptors (closed-source) improve performance even further.
+
+## Identifiers
+
+* EOS model ID: `eos30f3`
+* Slug: `dmpnn-herg`
+
+## Characteristics
+
+* Input: `Compound`
+* Input Shape: `Single`
+* Task: `Classification`
+* Output: `Score`
+* Output Type: `Float`
+* Output Shape: `Single`
+* Interpretation: Probability of blocking hERG (cut-off: 10uM)
+
+## References
+
+* [Publication](https://pubs.rsc.org/en/content/articlehtml/2022/ra/d1ra07956e)
+* [Source Code](https://github.com/AI-amateur/DMPNN-hERG)
+* Ersilia contributor: [russelljeffrey](https://github.com/russelljeffrey)
+
+## Ersilia model URLs
+* [GitHub](https://github.com/ersilia-os/eos30f3)
+* [AWS S3](https://ersilia-models-zipped.s3.eu-central-1.amazonaws.com/eos30f3.zip)
+* [DockerHub](https://hub.docker.com/r/ersiliaos/eos30f3) (AMD64)
+
+## Citation
+
+If you use this model, please cite the [original authors](https://pubs.rsc.org/en/content/articlehtml/2022/ra/d1ra07956e) of the model and the [Ersilia Model Hub](https://github.com/ersilia-os/ersilia/blob/master/CITATION.cff).
+
+## License
+
+This package is licensed under a GPL-3.0 license. The model contained within this package has no license specified (`None`).
+
+Notice: Ersilia grants access to these models 'as is', as provided by the original authors. Please refer to the original code repository and/or publication if you use the model in your research.
+
+## About Us
+
+The [Ersilia Open Source Initiative](https://ersilia.io) is a Non Profit Organization ([1192266](https://register-of-charities.charitycommission.gov.uk/charity-search/-/charity-details/5170657/full-print)) whose mission is to equip labs, universities and clinics in LMIC with AI/ML tools for infectious disease research.
+
+[Help us](https://www.ersilia.io/donate) achieve our mission!
diff --git a/metadata.json b/metadata.json
@@ -3,14 +3,21 @@
     "Slug": "dmpnn-herg",
     "Status": "In progress",
     "Title": "Prediction of hERG Channel Blockers with Directed Message Passing Neural Networks",
-    "Description": 
-        "This model leverages the ChemProp network (D-MPNN, see original Stokes et al, Cell, 2020 for more information) to build a predictor of hERG-mediated cardiotoxicity. The model has been trained using a dataset published by Cai et al, J Chem Inf Model, 2019, which contains 7889 molecules with several cut-offs for hERG blocking activity. The authors select a 10 uM cut-off. This implementation of the model does not use any specific featurizer, though the authors suggest the moe206 descriptors (closed-source) improve performance even further.",
+    "Description": "This model leverages the ChemProp network (D-MPNN, see original Stokes et al, Cell, 2020 for more information) to build a predictor of hERG-mediated cardiotoxicity. The model has been trained using a dataset published by Cai et al, J Chem Inf Model, 2019, which contains 7889 molecules with several cut-offs for hERG blocking activity. The authors select a 10 uM cut-off. This implementation of the model does not use any specific featurizer, though the authors suggest the moe206 descriptors (closed-source) improve performance even further.",
     "Mode": "Pretrained",
-    "Task": ["Classification"],
-    "Input": ["Compound"],
+    "Task": [
+        "Classification"
+    ],
+    "Input": [
+        "Compound"
+    ],
     "Input Shape": "Single",
-    "Output": ["Score"],
-    "Output Type": ["Float"],
+    "Output": [
+        "Score"
+    ],
+    "Output Type": [
+        "Float"
+    ],
     "Output Shape": "Single",
     "Interpretation": "Probability of blocking hERG (cut-off: 10uM)",
     "Tag": [
@@ -21,5 +28,10 @@
     ],
     "Publication": "https://pubs.rsc.org/en/content/articlehtml/2022/ra/d1ra07956e",
     "Source Code": "https://github.com/AI-amateur/DMPNN-hERG",
-    "License": "None"
+    "License": "None",
+    "S3": "https://ersilia-models-zipped.s3.eu-central-1.amazonaws.com/eos30f3.zip",
+    "DockerHub": "https://hub.docker.com/r/ersiliaos/eos30f3",
+    "Docker Architecture": [
+        "AMD64"
+    ]
 }
diff --git a/model/framework/code/chemprop/train/run_training.py b/model/framework/code/chemprop/train/run_training.py
@@ -7,7 +7,7 @@
 import warnings
 warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning) 
 import pandas as pd
-from tensorboardX import SummaryWriter
+#from tensorboardX import SummaryWriter
 import torch
 from tqdm import trange
 from torch.optim.lr_scheduler import ExponentialLR
@@ -243,13 +243,13 @@ def run_training(args: TrainArgs,
 
     # Train ensemble of models
     for model_idx in range(args.ensemble_size):
-        # Tensorboard writer
-        save_dir = os.path.join(args.save_dir, f'model_{model_idx}')
-        makedirs(save_dir)
-        try:
-            writer = SummaryWriter(log_dir=save_dir)
-        except:
-            writer = SummaryWriter(logdir=save_dir)
+        # # Tensorboard writer
+        # save_dir = os.path.join(args.save_dir, f'model_{model_idx}')
+        # makedirs(save_dir)
+        # try:
+        #     writer = SummaryWriter(log_dir=save_dir)
+        # except:
+        #     writer = SummaryWriter(logdir=save_dir)
 
         # Load/build model
         if args.checkpoint_paths is not None:

diff --git a/model/framework/code/chemprop/train/train.py b/model/framework/code/chemprop/train/train.py
@@ -2,7 +2,7 @@
 from typing import Callable
 
 import numpy as np
-from tensorboardX import SummaryWriter
+#from tensorboardX import SummaryWriter
 import torch
 import torch.nn as nn
 from torch.optim import Optimizer
@@ -25,7 +25,7 @@ def train(
     n_iter: int = 0,
     atom_bond_scaler: AtomBondScaler = None,
     logger: logging.Logger = None,
-    writer: SummaryWriter = None,
+    #writer: SummaryWriter = None,
 ) -> int:
     """
     Trains a model for an epoch.