Skip to content

Commit

Permalink
cleaned assets (moved to manuscript revision) and added help insets
Browse files Browse the repository at this point in the history
  • Loading branch information
miquelduranfrigola committed Jan 18, 2024
1 parent 25473c8 commit 1bd2258
Show file tree
Hide file tree
Showing 85 changed files with 33 additions and 8,506 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
data/slc_inhibitor_collection_gsf.tsv
data/slc_inhibitor_collection_gsf_with_auto_crf.tsv
data/protein_precalcs_baseline.joblib
results
cache
Expand Down
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
# on-the-fly-modeling
# On the Fly Modeling App
Build AI/ML models on the fly from primary Ligand Discovery screening data.

To run the app, first install the required dependencies, which are listed in the Dockerfile. Then launch the app with the command `streamlit run app/app.py`.
36 changes: 28 additions & 8 deletions app/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,10 @@ def get_fragment_image(smiles):

col.subheader(":mag: Input your proteins")

text = col.text_area("Paste proteins separated by space or new line")
text = col.text_area(
"Paste proteins in UniProt AC format or Gene Name.",
help="Write one protein per line. UniProt AC format is preferred. Only proteins available in the Ligand Discovery interactome will be considered.",
)
input_tokens = text.split()
input_pids = []
for it in input_tokens:
Expand All @@ -237,7 +240,6 @@ def get_fragment_image(smiles):

input_data = pids_to_dataframe(input_pids)

# tfidf = col.checkbox(label="TFIDF", value=True)
tfidf = True

if input_data.shape[0] == 0:
Expand Down Expand Up @@ -348,11 +350,23 @@ def get_fragment_image(smiles):
num_total = len(data[data["y"] != -1])

subcols = col.columns(3)
subcols[0].metric("Positives", value=num_positives)
subcols[0].metric(
"Positives",
value=num_positives,
help="Number of positive fragments (i.e. fragments that interact with at least one of the selected proteins). Fragments are ranked by their sum of TF-IDF scores, meaning that the fragments that interact with more proteins will be ranked higher. Interacting with specific proteins will also uprank fragments.",
)

subcols[1].metric("Total", value=num_total)
subcols[1].metric(
"Total",
value=num_total,
help="Total number of fragments (positive and negative) used in the model. This value decreases as you decrease the maximum promiscuity of included fragments threshold.",
)

subcols[2].metric("Rate", value="{0:.1f}%".format(num_positives / num_total * 100))
subcols[2].metric(
"Rate",
value="{0:.1f}%".format(num_positives / num_total * 100),
help="Ratio of positives to total fragments.",
)

if num_positives == 0:
col.error(
Expand All @@ -363,14 +377,19 @@ def get_fragment_image(smiles):
else:
task_evaluation = task_evaluator(model, data)
subcols[0].metric(
label="Corr. other", value="{0:.3f}".format(task_evaluation["ref_rho"])
label="Corr. prom",
value="{0:.3f}".format(task_evaluation["ref_rho"]),
help="Correlation between model outcomes and fragment promiscuity predictors. If you wish to have models that are less correlated with promiscuity, consider lowering the maximum promiscuity of included fragments threshold.",
)
subcols[1].metric(
label="Frag. promiscuity", value="{0:.1f}".format(task_evaluation["prom"])
label="Frag. promiscuity",
value="{0:.1f}".format(task_evaluation["prom"]),
help="Average promiscuity of positive fragments. This helps understand how promiscuous the fragments are that are being used to build the model, with a focus on the positive class.",
)
subcols[2].metric(
label="Interactors ({0})".format(len(uniprot_acs)),
value="{0:.1f}".format(task_evaluation["hits"]),
help="Average number of query proteins that interact with positive fragments. If you want this number to be higher, consider decreasing the maximum number of positives threshold in order to focus on the fragments that have the highest protein coverage.",
)

expander = col.expander("View positives")
Expand Down Expand Up @@ -407,7 +426,8 @@ def get_fragment_image(smiles):
col.subheader(":crystal_ball: Make predictions")

input_prediction_tokens = col.text_area(
label="Input your SMILES of interest. They should have the diazirine fragment"
label="Input your SMILES of interest. Ideally, they should have the diazirine fragment",
help="Paste molecules in SMILES format, one per line. Try to include the CRF pattern in your input molecules. If no CRF pattern is present, it will be automatically attached.",
)

pred_tokens = [t for t in input_prediction_tokens.split("\n") if t != ""]
Expand Down
Binary file removed assets/AUROC_vs_positives.png
Binary file not shown.
Loading

0 comments on commit 1bd2258

Please sign in to comment.