From 1bdd482fe1b09f1ab21b2e4e77306840eb940ee7 Mon Sep 17 00:00:00 2001
From: niklases <60261497+niklases@users.noreply.github.com>
Date: Fri, 5 Jan 2024 10:39:51 +0100
Subject: [PATCH] Allow using only the --ps option for pure DCA predictions

---
 .gitignore                         |  1 +
 .vscode/launch.json                | 53 ++++++++++++++++++++++++++++++
 .vscode/settings.json              |  3 ++
 README.md                          | 17 ++++++++--
 pypef/dca/dca_run.py               |  2 +-
 pypef/dca/hybrid_model.py          |  9 +++--
 pypef/main.py                      |  3 +-
 scripts/CLI/run_cli_tests_linux.sh | 12 ++++++-
 scripts/CLI/run_cli_tests_win.ps1  | 16 ++++++++-
 9 files changed, 106 insertions(+), 10 deletions(-)
 create mode 100644 .vscode/settings.json

diff --git a/.gitignore b/.gitignore
index e5542be..39aa73a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -387,3 +387,4 @@ scripts/Setup/linux/apc.png
 datasets/ANEH/KARS160122_PLS_LOOCV_ML_Model_Performance.png
 datasets/ANEH/CV_performance/KARS160122_PLS_LOOCV_5-fold-CV.png
 datasets/ANEH/CV_performance/KARS160122_PLS_LOOCV_CV_Results.txt
+datasets/AVGFP/Predictions_Hybrid_TopTS.txt
diff --git a/.vscode/launch.json b/.vscode/launch.json
index b90f5c8..7474635 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -104,6 +104,23 @@
             ]
         },
 
+        {
+            "name": "Python: PyPEF hybrid/only-PS-zero-shot GREMLIN-DCA avGFP",
+            "type": "python",
+            "request": "launch",
+            "env": {"PYTHONPATH": "${workspaceFolder}"},
+            "program": "${workspaceFolder}/pypef/main.py",
+            "console": "integratedTerminal",
+            "justMyCode": true,
+            "cwd": "${workspaceFolder}/datasets/AVGFP/",
+            "args": [
+                "hybrid", 
+                //"-m", "GREMLIN",   // optional, not required  
+                "--ps", "TS.fasl", 
+                "--params", "GREMLIN"
+            ]
+        },
+
         {   // PLMC zero-shot steps:
             // 1. $pypef param_inference --params uref100_avgfp_jhmmer_119_plmc_42.6.params
             // 2. $pypef hybrid -t TS.fasl --params PLMC
@@ -136,6 +153,42 @@
                 "--params", "PLMC",
                 "--threads", "24"
             ]
+        },
+
+        {
+            "name": "Python: PyPEF hybrid/only-PS-zero-shot PLMC-DCA avGFP",
+            "type": "python",
+            "request": "launch",
+            "env": {"PYTHONPATH": "${workspaceFolder}"},
+            "program": "${workspaceFolder}/pypef/main.py",
+            "console": "integratedTerminal",
+            "justMyCode": true,
+            "cwd": "${workspaceFolder}/datasets/AVGFP/",
+            "args": [
+                "hybrid", 
+                //"-m", "PLMC",   // optional, not required  
+                "--ps", "TS.fasl", 
+                "--params", "uref100_avgfp_jhmmer_119_plmc_42.6.params",
+                "--threads", "24"
+            ]
+        },
+
+        {
+            "name": "Python: PyPEF hybrid/only-PS-zero-shot PLMC-DCA variant 2 avGFP",
+            "type": "python",
+            "request": "launch",
+            "env": {"PYTHONPATH": "${workspaceFolder}"},
+            "program": "${workspaceFolder}/pypef/main.py",
+            "console": "integratedTerminal",
+            "justMyCode": true,
+            "cwd": "${workspaceFolder}/datasets/AVGFP/",
+            "args": [
+                "hybrid", 
+                //"-m", "PLMC",   // optional, not required  
+                "--ps", "TS.fasl", 
+                "--params", "PLMC",
+                "--threads", "24"
+            ]
         }
     ]
 }
\ No newline at end of file
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..2b6b3b4
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,3 @@
+{
+    "markdown.extension.toc.updateOnSave": false
+}
diff --git a/README.md b/README.md
index cf86d5c..9959bd4 100644
--- a/README.md
+++ b/README.md
@@ -17,8 +17,9 @@ Preprint available at bioRxiv: https://doi.org/10.1101/2022.06.07.495081.
 <sup>*§*</sup><sub>Equal contribution</sub> <br>
 
 ---
+
 ## Table of Contents
-- [PyPEF: Pythonic Protein Engineering Framework](#pypef-pythonic-protein-engineering-framework)
+[PyPEF: Pythonic Protein Engineering Framework](#pypef-pythonic-protein-engineering-framework)
   - [Quick Installation](#quick-installation)
   - [Requirements](#requirements)
   - [Running Examples](#running-examples)
@@ -398,15 +399,19 @@ python3 ./pypef/main.py
     ```
 
     5.2 After [installing plmc](https://github.com/debbiemarkslab/plmc#compilation), generate the evolutionary coupling file, which is used for encoding sequences. For example, set `-le` to the value output by `sto2a2m`:
+
     ```
     plmc -o ANEH_72.6.params -le 72.6 -m 100 -g -f WT_ANEH ANEH_jhmmer.a2m
     ```
     
     The output parameter (.params) file can be used for encoding sequences with the DCA-based encoding technique (`-e dca`) by providing it to PyPEF; e.g. for pure ML modeling:
+
     ```
     pypef ml -e dca -l LS.fasl -t TS.fasl --regressor pls --params ANEH_72.6.params
     ```
+
     Or for hybrid modeling:
+
     ```
     pypef hybrid -l LS.fasl -t TS.fasl --params ANEH_72.6.params
     ```
@@ -420,21 +425,27 @@ To make zero-shot predictions using PyPEF (plmc-DCA or GREMLIN-DCA) just do not
 
 ```
 pypef param_inference --msa uref100_avgfp_jhmmer_119.a2m
-pypef hybrid -t AVGFP_TS.fasl --params GREMLIN
+pypef hybrid -t TS.fasl --params GREMLIN
 ```
+
 using the GREMLIN parameters, or,
+
 ```
 pypef param_inference --params uref100_avgfp_jhmmer_119_plmc_42.6.params
 pypef hybrid -t TS.fasl --params PLMC
 ```
+
 using the plmc parameters.
 
-Other well-performing zero-shot prediction methods with available source code are (list not complete, see ProteinGym [repository](https://github.com/OATML-Markslab/ProteinGym) and [website](https://proteingym.org/) for a more detailed overview of available methods and achieved performances):
+Other well-performing zero-shot prediction methods with available source code are:
+
 - ESM-1v/ESM-2 (https://github.com/facebookresearch/esm)
 - DeepSequence (https://github.com/debbiemarkslab/DeepSequence)
 - EVcouplings (plmc-DCA, https://github.com/debbiemarkslab/EVcouplings)
 - EVE (https://github.com/OATML/EVE)
 - Tranception (https://github.com/OATML-Markslab/Tranception)
+  
+This list is by no means complete; see the ProteinGym [repository](https://github.com/OATML-Markslab/ProteinGym) and [website](https://proteingym.org/) for a more detailed overview of available methods and their achieved performance (as well as for many benchmark data sets).
 
 <a name="api-usage"></a>
 ## API Usage for Sequence Encoding
diff --git a/pypef/dca/dca_run.py b/pypef/dca/dca_run.py
index e1fa8ca..1447a98 100644
--- a/pypef/dca/dca_run.py
+++ b/pypef/dca/dca_run.py
@@ -61,7 +61,7 @@ def run_pypef_hybrid_modeling(arguments):
             label=arguments['--label']
         )
 
-    elif arguments['--params'] and arguments['--model']:
+    elif arguments['--params'] and arguments['--model'] or arguments['--ps']:
         prediction_dict = {}
         prediction_dict.update({
             'drecomb': arguments['--drecomb'],
diff --git a/pypef/dca/hybrid_model.py b/pypef/dca/hybrid_model.py
index 0647ba5..509fc0f 100644
--- a/pypef/dca/hybrid_model.py
+++ b/pypef/dca/hybrid_model.py
@@ -1094,11 +1094,14 @@ def predict_ps(  # also predicting "pmult" dict directories
         in the respective created folders).
 
     """
-    logger.info(f'Taking model from saved model (Pickle file): {model_pickle_file}...')
-
+    if model_pickle_file is None:
+        model_pickle_file = params_file
+        logger.info(f'Trying to load model from saved parameters (Pickle file): {model_pickle_file}...')
+    else:
+        logger.info(f'Loading model from saved model (Pickle file): {model_pickle_file}...')
     model, model_type = get_model_and_type(model_pickle_file)
 
-    if model_type == 'PLMC':
+    if model_type == 'PLMC' or model_type == 'GREMLIN':
         logger.info(f'No hybrid model provided – falling back to a statistical DCA model.')
     elif model_type == 'Hybrid':
         beta_1, beta_2, reg = model.beta_1, model.beta_2, model.regressor
diff --git a/pypef/main.py b/pypef/main.py
index 1d312f3..1c35d32 100644
--- a/pypef/main.py
+++ b/pypef/main.py
@@ -145,7 +145,8 @@
     pypef shift_pos --input CSV_FILE --offset OFFSET
         [--sep CSV_COLUMN_SEPARATOR] [--mutation_sep MUTATION_SEPARATOR] [--fitness_key FITNESS_KEY]
     pypef sto2a2m --sto STO_MSA_FILE [--inter_gap INTER_GAP] [--intra_gap INTRA_GAP]
-    pypef hybrid --ts TEST_SET
+    pypef hybrid 
+        [--ts TEST_SET] [--ps PREDICTION_SET]
         [--model MODEL] [--params PARAM_FILE]
         [--ls LEARNING_SET] [--label] [--threads THREADS]
     pypef hybrid --model MODEL --params PARAM_FILE
diff --git a/scripts/CLI/run_cli_tests_linux.sh b/scripts/CLI/run_cli_tests_linux.sh
index d744e0b..e155447 100644
--- a/scripts/CLI/run_cli_tests_linux.sh
+++ b/scripts/CLI/run_cli_tests_linux.sh
@@ -368,11 +368,21 @@ echo
 $pypef hybrid -m PLMC -t TS.fasl --params PLMC --threads $threads
 echo
 
-# pure statistical
+# Hybrid: pure statistical
 $pypef hybrid -t TS.fasl --params PLMC --threads $threads
 echo
+$pypef hybrid -p TS.fasl --params PLMC --threads $threads
+echo
+# Same as the command above, but with the model passed explicitly via -m
+$pypef hybrid -p TS.fasl -m PLMC --params PLMC --threads $threads
+echo
 $pypef hybrid -t TS.fasl --params GREMLIN
 echo
+$pypef hybrid -p TS.fasl --params GREMLIN
+echo
+# Same as the command above, but with the model passed explicitly via -m
+$pypef hybrid -p TS.fasl -m GREMLIN --params GREMLIN
+echo
 $pypef hybrid -m GREMLIN -t TS.fasl --params GREMLIN
 echo
 $pypef hybrid -l LS.fasl -t TS.fasl --params GREMLIN
diff --git a/scripts/CLI/run_cli_tests_win.ps1 b/scripts/CLI/run_cli_tests_win.ps1
index daac44a..96f0a40 100644
--- a/scripts/CLI/run_cli_tests_win.ps1
+++ b/scripts/CLI/run_cli_tests_win.ps1
@@ -511,13 +511,27 @@ pypef hybrid -m PLMC -t TS.fasl --params PLMC --threads $threads
 ExitOnExitCode
 Write-Host
 
-# pure statistical
+# Hybrid: pure statistical
 pypef hybrid -t TS.fasl --params PLMC --threads $threads
 ExitOnExitCode
 Write-Host
+pypef hybrid -p TS.fasl --params PLMC --threads $threads
+ExitOnExitCode
+Write-Host
+# Same as the command above, but with the model passed explicitly via -m
+pypef hybrid -p TS.fasl -m PLMC --params PLMC --threads $threads
+ExitOnExitCode
+Write-Host
 pypef hybrid -t TS.fasl --params GREMLIN
 ExitOnExitCode
 Write-Host
+pypef hybrid -p TS.fasl --params GREMLIN
+ExitOnExitCode
+Write-Host
+# Same as the command above, but with the model passed explicitly via -m
+pypef hybrid -p TS.fasl -m GREMLIN --params GREMLIN
+ExitOnExitCode
+Write-Host
 pypef hybrid -m GREMLIN -t TS.fasl --params GREMLIN
 ExitOnExitCode
 Write-Host