From 1692936c6f112808bde5ef761631e0a4292f711e Mon Sep 17 00:00:00 2001 From: Wout Date: Wed, 24 Aug 2022 10:53:04 -0700 Subject: [PATCH 1/2] Harmonize default parameters and config values As per discussion on Slack (https://noblelab.slack.com/archives/C01MXN4NWMP/p1659803053573279). --- casanovo/config.yaml | 4 ++-- casanovo/denovo/dataloaders.py | 2 +- casanovo/denovo/model.py | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/casanovo/config.yaml b/casanovo/config.yaml index 69a3dab6..e1be52cd 100644 --- a/casanovo/config.yaml +++ b/casanovo/config.yaml @@ -8,7 +8,7 @@ random_seed: 454 # Spectrum processing options. n_peaks: 150 -min_mz: 50.52564895 # 1.0005079 * 50.5 +min_mz: 50.0 max_mz: 2500.0 min_intensity: 0.01 remove_precursor_tol: 2.0 # Da @@ -18,7 +18,7 @@ dim_model: 512 n_head: 8 dim_feedforward: 1024 n_layers: 9 -dropout: 0 +dropout: 0.0 dim_intensity: custom_encoder: max_length: 100 diff --git a/casanovo/denovo/dataloaders.py b/casanovo/denovo/dataloaders.py index 28e481ca..2ee2f8f5 100644 --- a/casanovo/denovo/dataloaders.py +++ b/casanovo/denovo/dataloaders.py @@ -54,7 +54,7 @@ def __init__( test_index: Optional[AnnotatedSpectrumIndex] = None, batch_size: int = 128, n_peaks: Optional[int] = 150, - min_mz: float = 140.0, + min_mz: float = 50.0, max_mz: float = 2500.0, min_intensity: float = 0.01, remove_precursor_tol: float = 2.0, diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index b4253ee6..b78e498b 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -76,10 +76,10 @@ class Spec2Pep(pl.LightningModule, ModelMixin): def __init__( self, - dim_model: int = 128, + dim_model: int = 512, n_head: int = 8, dim_feedforward: int = 1024, - n_layers: int = 1, + n_layers: int = 9, dropout: float = 0.0, dim_intensity: Optional[int] = None, custom_encoder: Optional[ @@ -87,7 +87,7 @@ def __init__( ] = None, max_length: int = 100, residues: Union[Dict[str, float], str] = "canonical", - max_charge: int = 5, + max_charge: int = 10, precursor_mass_tol=50, n_log: int = 10, tb_summarywriter: Optional[ From a14f7856919c92f65d322fddff5f738af63ac638 Mon Sep 17 00:00:00 2001 From: Wout Date: Wed, 24 Aug 2022 10:58:33 -0700 Subject: [PATCH 2/2] No need to specify config file by default This simplifies the examples that most users will want to use. --- README.md | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 0d5b3753..ef98d30e 100644 --- a/README.md +++ b/README.md @@ -73,7 +73,7 @@ Not all releases might have a model file included on the [Releases page](https:/ - To run _de novo_ sequencing: ``` -casanovo --mode=denovo --peak_path=path/to/predict/spectra.mgf --config=path/to/config.yaml --output=path/to/output +casanovo --mode=denovo --peak_path=path/to/predict/spectra.mgf --output=path/to/output ``` Casanovo can predict peptide sequences for MS/MS data in mzML, mzXML, and MGF files. @@ -82,7 +82,7 @@ This will write peptide predictions for the given MS/MS spectra to the specified - To evaluate _de novo_ sequencing performance based on known spectrum annotations: ``` -casanovo --mode=eval --peak_path=path/to/test/annotated_spectra.mgf --config=path/to/config.yaml +casanovo --mode=eval --peak_path=path/to/test/annotated_spectra.mgf ``` To evaluate the peptide predictions, ground truth peptide labels need to be provided as an annotated MGF file. @@ -90,7 +90,7 @@ To evaluate the peptide predictions, ground truth peptide labels need to be prov - To train a model from scratch: ``` -casanovo --mode=train --peak_path=path/to/train/annotated_spectra.mgf --peak_path_val=path/to/validation/annotated_spectra.mgf --config=path/to/config.yaml +casanovo --mode=train --peak_path=path/to/train/annotated_spectra.mgf --peak_path_val=path/to/validation/annotated_spectra.mgf ``` Training and validation MS/MS data need to be provided as annotated MGF files. @@ -103,11 +103,10 @@ We will demonstrate how to use Casanovo using a small walkthrough example on a s The example MGF file is available at [`sample_data/sample_preprocessed_spectra.mgf`](https://github.com/Noble-Lab/casanovo/blob/main/sample_data/sample_preprocessed_spectra.mgf`). 1. Install Casanovo (see above for details). -2. Copy the example `config.yaml` file to a location you can easily access. -3. Ensure you are in the proper anaconda environment by typing `conda activate casanovo_env`. (If you named your environment differently, type in that name instead.) -4. Run this command: +2. Ensure you are in the proper anaconda environment by typing `conda activate casanovo_env`. (If you named your environment differently, type in that name instead.) +3. Run this command: ``` -casanovo --mode=denovo --peak_path=[PATH_TO]/sample_preprocessed_spectra.mgf --config=[PATH_TO]/config.yaml +casanovo --mode=denovo --peak_path=[PATH_TO]/sample_preprocessed_spectra.mgf ``` This job will take very little time to run (< 1 minute). @@ -139,6 +138,9 @@ Run the following command in your command prompt to see all possible command-lin casanovo --help ``` +Additionally, you can use a configuration file to fully customize Casanovo. +You can find the `config.yaml` configuration file that is used by default [here](https://github.com/Noble-Lab/casanovo/blob/main/casanovo/config.yaml). + **I get a "CUDA out of memory" error when trying to run Casanovo. Help!** This means that there was not enough (free) memory available on your GPU to run Casanovo, which is especially likely to happen when you are using a smaller, consumer-grade GPU.