diff --git a/CHANGELOG.md b/CHANGELOG.md index c73eec8b..6864c407 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Fixed - Precursor charges are exported as integers instead of floats in the mzTab output file, in compliance with the mzTab specification. +- Fixed log entries written to the config file instead of the log file when running the `configure` command. ### Removed diff --git a/casanovo/casanovo.py b/casanovo/casanovo.py index fef73a9b..12b3d42e 100644 --- a/casanovo/casanovo.py +++ b/casanovo/casanovo.py @@ -41,10 +41,9 @@ import tqdm from lightning.pytorch import seed_everything -from . import __version__ -from . import utils -from .denovo import ModelRunner +from . import __version__, utils from .config import Config +from .denovo import ModelRunner logger = logging.getLogger("casanovo") click.rich_click.USE_MARKDOWN = True @@ -52,21 +51,13 @@ click.rich_click.SHOW_ARGUMENTS = True -class _SharedParams(click.RichCommand): - """Options shared between most Casanovo commands""" +class _SharedFileIOParams(click.RichCommand): + """File IO options shared between most Casanovo commands""" def __init__(self, *args, **kwargs) -> None: """Define shared options.""" super().__init__(*args, **kwargs) self.params += [ - click.Option( - ("-m", "--model"), - help=""" - Either the model weights (.ckpt file) or a URL pointing to the - model weights file. If not provided, Casanovo will try to - download the latest release automatically. - """, - ), click.Option( ("-d", "--output_dir"), help="The destination directory for output files.", @@ -78,30 +69,44 @@ def __init__(self, *args, **kwargs) -> None: type=click.Path(dir_okay=False), ), click.Option( - ("-c", "--config"), - help=""" - The YAML configuration file overriding the default options. - """, - type=click.Path(exists=True, dir_okay=False), + ("-f", "--force_overwrite"), + help="Whether to overwrite output files.", + is_flag=True, + show_default=True, + default=False, ), click.Option( ("-v", "--verbosity"), - help=""" - Set the verbosity of console logging messages. Log files are - always set to 'debug'. - """, + help=( + "Set the verbosity of console logging messages." + " Log files are always set to 'debug'." + ), type=click.Choice( ["debug", "info", "warning", "error"], case_sensitive=False, ), default="info", ), + ] + + +class _SharedParams(_SharedFileIOParams): + """Options shared between main Casanovo commands""" + + def __init__(self, *args, **kwargs) -> None: + """Define shared options.""" + super().__init__(*args, **kwargs) + self.params += [ click.Option( - ("-f", "--force_overwrite"), - help="Whether to overwrite output files.", - is_flag=True, - show_default=True, - default=False, + ("-m", "--model"), + help="""Either the model weights (.ckpt file) or a URL pointing to + the model weights file. If not provided, Casanovo will try to + download the latest release automatically.""", + ), + click.Option( + ("-c", "--config"), + help="The YAML configuration file overriding the default options.", + type=click.Path(exists=True, dir_okay=False), ), ] @@ -336,22 +341,25 @@ def version() -> None: sys.stdout.write("\n".join(versions) + "\n") -@main.command() -@click.option( - "-o", - "--output", - help="The output configuration file.", - default="casanovo.yaml", - type=click.Path(dir_okay=False), -) -def configure(output: Path) -> None: +@main.command(cls=_SharedFileIOParams) +def configure( + output_dir: str, output_root: str, verbosity: str, force_overwrite: bool +) -> None: """Generate a Casanovo configuration file to customize. The casanovo configuration file is in the YAML format. """ - Config.copy_default(str(output)) - setup_logging(output, "info") - logger.info(f"Wrote {output}\n") + output_path, _ = _setup_output( + output_dir, output_root, force_overwrite, verbosity + ) + config_fname = output_root if output_root is not None else "casanovo" + config_fname = Path(config_fname).with_suffix(".yaml") + if not force_overwrite: + utils.check_dir_file_exists(output_path, str(config_fname)) + + config_path = str(output_path / config_fname) + Config.copy_default(config_path) + logger.info(f"Wrote {config_path}") def setup_logging( diff --git a/docs/images/configure-help.svg b/docs/images/configure-help.svg index b1fcce10..dc00cd2f 100644 --- a/docs/images/configure-help.svg +++ b/docs/images/configure-help.svg @@ -1,4 +1,4 @@ - + - - + + - + - + - + - + - + - + - + - + - + - + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - + - - $ casanovo configure --help - -Usage:casanovo configure [OPTIONS]                                             - - Generate a Casanovo configuration file to customize.                            - The casanovo configuration file is in the YAML format.                          - -╭─ Options ────────────────────────────────────────────────────────────────────╮ ---output-oFILE  The output configuration file.                            ---help-h  Show this message and exit.                               -╰──────────────────────────────────────────────────────────────────────────────╯ - + + $ casanovo configure --help + +Usage:casanovo configure [OPTIONS]                                             + + Generate a Casanovo configuration file to customize.                            + The casanovo configuration file is in the YAML format.                          + +╭─ Options ────────────────────────────────────────────────────────────────────╮ +--output_dir-dPATH                       The destination directory   +                                                   for output files.           +--output_root-oFILE                       The root name for all       +                                                   output files.               +--force_overwrite-f  Whether to overwrite        +                                                   output files.               +--verbosity-v[debug|info|warning|error  Set the verbosity of        +]  console logging messages.   +                                                   Log files are always set    +                                                   to 'debug'.                 +--help-h  Show this message and       +                                                   exit.                       +╰──────────────────────────────────────────────────────────────────────────────╯ + diff --git a/docs/images/sequence-help.svg b/docs/images/sequence-help.svg index 6354851d..6573d906 100644 --- a/docs/images/sequence-help.svg +++ b/docs/images/sequence-help.svg @@ -19,171 +19,171 @@ font-weight: 700; } - .terminal-3608076648-matrix { + .terminal-827842920-matrix { font-family: Fira Code, monospace; font-size: 20px; line-height: 24.4px; font-variant-east-asian: full-width; } - .terminal-3608076648-title { + .terminal-827842920-title { font-size: 18px; font-weight: bold; font-family: arial; } - .terminal-3608076648-r1 { fill: #c5c8c6 } -.terminal-3608076648-r2 { fill: #d0b344 } -.terminal-3608076648-r3 { fill: #c5c8c6;font-weight: bold } -.terminal-3608076648-r4 { fill: #68a0b3;font-weight: bold } -.terminal-3608076648-r5 { fill: #868887 } -.terminal-3608076648-r6 { fill: #cc555a } -.terminal-3608076648-r7 { fill: #d0b344;font-weight: bold } -.terminal-3608076648-r8 { fill: #8a4346 } -.terminal-3608076648-r9 { fill: #98a84b;font-weight: bold } -.terminal-3608076648-r10 { fill: #8d7b39;font-weight: bold } + .terminal-827842920-r1 { fill: #c5c8c6 } +.terminal-827842920-r2 { fill: #d0b344 } +.terminal-827842920-r3 { fill: #c5c8c6;font-weight: bold } +.terminal-827842920-r4 { fill: #68a0b3;font-weight: bold } +.terminal-827842920-r5 { fill: #868887 } +.terminal-827842920-r6 { fill: #cc555a } +.terminal-827842920-r7 { fill: #d0b344;font-weight: bold } +.terminal-827842920-r8 { fill: #8a4346 } +.terminal-827842920-r9 { fill: #98a84b;font-weight: bold } +.terminal-827842920-r10 { fill: #8d7b39;font-weight: bold } - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -195,56 +195,56 @@ - + - - $ casanovo sequence --help - -Usage:casanovo sequence [OPTIONSPEAK_PATH...                                 - - De novo sequence peptides from tandem mass spectra.                             - PEAK_PATH must be one or more mzML, mzXML, or MGF files from which to sequence  - peptides. If evaluate is set to True PEAK_PATH must be one or more annotated    - MGF file.                                                                       - -╭─ Arguments ──────────────────────────────────────────────────────────────────╮ -*  PEAK_PATH    FILE[required] -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Options ────────────────────────────────────────────────────────────────────╮ ---evaluate-e  Run in evaluation mode.     -                                                   When this flag is set the   -                                                   peptide and amino acid      -                                                   precision will be           -                                                   calculated and logged at    -                                                   the end of the sequencing   -                                                   run. All input files must   -                                                   be annotated MGF files if   -                                                   running in evaluation       -                                                   mode.                       ---model-mTEXT                       Either the model weights    -                                                   (.ckpt file) or a URL       -                                                   pointing to the model       -                                                   weights file. If not        -                                                   provided, Casanovo will     -                                                   try to download the latest  -                                                   release automatically.      ---output_dir-dPATH                       The destination directory   -                                                   for output files.           ---output_root-oFILE                       The root name for all       -                                                   output files.               ---config-cFILE                       The YAML configuration      -                                                   file overriding the         -                                                   default options.            ---verbosity-v[debug|info|warning|error  Set the verbosity of        -]  console logging messages.   -                                                   Log files are always set    -                                                   to 'debug'.                 ---force_overwrite-f  Whether to overwrite        -                                                   output files.               ---help-h  Show this message and       -                                                   exit.                       -╰──────────────────────────────────────────────────────────────────────────────╯ - + + $ casanovo sequence --help + +Usage:casanovo sequence [OPTIONSPEAK_PATH...                                 + + De novo sequence peptides from tandem mass spectra.                             + PEAK_PATH must be one or more mzML, mzXML, or MGF files from which to sequence  + peptides. If evaluate is set to True PEAK_PATH must be one or more annotated    + MGF file.                                                                       + +╭─ Arguments ──────────────────────────────────────────────────────────────────╮ +*  PEAK_PATH    FILE[required] +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ────────────────────────────────────────────────────────────────────╮ +--evaluate-e  Run in evaluation mode.     +                                                   When this flag is set the   +                                                   peptide and amino acid      +                                                   precision will be           +                                                   calculated and logged at    +                                                   the end of the sequencing   +                                                   run. All input files must   +                                                   be annotated MGF files if   +                                                   running in evaluation       +                                                   mode.                       +--output_dir-dPATH                       The destination directory   +                                                   for output files.           +--output_root-oFILE                       The root name for all       +                                                   output files.               +--force_overwrite-f  Whether to overwrite        +                                                   output files.               +--verbosity-v[debug|info|warning|error  Set the verbosity of        +]  console logging messages.   +                                                   Log files are always set    +                                                   to 'debug'.                 +--model-mTEXT                       Either the model weights    +                                                   (.ckpt file) or a URL       +                                                   pointing to the model       +                                                   weights file. If not        +                                                   provided, Casanovo will     +                                                   try to download the latest  +                                                   release automatically.      +--config-cFILE                       The YAML configuration      +                                                   file overriding the         +                                                   default options.            +--help-h  Show this message and       +                                                   exit.                       +╰──────────────────────────────────────────────────────────────────────────────╯ + diff --git a/docs/images/train-help.svg b/docs/images/train-help.svg index 8aab62d4..7f6ebffa 100644 --- a/docs/images/train-help.svg +++ b/docs/images/train-help.svg @@ -19,162 +19,162 @@ font-weight: 700; } - .terminal-3079567379-matrix { + .terminal-2542172179-matrix { font-family: Fira Code, monospace; font-size: 20px; line-height: 24.4px; font-variant-east-asian: full-width; } - .terminal-3079567379-title { + .terminal-2542172179-title { font-size: 18px; font-weight: bold; font-family: arial; } - .terminal-3079567379-r1 { fill: #c5c8c6 } -.terminal-3079567379-r2 { fill: #d0b344 } -.terminal-3079567379-r3 { fill: #c5c8c6;font-weight: bold } -.terminal-3079567379-r4 { fill: #68a0b3;font-weight: bold } -.terminal-3079567379-r5 { fill: #868887 } -.terminal-3079567379-r6 { fill: #cc555a } -.terminal-3079567379-r7 { fill: #d0b344;font-weight: bold } -.terminal-3079567379-r8 { fill: #8a4346 } -.terminal-3079567379-r9 { fill: #98a84b;font-weight: bold } -.terminal-3079567379-r10 { fill: #8d7b39;font-weight: bold } + .terminal-2542172179-r1 { fill: #c5c8c6 } +.terminal-2542172179-r2 { fill: #d0b344 } +.terminal-2542172179-r3 { fill: #c5c8c6;font-weight: bold } +.terminal-2542172179-r4 { fill: #68a0b3;font-weight: bold } +.terminal-2542172179-r5 { fill: #868887 } +.terminal-2542172179-r6 { fill: #cc555a } +.terminal-2542172179-r7 { fill: #d0b344;font-weight: bold } +.terminal-2542172179-r8 { fill: #8a4346 } +.terminal-2542172179-r9 { fill: #98a84b;font-weight: bold } +.terminal-2542172179-r10 { fill: #8d7b39;font-weight: bold } - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -186,53 +186,53 @@ - + - - $ casanovo train --help - -Usage:casanovo train [OPTIONSTRAIN_PEAK_PATH...                              - - Train a Casanovo model on your own data.                                        - TRAIN_PEAK_PATH must be one or more annoated MGF files, such as those provided  - by MassIVE-KB, from which to train a new Casnovo model.                         - -╭─ Arguments ──────────────────────────────────────────────────────────────────╮ -*  TRAIN_PEAK_PATH    FILE[required] -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Options ────────────────────────────────────────────────────────────────────╮ ---validation_peak_path-pFILE                    An annotated MGF file     -                                                     for validation, like      -                                                     from MassIVE-KB. Use      -                                                     this option multiple      -                                                     times to specify          -                                                     multiple files.           ---model-mTEXT                    Either the model weights  -                                                     (.ckpt file) or a URL     -                                                     pointing to the model     -                                                     weights file. If not      -                                                     provided, Casanovo will   -                                                     try to download the       -                                                     latest release            -                                                     automatically.            ---output_dir-dPATH                    The destination           -                                                     directory for output      -                                                     files.                    ---output_root-oFILE                    The root name for all     -                                                     output files.             ---config-cFILE                    The YAML configuration    -                                                     file overriding the       -                                                     default options.          ---verbosity-v[debug|info|warning|er  Set the verbosity of      -ror]  console logging           -                                                     messages. Log files are   -                                                     always set to 'debug'.    ---force_overwrite-f  Whether to overwrite      -                                                     output files.             ---help-h  Show this message and     -                                                     exit.                     -╰──────────────────────────────────────────────────────────────────────────────╯ - + + $ casanovo train --help + +Usage:casanovo train [OPTIONSTRAIN_PEAK_PATH...                              + + Train a Casanovo model on your own data.                                        + TRAIN_PEAK_PATH must be one or more annoated MGF files, such as those provided  + by MassIVE-KB, from which to train a new Casnovo model.                         + +╭─ Arguments ──────────────────────────────────────────────────────────────────╮ +*  TRAIN_PEAK_PATH    FILE[required] +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ────────────────────────────────────────────────────────────────────╮ +--validation_peak_path-pFILE                    An annotated MGF file     +                                                     for validation, like      +                                                     from MassIVE-KB. Use      +                                                     this option multiple      +                                                     times to specify          +                                                     multiple files.           +--output_dir-dPATH                    The destination           +                                                     directory for output      +                                                     files.                    +--output_root-oFILE                    The root name for all     +                                                     output files.             +--force_overwrite-f  Whether to overwrite      +                                                     output files.             +--verbosity-v[debug|info|warning|er  Set the verbosity of      +ror]  console logging           +                                                     messages. Log files are   +                                                     always set to 'debug'.    +--model-mTEXT                    Either the model weights  +                                                     (.ckpt file) or a URL     +                                                     pointing to the model     +                                                     weights file. If not      +                                                     provided, Casanovo will   +                                                     try to download the       +                                                     latest release            +                                                     automatically.            +--config-cFILE                    The YAML configuration    +                                                     file overriding the       +                                                     default options.          +--help-h  Show this message and     +                                                     exit.                     +╰──────────────────────────────────────────────────────────────────────────────╯ + diff --git a/tests/test_integration.py b/tests/test_integration.py index 7dab1b5b..29c9ed4c 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -1,13 +1,14 @@ import functools import subprocess +import yaml from pathlib import Path import pyteomics.mztab +import pytest from click.testing import CliRunner from casanovo import casanovo - TEST_DIR = Path(__file__).resolve().parent @@ -215,7 +216,7 @@ def test_train_and_run( assert output_filename.is_file() -def test_auxilliary_cli(tmp_path, monkeypatch): +def test_auxilliary_cli(tmp_path, mgf_small, monkeypatch): """Test the secondary CLI commands""" run = functools.partial( CliRunner().invoke, casanovo.main, catch_exceptions=False @@ -228,5 +229,30 @@ def test_auxilliary_cli(tmp_path, monkeypatch): run(["configure", "-o", "test.yaml"]) assert Path("test.yaml").exists() + with pytest.raises(FileExistsError): + run(["configure", "-o", "test.yaml"]) + + with open("casanovo.yaml") as f_in, open("small.yaml", "w") as f_out: + config = yaml.safe_load(f_in) + config["max_epochs"] = 1 + config["n_layers"] = 1 + yaml.dump(config, f_out) + + train_args = [ + "train", + "--validation_peak_path", + str(mgf_small), + "--config", + "small.yaml", + "--output_dir", + str(tmp_path), + "--output_root", + "train", + str(mgf_small), + ] + + result = run(train_args) + assert result.exit_code == 0 + res = run("version") assert res.output diff --git a/tests/unit_tests/test_unit.py b/tests/unit_tests/test_unit.py index 00617457..a59608fd 100644 --- a/tests/unit_tests/test_unit.py +++ b/tests/unit_tests/test_unit.py @@ -573,7 +573,6 @@ def test_calc_match_score(): def test_digest_fasta_cleave(tiny_fasta_file, residues_dict): - # No missed cleavages expected_normal = [ "ATSIPAR", @@ -1092,7 +1091,6 @@ def test_get_candidates(tiny_fasta_file, residues_dict): def test_get_candidates_isotope_error(tiny_fasta_file, residues_dict): - # Tide isotope error windows for 496.2, 2+: # 0: [980.481617, 1000.289326] # 1: [979.491114, 999.278813]