From 7d94c9645d2dc52411745b40465f18d478e95b97 Mon Sep 17 00:00:00 2001 From: Alexey Gurevich Date: Tue, 7 Jun 2022 14:28:43 +0300 Subject: [PATCH] pipeline: new option "-report-all-metrics" added --- CHANGES.txt | 6 ++++-- manual.html | 9 +++++++++ quast_libs/options_parser.py | 4 ++++ quast_libs/qconfig.py | 5 ++++- quast_libs/reporting.py | 4 ++++ 5 files changed, 25 insertions(+), 3 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index aa4b714e3f..6a0e2035ad 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -12,8 +12,10 @@ - auN, auNG, auNA, auNGA (areas under the Nx/NGx/NAx/NGx curves; for more detail see https://lh3.github.io/2020/04/08/a-new-metric-on-assembly-contiguity or the manual). - 3. New option: - - "--local-mis-size" for setting minimal local misassembly size (default is 200, was 86). + 3. New options: + - "--local-mis-size" for setting minimal local misassembly size (default is 200, was 86); + - "--report-all-metrics" for keeping the same content (list of metrics) in the main report + independently of inputs/options. 4. MetaQUAST change: - preserving explicitly specified reference genomes in the reports (they were previously diff --git a/manual.html b/manual.html index 1f2c756649..358bd79d70 100644 --- a/manual.html +++ b/manual.html @@ -626,6 +626,15 @@

2.3 Command line options

By default, the value is automatically detected as the median insert size of provided paired-end reads. If no paired-end reads are provided, 255 is used as the default value. +
+--report-all-metrics +
+Keep all quality metrics in the main report. By default, metrics that are not relevant are omitted from the report, e.g., reference-based metrics in the no-reference mode. +Also, if the values of a metric are undefined ('-') for all input assemblies, the metric is removed from the report. + The only exception to the latter rule is the NG/NGA/LG/LGA-like metrics, which explicitly contain '-' if a reference was specified but (the aligned parts of) all assemblies are too small to reach, e.g., NG50 (NGA50). +
+ The --report-all-metrics option changes this behaviour and forces QUAST (metaQUAST) to keep in the report all metrics that can be reported in principle. In this case, the number of rows in the main report is always the same regardless of the inputs and running mode/options, which simplifies automatic parsing of the report. +
--plots-format <format>
diff --git a/quast_libs/options_parser.py b/quast_libs/options_parser.py index 6cd7fb3875..60af8aa17f 100644 --- a/quast_libs/options_parser.py +++ b/quast_libs/options_parser.py @@ -625,6 +625,10 @@ def parse_options(logger, quast_args): callback_kwargs={'min_value': qconfig.optimal_assembly_min_IS, 'max_value': qconfig.optimal_assembly_max_IS}) ), + (['--report-all-metrics'], dict( + dest='report_all_metrics', + action='store_true') + ), (['--plots-format'], dict( dest='plot_extension', type='string', diff --git a/quast_libs/qconfig.py b/quast_libs/qconfig.py index d82b3d6c2a..1e7270cbd1 100644 --- a/quast_libs/qconfig.py +++ b/quast_libs/qconfig.py @@ -84,6 +84,7 @@ run_busco = False large_genome = False use_kmc = False +report_all_metrics = False # ideal assembly section optimal_assembly = False @@ -486,6 +487,8 @@ def usage(show_hidden=False, mode=None, short=True, stream=sys.stdout): stream.write(" --upper-bound-min-con Minimal number of 'connecting reads' needed for joining upper bound contigs into a scaffold\n") stream.write(" [default: %d for mate-pairs and %d for long reads]\n" % (MIN_CONNECT_MP, MIN_CONNECT_LR)) stream.write(" --est-insert-size Use provided insert size in upper bound assembly simulation [default: auto detect from reads or %d]\n" % optimal_assembly_default_IS) + stream.write(" --report-all-metrics Keep all quality metrics in the main report even if their values are '-' for all assemblies or \n" + " if they are not applicable (e.g., reference-based metrics in the no-reference mode)\n") stream.write(" --plots-format Save plots in specified format [default: %s].\n" % plot_extension) stream.write(" Supported formats: %s\n" % ', '.join(supported_plot_extensions)) stream.write(" --memory-efficient Run everything using one thread, separately per each assembly.\n") @@ -506,7 +509,7 @@ def usage(show_hidden=False, mode=None, short=True, stream=sys.stdout): stream.write(" --sam Comma-separated list of SAM alignment files obtained by aligning reads 
to assemblies\n" " (use the same order as for files with contigs)\n") stream.write(" --bam Comma-separated list of BAM alignment files obtained by aligning reads to assemblies\n" - " (use the same order as for files with contigs)\n") + " (use the same order as for files with contigs)\n") stream.write(" Reads (or SAM/BAM file) are used for structural variation detection and\n") stream.write(" coverage histogram building in Icarus\n") stream.write(" --sv-bedpe File with structural variations (in BEDPE format)\n") diff --git a/quast_libs/reporting.py b/quast_libs/reporting.py index 438f5c9127..d1f764f8c5 100644 --- a/quast_libs/reporting.py +++ b/quast_libs/reporting.py @@ -457,6 +457,10 @@ def table(order=Fields.order, ref_name=None): required_fields = [] def define_required_fields(): + if qconfig.report_all_metrics: + required_fields.extend(Fields.order) + return + # if a reference is specified, keep the same number of Nx/Lx-like genome-based metrics in different reports # (no matter what percent of the genome was assembled) report = get(assembly_fpaths[0], ref_name=ref_name)