You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
We have recently implemented the new versioning system into the cutandrun pipeline and while it's a major improvement over the old system, I have a suggestion which I think improves the output and usefulness even further.
The current module parses the YAML file and outputs a table of software versions by process. This answers any question about which process is using which software version. Another major question I have when I view this report is: what is the unique list of software versions running in the pipeline? This is useful for end users who just want a concise list of the software the pipeline uses, and also from a development point of view, where I want to know whether I am running multiple versions of the same software tool. While I can get this information from the current software-by-process view, it would be better to have an extra view that shows exactly this information.
I propose altering the custom software dumps module to output two table views: one of software version by process, and one of unique software versions.
I have already mocked up a working version of this shown below in the code snippet. The changes in the snippet along with some changes to the multiqc config file now show two software version sections in the output report. One standard output by process:
and one new output:
Here we can already see the usefulness as I was immediately able to see that we are running three versions of samtools and two different versions of python.
// Import generic module functions
include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions'
// Default the module options to an empty map, then pass them through initOptions
// (imported from './functions'; its behaviour is not visible in this file).
// NOTE(review): presumably the including workflow overrides params.options — confirm.
params.options = [:]
options = initOptions(params.options)
// Collects the per-process software versions YAML and writes:
//   - software_versions.yml            : merged versions keyed by process
//   - software_versions_mqc.yml        : MultiQC section, one table grouped by process
//   - software_versions_unique_mqc.yml : MultiQC section, unique tool/version pairs
//   - local_versions.yml               : versions of the tools used by this module itself
process CUSTOM_DUMPSOFTWAREVERSIONS {
    label 'process_low'
    publishDir "${params.outdir}",
        mode: params.publish_dir_mode,
        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:'pipeline_info', meta:[:], publish_by_meta:[]) }

    // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container
    conda (params.enable_conda ? "bioconda::multiqc=1.11" : null)
    if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
        container "https://depot.galaxyproject.org/singularity/multiqc:1.11--pyhdfd78af_0"
    } else {
        container "quay.io/biocontainers/multiqc:1.11--pyhdfd78af_0"
    }

    input:
    path versions

    output:
    path "software_versions.yml"           , emit: yml
    path "software_versions_mqc.yml"       , emit: mqc_yml
    path "software_versions_unique_mqc.yml", emit: mqc_unique_yml
    path "local_versions.yml"              , emit: versions

    script:
    // NOTE: the script below is a Groovy string, so a literal backslash for Python
    // must be written as a double backslash, and Python code must not use
    // triple double-quotes (they would terminate the Groovy string).
    """
    #!/usr/bin/env python

    import yaml
    import platform
    from textwrap import dedent


    def _make_versions_html(versions):
        # Build the "by process" HTML table: one <tbody> per process so that the
        # nth-child(even) CSS rule stripes whole process groups; the process name
        # is printed only on the first row of its group.
        html = [
            dedent(
                '''\\
                <style>
                #nf-core-versions tbody:nth-child(even) {
                    background-color: #f2f2f2;
                }
                </style>
                <table class="table" style="width:100%" id="nf-core-versions">
                    <thead>
                        <tr>
                            <th> Process Name </th>
                            <th> Software </th>
                            <th> Version </th>
                        </tr>
                    </thead>
                '''
            )
        ]
        for process, tmp_versions in sorted(versions.items()):
            html.append("<tbody>")
            for i, (tool, version) in enumerate(sorted(tmp_versions.items())):
                html.append(
                    dedent(
                        f'''\\
                        <tr>
                            <td><samp>{process if (i == 0) else ''}</samp></td>
                            <td><samp>{tool}</samp></td>
                            <td><samp>{version}</samp></td>
                        </tr>
                        '''
                    )
                )
            html.append("</tbody>")
        html.append("</table>")
        return "\\n".join(html)


    def _make_versions_unique_html(versions):
        # Build the "unique versions" HTML table: one row per distinct
        # (tool, version) pair across all processes.
        #
        # Pairs are kept as tuples in a set rather than joined into a single
        # "tool=version" string: joining and re-splitting on "=" truncated any
        # version that itself contains "=", and sorting the joined strings could
        # interleave tools whose names share a prefix. Tuples sort by tool name
        # first, then by version.
        unique_versions = sorted(
            {
                (tool, version)
                for tmp_versions in versions.values()
                for tool, version in tmp_versions.items()
            }
        )

        html = [
            dedent(
                '''\\
                <style>
                #nf-core-versions-unique tbody:nth-child(even) {
                    background-color: #f2f2f2;
                }
                </style>
                <table class="table" style="width:100%" id="nf-core-versions-unique">
                    <thead>
                        <tr>
                            <th> Software </th>
                            <th> Version </th>
                        </tr>
                    </thead>
                '''
            )
        ]
        for tool, version in unique_versions:
            # Each row gets its own <tbody> so the nth-child(even) rule
            # stripes alternate rows.
            html.append("<tbody>")
            html.append(
                dedent(
                    f'''\\
                    <tr>
                        <td><samp>{tool}</samp></td>
                        <td><samp>{version}</samp></td>
                    </tr>
                    '''
                )
            )
            html.append("</tbody>")
        html.append("</table>")
        return "\\n".join(html)


    # Versions of the software this module itself runs with.
    module_versions = {}
    module_versions["${getProcessName(task.process)}"] = {
        'python': platform.python_version(),
        'yaml': yaml.__version__
    }

    # BaseLoader keeps every scalar as a string, so a version such as 1.10 is
    # not parsed into a float. The dict union operator needs Python >= 3.9.
    with open("$versions") as f:
        workflow_versions = yaml.load(f, Loader=yaml.BaseLoader) | module_versions

    workflow_versions["Workflow"] = {
        "Nextflow": "$workflow.nextflow.version",
        "$workflow.manifest.name": "$workflow.manifest.version"
    }

    versions_mqc = {
        'id': 'software_versions',
        'section_name': '${workflow.manifest.name} Software Versions by Process',
        'section_href': 'https://github.com/${workflow.manifest.name}',
        'plot_type': 'html',
        'description': 'are collected at run time from the software output.',
        'data': _make_versions_html(workflow_versions)
    }

    versions_mqc_unique = {
        'id': 'software_versions_unique',
        'section_name': '${workflow.manifest.name} Software Versions',
        'section_href': 'https://github.com/${workflow.manifest.name}',
        'plot_type': 'html',
        'description': 'are collected at run time from the software output.',
        'data': _make_versions_unique_html(workflow_versions)
    }

    with open("software_versions.yml", 'w') as f:
        yaml.dump(workflow_versions, f, default_flow_style=False)
    with open("software_versions_mqc.yml", 'w') as f:
        yaml.dump(versions_mqc, f, default_flow_style=False)
    with open("software_versions_unique_mqc.yml", 'w') as f:
        yaml.dump(versions_mqc_unique, f, default_flow_style=False)

    with open('local_versions.yml', 'w') as f:
        yaml.dump(module_versions, f, default_flow_style=False)
    """
}
The text was updated successfully, but these errors were encountered:
I like this 👍🏻 How about having just one table with three columns, the middle column being a list of the process names that use that tool and version? That would be a slightly less verbose version of what we have currently, but it would still contain all of the information, so we wouldn't need two tables.
I like this 👍🏻 How about having just one table with three columns, the middle column being a list of the process names that use that tool and version? That would be a slightly less verbose version of what we have currently, but it would still contain all of the information, so we wouldn't need two tables.
We have recently implemented the new versioning system into the cutandrun pipeline and while it's a major improvement over the old system, I have a suggestion which I think improves the output and usefulness even further.
The current module parses the YAML file and outputs a table of software versions by process. This answers any question about which process is using which software version. Another major question I have when I view this report is: what is the unique list of software versions running in the pipeline? This is useful for end users who just want a concise list of the software the pipeline uses, and also from a development point of view, where I want to know whether I am running multiple versions of the same software tool. While I can get this information from the current software-by-process view, it would be better to have an extra view that shows exactly this information.
I propose altering the custom software dumps module to output two table views: one of software version by process, and one of unique software versions.
I have already mocked up a working version of this shown below in the code snippet. The changes in the snippet along with some changes to the multiqc config file now show two software version sections in the output report. One standard output by process:
and one new output:
Here we can already see the usefulness as I was immediately able to see that we are running three versions of samtools and two different versions of python.
The text was updated successfully, but these errors were encountered: