From 6fae26eeffbc59818c86e790e1a63eddab3cccf0 Mon Sep 17 00:00:00 2001
From: Santiago Sanchez
Date: Wed, 16 Oct 2024 12:19:59 +0100
Subject: [PATCH 1/2] add sortgff and gff3trimfasta modules

---
Review notes (ignored by git am):
 * Output globs are now "*_trimmed.gff" and "*.sorted.gff" so they match the
   file names the scripts write. The earlier "*._trimmed.gff" and
   "*._sorted.gff" patterns matched nothing and, because the outputs are
   optional, the gff channel was silently empty.
 * Each stub block assigns prefix itself; the assignment in the script block
   is not available when the stub is run.
 * Version commands: the awk "NF" field reference is escaped so Groovy does
   not interpolate it, only the first line of "grep --version" is used, and
   the awk version is taken from the first line of "awk -Wversion" instead of
   searching for an 8-digit date stamp that GNU Awk does not print.
 * The blob ids on the index lines predate these fixes.

 .../ebi-metagenomics/gff3trimfasta/main.nf    | 40 +++++++++++++
 .../ebi-metagenomics/gff3trimfasta/meta.yml   | 48 ++++++++++++++++
 .../ebi-metagenomics/jbrowse/sortgff/main.nf  | 42 ++++++++++++++
 .../ebi-metagenomics/jbrowse/sortgff/meta.yml | 56 +++++++++++++++++++
 4 files changed, 186 insertions(+)
 create mode 100644 modules/ebi-metagenomics/gff3trimfasta/main.nf
 create mode 100644 modules/ebi-metagenomics/gff3trimfasta/meta.yml
 create mode 100644 modules/ebi-metagenomics/jbrowse/sortgff/main.nf
 create mode 100644 modules/ebi-metagenomics/jbrowse/sortgff/meta.yml

diff --git a/modules/ebi-metagenomics/gff3trimfasta/main.nf b/modules/ebi-metagenomics/gff3trimfasta/main.nf
new file mode 100644
index 0000000..caafae0
--- /dev/null
+++ b/modules/ebi-metagenomics/gff3trimfasta/main.nf
@@ -0,0 +1,40 @@
+process GFF3_TRIM_FASTA {
+    tag "$meta.id"
+    label 'process_single'
+
+    container 'quay.io/biocontainers/gnu-wget:1.18--h36e9172_9'
+
+    input:
+    tuple val(meta), path(tab)
+
+    output:
+    tuple val(meta), path("*_trimmed.gff"), optional:true, emit: gff
+    path "versions.yml"                  , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    prefix = task.ext.prefix ?: "${meta.id}"
+"""
+
+    awk '/##FASTA/{exit}1' "$tab" > "${prefix}_trimmed.gff"
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        awk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//')
+    END_VERSIONS
+    """
+
+    stub:
+    prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch "${prefix}_trimmed.gff"
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        awk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//')
+    END_VERSIONS
+    """
+}
\ No newline at end of file
diff --git a/modules/ebi-metagenomics/gff3trimfasta/meta.yml b/modules/ebi-metagenomics/gff3trimfasta/meta.yml
new file mode 100644
index 0000000..b932f81
--- /dev/null
+++ b/modules/ebi-metagenomics/gff3trimfasta/meta.yml
@@ -0,0 +1,48 @@
+name: gff3trimfasta
+description: Trim the FASTA section from a GFF3 file
+keywords:
+  - trim
+  - gff
+  - fasta
+  - awk
+tools:
+  - awk:
+      description: A program that you can use to select particular records in a file and perform operations upon them.
+      homepage: https://www.gnu.org/software/gawk/
+      documentation: https://www.gnu.org/software/gawk/manual/gawk.html
+      licence: ["GPL-3.0-or-later"]
+      identifier: biotools:awk
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test', single_end:false ]
+    - tab:
+        type: file
+        description: |
+          GFF3 file that includes a FASTA section to be trimmed
+        pattern: "*.{gff,gff3}"
+output:
+  - gff:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*_trimmed.gff":
+          type: file
+          description: GFF file with the FASTA section trimmed
+          pattern: "*_trimmed.gff"
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+authors:
+  - "@SantiagoSanchezF"
+  - "@tgurbich"
+  - "@vikasguptaebi"
+maintainers:
+  - "@tgurbich"
+  - "@vikasguptaebi"
diff --git a/modules/ebi-metagenomics/jbrowse/sortgff/main.nf b/modules/ebi-metagenomics/jbrowse/sortgff/main.nf
new file mode 100644
index 0000000..6f54f52
--- /dev/null
+++ b/modules/ebi-metagenomics/jbrowse/sortgff/main.nf
@@ -0,0 +1,42 @@
+process TABIX_TABIX {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container 'quay.io/biocontainers/gnu-wget:1.18--h36e9172_9'
+
+    input:
+    tuple val(meta), path(tab)
+
+    output:
+    tuple val(meta), path("*.sorted.gff"), optional:true, emit: gff
+    path "versions.yml"                 , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    prefix = task.ext.prefix ?: "${meta.id}"
+
+    """
+
+    (grep "^#" $tab; grep -v "^#" $tab | sort -t"`printf '\t'`" -k1,1 -k4,4n) > ${prefix}.sorted.gff;
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        grep: \$(grep --version | awk 'NR==1 {print \$NF}')
+    END_VERSIONS
+    """
+
+    stub:
+    prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.sorted.gff
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        grep: \$(grep --version | awk 'NR==1 {print \$NF}')
+    END_VERSIONS
+    """
+}
diff --git a/modules/ebi-metagenomics/jbrowse/sortgff/meta.yml b/modules/ebi-metagenomics/jbrowse/sortgff/meta.yml
new file mode 100644
index 0000000..9387aa0
--- /dev/null
+++ b/modules/ebi-metagenomics/jbrowse/sortgff/meta.yml
@@ -0,0 +1,56 @@
+name: sortgff
+description: sort gff file for jbrowse use
+keywords:
+  - gff
+  - jbrowse
+  - sort
+  - grep
+tools:
+  - grep:
+      description: Print lines matching a pattern
+      homepage: https://www.gnu.org/software/grep/
+      documentation: https://www.gnu.org/software/grep/manual/
+      licence: ["GPL-3.0-or-later"]
+      identifier: biotools:grep
+  - sort:
+      description: Sort lines of text files
+      homepage: https://www.gnu.org/software/coreutils/
+      documentation: https://www.gnu.org/software/coreutils/manual/html_node/sort-invocation.html
+      licence: ["GPL-3.0-or-later"]
+      identifier: biotools:sort
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test', single_end:false ]
+    - tab:
+        type: file
+        description: |
+          tab-delimited genome position file (e.g., GFF file)
+        pattern: "*.{gff,gff3}"
+output:
+  - gff:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.sorted.gff":
+          type: file
+          description: Sorted GFF file
+          pattern: "*.sorted.gff"
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+authors:
+  - "@SantiagoSanchezF"
+  - "@tgurbich"
+  - "@vikasguptaebi"
+maintainers:
+  - "@tgurbich"
+  - "@vikasguptaebi"
+
+    
\ No newline at end of file

From e1eeb2ae2588f1042c00f6a4266017299365382e Mon Sep 17 00:00:00 2001
From: Santiago Sanchez
Date: Wed, 16 Oct 2024 13:34:23 +0100
Subject: [PATCH 2/2] Changes after Martins suggestions

---
Review notes (ignored by git am):
 * Context and removed lines were refreshed to match the corrected output
   globs, stub blocks and version commands introduced in patch 1/2.
 * Fixed the "containig" typo in the sortgff input description.

 modules/ebi-metagenomics/gff3trimfasta/main.nf    | 8 ++++----
 modules/ebi-metagenomics/gff3trimfasta/meta.yml   | 2 +-
 modules/ebi-metagenomics/jbrowse/sortgff/main.nf  | 7 +++----
 modules/ebi-metagenomics/jbrowse/sortgff/meta.yml | 6 ++----
 4 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/modules/ebi-metagenomics/gff3trimfasta/main.nf b/modules/ebi-metagenomics/gff3trimfasta/main.nf
index caafae0..ffa9e4e 100644
--- a/modules/ebi-metagenomics/gff3trimfasta/main.nf
+++ b/modules/ebi-metagenomics/gff3trimfasta/main.nf
@@ -2,14 +2,14 @@ process GFF3_TRIM_FASTA {
     tag "$meta.id"
     label 'process_single'
 
-    container 'quay.io/biocontainers/gnu-wget:1.18--h36e9172_9'
+    container 'quay.io/biocontainers/gawk:4.1.3--0'
 
     input:
     tuple val(meta), path(tab)
 
     output:
-    tuple val(meta), path("*_trimmed.gff"), optional:true, emit: gff
-    path "versions.yml"                  , emit: versions
+    tuple val(meta), path("*_trimmed.gff"), optional: true, emit: gff
+    path "versions.yml"                   , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
@@ -17,7 +17,7 @@ process GFF3_TRIM_FASTA {
     script:
     def args = task.ext.args ?: ''
     prefix = task.ext.prefix ?: "${meta.id}"
-"""
+    """
 
     awk '/##FASTA/{exit}1' "$tab" > "${prefix}_trimmed.gff"
 
diff --git a/modules/ebi-metagenomics/gff3trimfasta/meta.yml b/modules/ebi-metagenomics/gff3trimfasta/meta.yml
index b932f81..c40b586 100644
--- a/modules/ebi-metagenomics/gff3trimfasta/meta.yml
+++ b/modules/ebi-metagenomics/gff3trimfasta/meta.yml
@@ -45,4 +45,4 @@ authors:
   - "@vikasguptaebi"
 maintainers:
   - "@tgurbich"
-  - "@vikasguptaebi"
+  - "@vikasguptaebi"
\ No newline at end of file
diff --git a/modules/ebi-metagenomics/jbrowse/sortgff/main.nf b/modules/ebi-metagenomics/jbrowse/sortgff/main.nf
index 6f54f52..a316d92 100644
--- a/modules/ebi-metagenomics/jbrowse/sortgff/main.nf
+++ b/modules/ebi-metagenomics/jbrowse/sortgff/main.nf
@@ -1,9 +1,9 @@
-process TABIX_TABIX {
+process SORT_GFF {
     tag "$meta.id"
     label 'process_single'
 
     conda "${moduleDir}/environment.yml"
-    container 'quay.io/biocontainers/gnu-wget:1.18--h36e9172_9'
+    container 'quay.io/biocontainers/coreutils:8.25--0'
 
     input:
     tuple val(meta), path(tab)
@@ -20,7 +20,6 @@ process TABIX_TABIX {
     prefix = task.ext.prefix ?: "${meta.id}"
 
     """
-
     (grep "^#" $tab; grep -v "^#" $tab | sort -t"`printf '\t'`" -k1,1 -k4,4n) > ${prefix}.sorted.gff;
 
     cat <<-END_VERSIONS > versions.yml
@@ -39,4 +38,4 @@ process TABIX_TABIX {
         grep: \$(grep --version | awk 'NR==1 {print \$NF}')
     END_VERSIONS
     """
-}
+}
\ No newline at end of file
diff --git a/modules/ebi-metagenomics/jbrowse/sortgff/meta.yml b/modules/ebi-metagenomics/jbrowse/sortgff/meta.yml
index 9387aa0..e7b0846 100644
--- a/modules/ebi-metagenomics/jbrowse/sortgff/meta.yml
+++ b/modules/ebi-metagenomics/jbrowse/sortgff/meta.yml
@@ -27,7 +27,7 @@ input:
     - tab:
         type: file
         description: |
-          tab-delimited genome position file (e.g., GFF file)
+          GFF file containing genomic annotations
         pattern: "*.{gff,gff3}"
 output:
   - gff:
@@ -51,6 +51,4 @@ authors:
   - "@vikasguptaebi"
 maintainers:
   - "@tgurbich"
-  - "@vikasguptaebi"
-
-    
\ No newline at end of file
+  - "@vikasguptaebi"
\ No newline at end of file