From 0b25490d97fe0a2a2e08399283c79b5f3e21a23b Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Fri, 3 May 2024 17:54:28 +0200 Subject: [PATCH 1/8] bowtie2: do not use pipes fixes https://github.com/galaxyproject/tools-iuc/issues/5983 by using a pipe bowtie2 and samtools run in parallel. since already bowtie2 alone uses more CPU than the assigned ones (less than 1 core) we should not do this, but run them serially. also properly redirects stderr. --- tools/bowtie2/bowtie2_wrapper.xml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/bowtie2/bowtie2_wrapper.xml b/tools/bowtie2/bowtie2_wrapper.xml index 54b686154b5..04c469a7a25 100644 --- a/tools/bowtie2/bowtie2_wrapper.xml +++ b/tools/bowtie2/bowtie2_wrapper.xml @@ -12,8 +12,6 @@ bowtie2 --version '$mapping_stats' + 2> >(tee '$mapping_stats' >&2) #end if ## output file #if str( $sam_options.sam_options_selector ) == "no" or (str( $sam_options.sam_opt ) == "false" and str($sam_options.reorder) == ''): - | samtools sort --no-PG -@\${GALAXY_SLOTS:-2} -T "\${TMPDIR:-.}" -O bam -o '$output' + > alignment.sam + && samtools sort --no-PG -@\${GALAXY_SLOTS:-2} -T "\${TMPDIR:-.}" -O bam -o '$output' alignment.sam #else if $sam_options.reorder: - | samtools view --no-PG -bS - -o '$output' + > alignment.sam + && samtools view --no-PG -b -o '$output' alignment.sam #else: > '$output' #end if From 6e1b4f2a655e4e2815bf582e0e878d00e4ea16a3 Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Fri, 3 May 2024 18:28:13 +0200 Subject: [PATCH 2/8] fix linting --- tools/bowtie2/bowtie2_macros.xml | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tools/bowtie2/bowtie2_macros.xml b/tools/bowtie2/bowtie2_macros.xml index ba6baad5b59..2ccd5ffaf17 100644 --- a/tools/bowtie2/bowtie2_macros.xml +++ b/tools/bowtie2/bowtie2_macros.xml @@ -1,6 +1,6 @@ 2.5.3 - 0 + 1 @@ -59,7 +59,7 @@ #if $use_rg #if $rg_param('read_group_id_conditional') is None #set $rg_id = $rg_auto_name - #elif 
$rg_param('read_group_id_conditional').do_auto_name + #elif $rg_param('read_group_id_conditional').do_auto_name == 'true' #set $rg_id = $rg_auto_name #else #set $rg_id = str($rg_param('read_group_id_conditional').ID) @@ -67,7 +67,7 @@ #if $rg_param('read_group_sm_conditional') is None #set $rg_sm = '' - #elif $rg_param('read_group_sm_conditional').do_auto_name + #elif $rg_param('read_group_sm_conditional').do_auto_name == 'true' #set $rg_sm = $rg_auto_name #else #set $rg_sm = str($rg_param('read_group_sm_conditional').SM) @@ -81,7 +81,7 @@ #if $rg_param('read_group_lb_conditional') is None #set $rg_lb = '' - #elif $rg_param('read_group_lb_conditional').do_auto_name + #elif $rg_param('read_group_lb_conditional').do_auto_name == 'true' #set $rg_lb = $rg_auto_name #else #set $rg_lb = str($rg_param('read_group_lb_conditional').LB) @@ -140,9 +140,11 @@ #set $use_rg = str($rg.rg_selector) != "do_not_set" - - - + + + + + From a3c11d3f938d31a9802cc1ba64e482a6dc705af2 Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Tue, 7 May 2024 11:22:21 +0200 Subject: [PATCH 3/8] remove --no-PG --- tools/bowtie2/bowtie2_wrapper.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/bowtie2/bowtie2_wrapper.xml b/tools/bowtie2/bowtie2_wrapper.xml index 04c469a7a25..09ed190b66a 100644 --- a/tools/bowtie2/bowtie2_wrapper.xml +++ b/tools/bowtie2/bowtie2_wrapper.xml @@ -307,10 +307,10 @@ bowtie2 ## output file #if str( $sam_options.sam_options_selector ) == "no" or (str( $sam_options.sam_opt ) == "false" and str($sam_options.reorder) == ''): > alignment.sam - && samtools sort --no-PG -@\${GALAXY_SLOTS:-2} -T "\${TMPDIR:-.}" -O bam -o '$output' alignment.sam + && samtools sort -@\${GALAXY_SLOTS:-2} -T "\${TMPDIR:-.}" -O bam -o '$output' alignment.sam #else if $sam_options.reorder: > alignment.sam - && samtools view --no-PG -b -o '$output' alignment.sam + && samtools view -b -o '$output' alignment.sam #else: > '$output' #end if From 
45812b75682a45ccdbc3fcb77dc68ec44079ba7b Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Tue, 7 May 2024 11:47:00 +0200 Subject: [PATCH 4/8] restore piping --- tools/bowtie2/bowtie2_wrapper.xml | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/tools/bowtie2/bowtie2_wrapper.xml b/tools/bowtie2/bowtie2_wrapper.xml index 09ed190b66a..d7b5f74afa3 100644 --- a/tools/bowtie2/bowtie2_wrapper.xml +++ b/tools/bowtie2/bowtie2_wrapper.xml @@ -12,6 +12,8 @@ bowtie2 --version alignment.sam - && samtools sort -@\${GALAXY_SLOTS:-2} -T "\${TMPDIR:-.}" -O bam -o '$output' alignment.sam + ## Convert SAM output to sorted BAM + ## using the two pipe stages has the following effect + ## - mapping and sorting run in parallel, during this time sort produces + ## presorted temporary files but does not produce output (hence + ## view does not run) + ## - once mapping is finished sort will start to merge the temporary + ## files (which should be fast also on a single thread) gives the + ## sorted output to view which only compresses the files (now + ## using full parallelism again) + | samtools sort -l 0 -T "\${TMPDIR:-.}" -O bam | samtools view --no-PG -O bam -@ \${GALAXY_SLOTS:-1} -o '$output' #else if $sam_options.reorder: - > alignment.sam - && samtools view -b -o '$output' alignment.sam + | samtools view --no-PG -b -o '$output' #else: > '$output' #end if From 1c04353e93736593615a6092f1c8c0e57dbeff2d Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Tue, 7 May 2024 12:04:21 +0200 Subject: [PATCH 5/8] reduce number of used threads by one --- tools/bowtie2/bowtie2_wrapper.xml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tools/bowtie2/bowtie2_wrapper.xml b/tools/bowtie2/bowtie2_wrapper.xml index d7b5f74afa3..ccd9a5346db 100644 --- a/tools/bowtie2/bowtie2_wrapper.xml +++ b/tools/bowtie2/bowtie2_wrapper.xml @@ -113,12 +113,22 @@ set -o | grep -q pipefail && set -o pipefail; ln -f -s '${library.input_1}' ${read1} && 
#end if + +## compute number of threads to be used for bowtie2 +## the bowtie parameter -p specifies the number of alignment threads to use (in +## addition to a control thread) # just using GALAXY_SLOTS will lead to +## overcommitting resources (in particular because there may be a samtools view +## running in parallel). +## for now we use one thread less than GALAXY_SLOTS +THREADS=\${GALAXY_SLOTS:-4} && +if [ "\$THREADS" -gt 1 ]; then (( THREADS-- )); fi && + ## execute bowtie2 bowtie2 ## number of threads --p \${GALAXY_SLOTS:-4} +-p "\$THREADS" ## index file path -x '$index_path' From b4b69510e93f8a6a0264b51d5c05e7057e1df529 Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Tue, 7 May 2024 14:55:03 +0200 Subject: [PATCH 6/8] Revert "fix linting" This reverts commit 6e1b4f2a655e4e2815bf582e0e878d00e4ea16a3. --- tools/bowtie2/bowtie2_macros.xml | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/tools/bowtie2/bowtie2_macros.xml b/tools/bowtie2/bowtie2_macros.xml index 2ccd5ffaf17..ba6baad5b59 100644 --- a/tools/bowtie2/bowtie2_macros.xml +++ b/tools/bowtie2/bowtie2_macros.xml @@ -1,6 +1,6 @@ 2.5.3 - 1 + 0 @@ -59,7 +59,7 @@ #if $use_rg #if $rg_param('read_group_id_conditional') is None #set $rg_id = $rg_auto_name - #elif $rg_param('read_group_id_conditional').do_auto_name == 'true' + #elif $rg_param('read_group_id_conditional').do_auto_name #set $rg_id = $rg_auto_name #else #set $rg_id = str($rg_param('read_group_id_conditional').ID) @@ -67,7 +67,7 @@ #if $rg_param('read_group_sm_conditional') is None #set $rg_sm = '' - #elif $rg_param('read_group_sm_conditional').do_auto_name == 'true' + #elif $rg_param('read_group_sm_conditional').do_auto_name #set $rg_sm = $rg_auto_name #else #set $rg_sm = str($rg_param('read_group_sm_conditional').SM) @@ -81,7 +81,7 @@ #if $rg_param('read_group_lb_conditional') is None #set $rg_lb = '' - #elif $rg_param('read_group_lb_conditional').do_auto_name == 'true' + #elif 
$rg_param('read_group_lb_conditional').do_auto_name #set $rg_lb = $rg_auto_name #else #set $rg_lb = str($rg_param('read_group_lb_conditional').LB) @@ -140,11 +140,9 @@ #set $use_rg = str($rg.rg_selector) != "do_not_set" - - - - - + + + From 10122de59a90de8a42cec6cc74c571df883a68a1 Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Tue, 7 May 2024 14:55:29 +0200 Subject: [PATCH 7/8] bump --- tools/bowtie2/bowtie2_macros.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/bowtie2/bowtie2_macros.xml b/tools/bowtie2/bowtie2_macros.xml index ba6baad5b59..4d0d5d99911 100644 --- a/tools/bowtie2/bowtie2_macros.xml +++ b/tools/bowtie2/bowtie2_macros.xml @@ -1,6 +1,6 @@ 2.5.3 - 0 + 1 From 99a6bf7101c5e01c2147624b86784c214237ff25 Mon Sep 17 00:00:00 2001 From: M Bernt Date: Wed, 8 May 2024 12:22:08 +0200 Subject: [PATCH 8/8] Improve comment wording Co-authored-by: Wolfgang Maier --- tools/bowtie2/bowtie2_wrapper.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/bowtie2/bowtie2_wrapper.xml b/tools/bowtie2/bowtie2_wrapper.xml index ccd9a5346db..d8994313b4d 100644 --- a/tools/bowtie2/bowtie2_wrapper.xml +++ b/tools/bowtie2/bowtie2_wrapper.xml @@ -117,7 +117,7 @@ set -o | grep -q pipefail && set -o pipefail; ## compute number of threads to be used for bowtie2 ## the bowtie parameter -p specifies the number of alignment threads to use (in ## addition to a control thread) # just using GALAXY_SLOTS will lead to -## overcommitting resources (in particular because there may be a samtools view +## overcommitting resources (in particular because there may be a samtools sort or view ## running in parallel). ## for now we use one thread less than GALAXY_SLOTS THREADS=\${GALAXY_SLOTS:-4} &&