From 9a3d1b1c3c2e2d1f6d06fa92e9a95580e8529291 Mon Sep 17 00:00:00 2001 From: Maria Doyle Date: Sat, 24 Feb 2018 17:42:53 +1100 Subject: [PATCH] Replace some selects with sections for greater efficiency --- tools/cutadapt/cutadapt.xml | 256 ++++++++++++++++-------------------- tools/cutadapt/macros.xml | 54 ++++---- 2 files changed, 140 insertions(+), 170 deletions(-) diff --git a/tools/cutadapt/cutadapt.xml b/tools/cutadapt/cutadapt.xml index 181378d5710..fb827149ed4 100644 --- a/tools/cutadapt/cutadapt.xml +++ b/tools/cutadapt/cutadapt.xml @@ -134,55 +134,54 @@ --paired-output='$out2' #end if - --error-rate=$error_rate - --times=$count - --overlap=$overlap - $no_indels - $match_read_wildcards - - #if str( $output_filtering_options.output_filtering) == "filter": - $output_filtering_options.discard - $output_filtering_options.discard_untrimmed - $output_filtering_options.no_trim - $output_filtering_options.mask_adapter - #if str($output_filtering_options.min) != '0': - --minimum-length=$output_filtering_options.min - #end if - #if str($output_filtering_options.max) != '0': - --maximum-length=$output_filtering_options.max - #end if - #if $output_filtering_options.max_n: - --max-n=$output_filtering_options.max_n - #end if - #if str( $library.type ) != "single": - #if $output_filtering_options.pair_filter: - --pair-filter=$output_filtering_options.pair_filter - #end if + --error-rate=$adapter_options.error_rate + --times=$adapter_options.count + --overlap=$adapter_options.overlap + $adapter_options.no_indels + $adapter_options.match_read_wildcards + + + $filter_options.discard + $filter_options.discard_untrimmed + $filter_options.no_trim + $filter_options.mask_adapter + #if str($filter_options.min) != '0': + --minimum-length=$filter_options.min + #end if + #if str($filter_options.max) != '0': + --maximum-length=$filter_options.max + #end if + #if $filter_options.max_n: + --max-n=$filter_options.max_n + #end if + #if str( $library.type ) != "single": + #if 
$filter_options.pair_filter: + --pair-filter=$filter_options.pair_filter #end if #end if - #if str( $read_modification_params.read_modification) == "modify": - #if str($read_modification_params.quality_cutoff) != '0': - --quality-cutoff=$read_modification_params.quality_cutoff - #end if - #if str($read_modification_params.nextseq_trim) != '0': - --nextseq-trim=$read_modification_params.nextseq_trim - #end if - $read_modification_params.trim_n - #if $read_modification_params.prefix != '': - --prefix="$read_modification_params.prefix" - #end if - #if $read_modification_params.suffix != '': - --suffix="$read_modification_params.suffix" - #end if - #if str($read_modification_params.length) != '0': - --length=$read_modification_params.length - #end if - #if $read_modification_params.length_tag != '': - --length-tag="$read_modification_params.length_tag" - #end if - $read_modification_params.zero_cap + + #if str($read_mod_options.quality_cutoff) != '0': + --quality-cutoff=$read_mod_options.quality_cutoff + #end if + #if str($read_mod_options.nextseq_trim) != '0': + --nextseq-trim=$read_mod_options.nextseq_trim + #end if + $read_mod_options.trim_n + #if $read_mod_options.prefix != '': + --prefix="$read_mod_options.prefix" + #end if + #if $read_mod_options.suffix != '': + --suffix="$read_mod_options.suffix" #end if + #if str($read_mod_options.length) != '0': + --length=$read_mod_options.length + #end if + #if $read_mod_options.length_tag != '': + --length-tag="$read_mod_options.length_tag" + #end if + $read_mod_options.zero_cap + #if str( $library.type ) == "single": '${read1}' @@ -205,91 +204,73 @@ - + - - + + - + - - - - - - - - - - - - + +
+ + + + + +
+ + +
+ + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - +
+ + +
+ + + + - - - - - - - - - - + + + + + + + + +
+ + +
+ + + + + + +
- - - - - - - - - - - - - - - - - - - - - - - @@ -300,45 +281,36 @@ - (output_params['output_type'] == "additional") - (output_params['info_file'] is True) + (output_options['info_file'] is True) - (output_params['output_type'] == "additional") - (output_params['rest_file'] is True) + (output_options['rest_file'] is True) - (output_params['output_type'] == "additional") - (output_params['wildcard_file'] is True) + (output_options['wildcard_file'] is True) - (output_params['output_type'] == "additional") - (output_params['untrimmed_file'] is True) + (output_options['untrimmed_file'] is True) (library['type'] == 'paired' or library['type'] == 'paired_collection') - (output_params['output_type'] == "additional") - (output_params['untrimmed_file'] is True) + (output_options['untrimmed_file'] is True) - (output_params['output_type'] == "additional") - (output_params['too_short_file'] is True) + (output_options['too_short_file'] is True) (library['type'] == 'paired' or library['type'] == 'paired_collection') - (output_params['output_type'] == "additional") - (output_params['too_short_file'] is True) + (output_options['too_short_file'] is True) - (output_params['output_type'] == "additional") - (output_params['too_long_file'] is True) + (output_options['too_long_file'] is True) (library['type'] == 'paired' or library['type'] == 'paired_collection') - (output_params['output_type'] == "additional") - (output_params['too_long_file'] is True) + (output_options['too_long_file'] is True) @@ -422,15 +394,13 @@ ------------------- -**Cutadapt** removes adapter sequences from DNA high-throughput -sequencing data. This is usually necessary when the read length of the -machine is longer than the molecule that is sequenced, such as in -microRNA or CRISPR data. +**Cutadapt** finds and removes adapter sequences, primers, poly-A tails and other types of unwanted sequence from your high-throughput sequencing reads. 
+ +Cleaning your data in this way is often required: reads from small-RNA sequencing contain the 3’ sequencing adapter because the read is longer than the molecule that is sequenced, such as in microRNA or CRISPR data. Poly-A tails are useful for pulling out RNA from your sample, but often you don’t want them to be in your reads. -Cutadapt_ searches for the adapter in all reads and removes it when it finds it. Unless you use a filtering option, all reads that were present in the input file will also be present in the output file, some of them trimmed, some of them not. Even reads that were trimmed entirely (because the adapter was found in the very beginning) are output. All of this can be changed with options in the tool form above, explained further down. +Cutadapt_ helps with these trimming tasks by finding the adapter or primer sequences in an error-tolerant way. It can also modify and filter reads in various ways. Cutadapt searches for the adapter in all reads and removes it when it finds it. Unless you use a filtering option, all reads that were present in the input file will also be present in the output file, some of them trimmed, some of them not. Even reads that were trimmed entirely (because the adapter was found in the very beginning) are output. All of this can be changed with options in the tool form above. -The tool is based on the **Open Source** Cutadapt_ tool. See the complete `Cutadapt -documentation`_ for additional details. +The tool is based on the **Open Source** Cutadapt_ tool. See the complete `Cutadapt documentation`_ for additional details. If you use Cutadapt, please cite *Martin, 2011* under **Citations** below. ------------------- @@ -440,7 +410,7 @@ documentation`_ for additional details. Input files for Cutadapt need to be FASTQ.GZ, FASTQ.BZ2, FASTQ or FASTA. -To trim an adapter, input the sequence e.g. AACCGGTT (with the characters: $, ^, ..., if anchored or linked). +To trim an adapter, input the sequence e.g. 
AACCGGTT (with the characters: **$**, **^**, **...**, if anchored or linked). ============================================= =================== **Option** **Sequence** @@ -470,6 +440,7 @@ Below is an illustration of the allowed adapter locations relative to the read a If you have reads containing Illumina TruSeq adapters, for example, follow these steps. + For Single-end reads as well as the first reads of Paired-end data: **Read 1** @@ -478,6 +449,9 @@ In the **3' (End) Adapters** option above, insert A + the “TruSeq Indexed Adap AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC + +For the second reads of Paired-end data: + **Read 2** In the **3' (End) Adapters** option above, insert the reverse complement of the “TruSeq Universal Adapter”: diff --git a/tools/cutadapt/macros.xml b/tools/cutadapt/macros.xml index 97b1db17bb7..331f16d2524 100644 --- a/tools/cutadapt/macros.xml +++ b/tools/cutadapt/macros.xml @@ -38,25 +38,23 @@ ## Additional Outputs - #if str( $output_params.output_type ) == "additional": - #if $output_params.info_file: - --info-file=$info_file - #end if - #if $output_params.rest_file: - --rest-file=$rest_output - #end if - #if $output_params.wildcard_file: - --wildcard-file=$wild_output - #end if - #if $output_params.too_short_file: - --too-short-output=$too_short_output - #end if - #if $output_params.too_long_file: - --too-long-output=$too_long_output - #end if - #if $output_params.untrimmed_file: - --untrimmed-output=$untrimmed_output - #end if + #if $output_options.info_file: + --info-file=$info_file + #end if + #if $output_options.rest_file: + --rest-file=$rest_output + #end if + #if $output_options.wildcard_file: + --wildcard-file=$wild_output + #end if + #if $output_options.too_short_file: + --too-short-output=$too_short_output + #end if + #if $output_options.too_long_file: + --too-long-output=$too_long_output + #end if + #if $output_options.untrimmed_file: + --untrimmed-output=$untrimmed_output #end if ]]> @@ -99,16 +97,14 @@ ## Additional Outputs - Read 2 
- #if str( $output_params.output_type ) == "additional": - #if $output_params.too_short_file: - --too-short-paired-output=$too_short_paired_output - #end if - #if $output_params.too_long_file: - --too-long-paired-output=$too_long_paired_output - #end if - #if $output_params.untrimmed_file: - --untrimmed-paired-output=$untrimmed_paired_output - #end if + #if $output_options.too_short_file: + --too-short-paired-output=$too_short_paired_output + #end if + #if $output_options.too_long_file: + --too-long-paired-output=$too_long_paired_output + #end if + #if $output_options.untrimmed_file: + --untrimmed-paired-output=$untrimmed_paired_output #end if ]]>