Merge pull request #5 from Hammarn/master

Single End Support
nf-core · Mar 1, 2018 · fbe94df · fbe94df
2 parents 88972d0 + 0700f96
commit fbe94df
Show file tree

Hide file tree

Showing 6 changed files with 119 additions and 88 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -67,7 +67,7 @@ RUN cpanm install Set::IntervalTree \
 RUN GMAP_URL="http://research-pub.gene.com/gmap/src/gmap-gsnap-2017-01-14.tar.gz" && \
     cd $SRC && \
     wget $GMAP_URL && \
-    tar xvf gmap-gsnap-2017-01-14.tar.gz && \
+    tar xvf gmap-gsnap-2017-01-14.tar.gz && rm gmap-gsnap-2017-01-14.tar.gz &&\
     cd gmap-2017-01-14 && ./configure && make && make install
 
 ENV PERL5LIB ${STAR_FUSION_HOME}/PerlLib
@@ -80,19 +80,11 @@ ENV STAR_FUSION_VERSION=1.1.0
 RUN STAR_FUSION_URL="https://github.com/STAR-Fusion/STAR-Fusion/releases/download/v${STAR_FUSION_VERSION}/STAR-Fusion_v${STAR_FUSION_VERSION}.tar.gz" && \
     cd $SRC && \
     wget $STAR_FUSION_URL && \
-    tar xvf STAR-Fusion_v${STAR_FUSION_VERSION}.tar.gz && \
+    tar xvf STAR-Fusion_v${STAR_FUSION_VERSION}.tar.gz &&  rm STAR-Fusion_v${STAR_FUSION_VERSION}.tar.gz && \
     cd STAR-Fusion_v${STAR_FUSION_VERSION} && make
 
 ENV STAR_FUSION_HOME $SRC/STAR-Fusion_v${STAR_FUSION_VERSION}
 
-# Samtools
-
-RUN SAMTOOLS_URL="https://github.com/samtools/samtools/releases/download/1.3.1/samtools-1.3.1.tar.bz2" && \
-   cd $SRC && \
-   wget $SAMTOOLS_URL && \
-   tar xvf samtools-1.3.1.tar.bz2 && \
-   cd samtools-1.3.1/htslib-1.3.1 && ./configure && make && make install && \
-   cd ../ && ./configure --without-curses && make && make install
 
 # Trinity
 
@@ -101,7 +93,7 @@ ENV TRINITY_VERSION=2.4.0
 RUN TRINITY_URL="https://github.com/trinityrnaseq/trinityrnaseq/archive/Trinity-v${TRINITY_VERSION}.tar.gz" && \
    cd $SRC && \
    wget $TRINITY_URL && \
-   tar xvf Trinity-v${TRINITY_VERSION}.tar.gz && \
+   tar xvf Trinity-v${TRINITY_VERSION}.tar.gz && rm Trinity-v${TRINITY_VERSION}.tar.gz &&\
    cd trinityrnaseq-Trinity-v${TRINITY_VERSION} && make
 
 

diff --git a/README.md b/README.md
@@ -14,10 +14,12 @@ Path to input reads
 
 ### `--star-fusion`
 If this flag is set to true then the pipeline will run STAR-Fusion.
+True by default.
 
 ### `--fusioncatcher`
 
 If this flag is set to true then the pipeline will run Fusion Catcher.
+True by default.
 
 More than one of these can be true at a time. 
 

diff --git a/bin/fusion_genes_compare.py b/bin/fusion_genes_compare.py
@@ -4,6 +4,7 @@
 import argparse
 import sys
 
+import pdb
 
 def read_files_store_data(input_files,output_file):
     fusion_dict={}
@@ -25,6 +26,7 @@ def read_files_store_data(input_files,output_file):
 
         elif input_file.endswith("summary_candidate_fusions.txt"):
             #We have a Fusion catcher file
             with open(input_file, 'r') as f:
                 for line in f:
                     if line.startswith("  * "):
@@ -39,24 +41,33 @@ def read_files_store_data(input_files,output_file):
     make_report(fusion_dict, output_file)
 
 
-def group_NGI_files(input_files,outputfile):
-    sample_pattern=re.compile("^(P[0-9]+_[0-9]+)")
-    matches=[]
+def group_files(input_files, outputfile):
+    """Group fusion-caller result files per sample and write one report each.
+
+    Files are paired on the part of their path that precedes the
+    caller-specific suffix, so the STAR-Fusion and FusionCatcher results of
+    one sample must share that prefix.  Files matching neither suffix are
+    ignored, and nothing is written when no file matches.
+
+    Args:
+        input_files: Paths to STAR-Fusion and/or FusionCatcher result files.
+        outputfile: Unused; each report is named
+            "<sample prefix>.fusion_comparison.txt".  Kept so the
+            command-line entry point can keep passing it.
+    """
+    suffixes = (
+        'star-fusion.fusion_candidates.final.abridged',  # STAR-Fusion
+        'summary_candidate_fusions.txt',                 # FusionCatcher
+    )
+    sample_dict = {}
+    # Look through the input files and find sample names.
     for input_file in input_files:
-        try:
-            match=sample_pattern.search(os.path.basename(input_file)).group(1)
-            if match:
-                matches.append(match)
-        except AttributeError:
-            continue
-    NGI_names=matches    
-    for NGI_name in NGI_names:
-        sample_files=[]
-        for fusion_file in input_files:
-            if os.path.basename(fusion_file).startswith(NGI_name):
-                sample_files.append(fusion_file)
-        outfile="{}.fusion_comparison.txt".format(NGI_name)
-        read_files_store_data(sample_files,outfile)
+        for suffix in suffixes:
+            if input_file.endswith(suffix):
+                # Slice the suffix off.  str.rstrip(suffix) would strip a
+                # *set of characters*, eating the tail of the sample name
+                # and giving the two callers' files different keys.
+                # Trailing separators are dropped so the report is not named
+                # "sample_.fusion_comparison.txt".
+                key = input_file[:-len(suffix)].rstrip('._-')
+                sample_dict.setdefault(key, []).append(input_file)
+                break
+
+    # One report per sample, in a stable order.  Iterating the dict also
+    # works on Python 3, where keys()/values() cannot be indexed.
+    for key in sorted(sample_dict):
+        outfile = "{}.fusion_comparison.txt".format(key)
+        read_files_store_data(sample_dict[key], outfile)
+
 
 
 def make_report(fusion_dict, output_file):
@@ -84,16 +95,19 @@ def make_report(fusion_dict, output_file):
     content+="## Number of Fusion genes detected with FusionCatcher: {} \n".format(len_fc)
     content +="##FUSIONCATCHER\tSTAR-FUSION\tBOTH\n"
     ##cleanup
+
+
     gene_in_both=[item.rstrip() for item in gene_in_both]
     gene_star_only=[item.rstrip() for item in gene_star_only]
     gene_fc_only=[item.rstrip() for item in gene_fc_only]
 
+    # Emit one tab-separated row per index up to the longest of the three
+    # lists; shorter lists are padded with empty cells.
     maxlen = max([len(l) for l in [gene_in_both,gene_star_only,gene_fc_only]])
-    for idx in range(0, maxlen-1):
-	both_str = gene_in_both[idx] if len(gene_in_both) > idx else ''
-	star_str = gene_star_only[idx] if len(gene_star_only) > idx else ''
-	fc_str = gene_fc_only[idx] if len(gene_fc_only) > idx else ''
-	content += "{}\t{}\t{}\n".format(fc_str, star_str, both_str)    
+    for idx in range(0, maxlen):
+        both_str = gene_in_both[idx] if len(gene_in_both) > idx else ''
+        star_str = gene_star_only[idx] if len(gene_star_only) > idx else ''
+        fc_str = gene_fc_only[idx] if len(gene_fc_only) > idx else ''
+        content += "{}\t{}\t{}\n".format(fc_str, star_str, both_str)
 
     with open(output_file, 'w') as f:
         f.write(content)
@@ -106,5 +120,4 @@ def make_report(fusion_dict, output_file):
     parser.add_argument("output_file", metavar='Output file', nargs='?', default='fusion_comparison.txt',
                                    help="File to save output to. ")
     args = parser.parse_args() 
-    group_NGI_files(args.input_files,args.output_file)
-    read_files_store_data(args.input_files,args.output_file)
+    group_files(args.input_files,args.output_file)
diff --git a/conf/uppmax.config b/conf/uppmax.config
@@ -14,9 +14,7 @@ $star_fusion.module = ['bioinfo-tools', 'star-fusion']
 $fusioninspector.module = ['bioinfo-tools', 'star-fusion', 'trinity/2014-07-17', 'samtools/1.5', 'htslib/1.5', 'perl_modules']
 
 params {
-  references{
     fusioncatcer_data = '/sw/apps/bioinfo/FusionCatcher/1.00/data/human_v90'
     star_fusion_reference  = '/proj/sllstore2017079/nobackup/private/rickard/STAR_ref'
     fusioncatcher_data_dir= '/sw/apps/bioinfo/FusionCatcher/1.00/data/human_v90'
-  }
 }
diff --git a/main.nf b/main.nf
@@ -24,19 +24,35 @@ version = '0.1'
 params.project = false
 params.reads = "data/*{1,2}.fastq.gz"
 params.email = false
-params.star-fusion = true
+params.star_fusion = true
 params.inspector = false
 params.fusioncatcher = true
 params.sensitivity = 'sensitive'
 params.clusterOptions = false
 params.outdir = './results'
 params.fc_extra_options = ''
+
+params.singleEnd = false
 Channel
-    .fromFilePairs( params.reads, size: 2 )
+    .fromFilePairs( params.reads, size: params.singleEnd ? 1 : 2 ) 
     .ifEmpty { exit 1, "Cannot find any reads matching: ${params.reads}" }
     .into { read_files_star_fusion; fusion_inspector_reads; fusioncatcher_reads}
 
 
+// Validate inputs
+// Both reference channels start out empty so that the statements and process
+// input declarations that use them stay valid when a tool is switched off or
+// its reference is not configured; an empty channel means the process that
+// reads from it is never started.
+star_fusion_reference = Channel.empty()
+if( params.star_fusion_reference && params.star_fusion ){
+    star_fusion_reference = Channel
+        .fromPath(params.star_fusion_reference)
+        .ifEmpty { exit 1, "STAR-fusion reference not found: ${params.star_fusion_reference}" }
+}
+// The reference is consumed twice (STAR-Fusion and FusionInspector), so split it.
+(star_fusion_reference,star_fusion_reference_fusioninspector) = star_fusion_reference.into(2)
+
+fusioncatcher_data_dir = Channel.empty()
+if( params.fusioncatcher_data_dir && params.fusioncatcher ){
+    fusioncatcher_data_dir = Channel
+        .fromPath(params.fusioncatcher_data_dir)
+        .ifEmpty { exit 1, "FusionCatcher data directory not found: ${params.fusioncatcher_data_dir}" }
+}
+
 /*
  * STAR-Fusion
  */
@@ -46,58 +62,24 @@ process star_fusion{
 
     input:
     set val (name), file(reads) from read_files_star_fusion
-
+    file star_fusion_reference from star_fusion_reference.collect()
     output:
     file '*final.abridged*' into star_fusion_abridged
-    file '*star-fusion.fusion_candidates.final.abridged.FFPM' into fusion_candidates,fusion_candidates_list
+    file '*star_fusion.fusion_candidates.final.abridged.FFPM' into fusion_candidates,fusion_candidates_list
 
-    when: params.star-fusion
+    when: params.star_fusion
 
     script:
     """
     STAR-Fusion \\
-        --genome_lib_dir ${params.star_fusion_reference}\\
-        --left_fq ${reads[0]} \\
-        --right_fq ${reads[1]} \\
-        --output_dir .
-    for f in *
-    do
-    mv \$f $name\$f
-    done
-    """
-}
-
-
-/*
- *  -  FusionInspector
- */
-process fusioninspector {
-    tag "$name"
-    publishDir "${params.outdir}/FusionInspector",  mode: 'copy'  
-    input:
-    set val (name), file(reads) from fusion_inspector_reads
-    file fusion_candidates
-
-    output:
-    file '*' into fusioninspector_results
-
-    when: params.inspector
-
-    script:
-    """
-    FusionInspector \\
-        --fusions $fusion_candidates \\
-        --genome_lib ${params.star_fusion_reference} \\
+        --genome_lib_dir ${star_fusion_reference}\\
         --left_fq ${reads[0]} \\
         --right_fq ${reads[1]} \\
-        --out_dir . \\ 
-        --out_prefix finspector \\
-        --prep_for_IGV
+        --output_dir  $name
     """
 }
 
 
-
 /*
  * Fusion Catcher
 */
@@ -109,34 +91,50 @@ process fusioncatcher {
 
     input:
     set val (name), file(reads) from fusioncatcher_reads
+    file fusioncatcher_data_dir from fusioncatcher_data_dir.collect()
 
     output:
     file '*.{txt,log,zip}' into fusioncatcher
-
     when: params.fusioncatcher
 
     script:
+    if (params.singleEnd) {
+
     """
+    mkdir ${reads}_data
+    mv ${reads} ${reads}_data/
     fusioncatcher \\
-        -d ${params.fusioncatcher_data_dir} \\
+        -d $fusioncatcher_data_dir \\
+        -i ${reads}_data \\
+        --threads ${task.cpus} \\
+        -o $name \\
+        --skip-blat \\
+        --single-end \\
+        ${params.fc_extra_options}
+    """
+    } else {
+
+    """
+    fusioncatcher \\
+        -d $fusioncatcher_data_dir \\
         -i ${reads[0]},${reads[1]} \\
         --threads ${task.cpus} \\
         --${params.sensitivity} \\
-        -o . \\
+        -o $name \\
+        --skip-blat \\
         ${params.fc_extra_options}
-    for f in *.{txt,log,zip}
-    do
-    mv \$f $name\$f
-    done
     """
 }
+}
+
+
 
 process fusion_genes_compare {
 
     publishDir "${params.outdir}/Comparative_shortlist",  mode: 'copy'
 
     input:
-    file ('*star-fusion.fusion_candidates.final.abridged') from fusion_candidates_list.collect() 
+    file ('*star_fusion.fusion_candidates.final.abridged') from fusion_candidates_list.collect() 
     file ('*summary_candidate_fusions.txt') from fusioncatcher.collect()
 
     output:
@@ -152,4 +150,33 @@ process fusion_genes_compare {
 }
 
 
+/*
+ *  -  FusionInspector
+ */
+process fusioninspector {
+    // Runs FusionInspector on the STAR-Fusion candidate list; only executed
+    // when params.inspector is set.
+    tag "$name"
+    publishDir "${params.outdir}/FusionInspector",  mode: 'copy'
+
+    input:
+    set val (name), file(reads) from fusion_inspector_reads
+    file fusion_candidates
+    file star_fusion_reference from star_fusion_reference_fusioninspector.collect()
+
+    output:
+    file '*' into fusioninspector_results
+
+    when: params.inspector
+
+    // NOTE(review): both reads[0] and reads[1] are passed, so this command
+    // presumably only works for paired-end input -- confirm before enabling
+    // it together with params.singleEnd.
+    // Every continuation backslash must be the last character on its line;
+    // trailing whitespace after it ends the shell command early.
+    script:
+    """
+    FusionInspector \\
+        --fusions $fusion_candidates \\
+        --genome_lib $star_fusion_reference \\
+        --left_fq ${reads[0]} \\
+        --right_fq ${reads[1]} \\
+        --out_dir . \\
+        --out_prefix finspector \\
+        --prep_for_IGV
+    """
+}
+
 
diff --git a/nextflow.config b/nextflow.config
@@ -12,8 +12,7 @@ vim: syntax=groovy
 
 // Variable to specify the docker / singularity image tag to use
 // Picks up on use of -r 1.3 in nextflow command
-container_tag = workflow.revision ? workflow.revision : 'latest'
-wf_container = "scilifelab/ngi-rnafusion:${container_tag}"
+wf_container = { "scilifelab/ngi-rnafusion:${workflow.revision ? workflow.revision : 'latest'}" }