Skip to content

Commit

Permalink
Merge pull request #5 from Hammarn/master
Browse files Browse the repository at this point in the history
Single End Support
  • Loading branch information
Hammarn authored Mar 1, 2018
2 parents 88972d0 + 0700f96 commit fbe94df
Show file tree
Hide file tree
Showing 6 changed files with 119 additions and 88 deletions.
14 changes: 3 additions & 11 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ RUN cpanm install Set::IntervalTree \
RUN GMAP_URL="http://research-pub.gene.com/gmap/src/gmap-gsnap-2017-01-14.tar.gz" && \
cd $SRC && \
wget $GMAP_URL && \
tar xvf gmap-gsnap-2017-01-14.tar.gz && \
tar xvf gmap-gsnap-2017-01-14.tar.gz && rm gmap-gsnap-2017-01-14.tar.gz &&\
cd gmap-2017-01-14 && ./configure && make && make install

ENV PERL5LIB ${STAR_FUSION_HOME}/PerlLib
Expand All @@ -80,19 +80,11 @@ ENV STAR_FUSION_VERSION=1.1.0
RUN STAR_FUSION_URL="https://github.com/STAR-Fusion/STAR-Fusion/releases/download/v${STAR_FUSION_VERSION}/STAR-Fusion_v${STAR_FUSION_VERSION}.tar.gz" && \
cd $SRC && \
wget $STAR_FUSION_URL && \
tar xvf STAR-Fusion_v${STAR_FUSION_VERSION}.tar.gz && \
tar xvf STAR-Fusion_v${STAR_FUSION_VERSION}.tar.gz && rm STAR-Fusion_v${STAR_FUSION_VERSION}.tar.gz && \
cd STAR-Fusion_v${STAR_FUSION_VERSION} && make

ENV STAR_FUSION_HOME $SRC/STAR-Fusion_v${STAR_FUSION_VERSION}

# Samtools

RUN SAMTOOLS_URL="https://github.com/samtools/samtools/releases/download/1.3.1/samtools-1.3.1.tar.bz2" && \
cd $SRC && \
wget $SAMTOOLS_URL && \
tar xvf samtools-1.3.1.tar.bz2 && \
cd samtools-1.3.1/htslib-1.3.1 && ./configure && make && make install && \
cd ../ && ./configure --without-curses && make && make install

# Trinity

Expand All @@ -101,7 +93,7 @@ ENV TRINITY_VERSION=2.4.0
RUN TRINITY_URL="https://github.com/trinityrnaseq/trinityrnaseq/archive/Trinity-v${TRINITY_VERSION}.tar.gz" && \
cd $SRC && \
wget $TRINITY_URL && \
tar xvf Trinity-v${TRINITY_VERSION}.tar.gz && \
tar xvf Trinity-v${TRINITY_VERSION}.tar.gz && rm Trinity-v${TRINITY_VERSION}.tar.gz &&\
cd trinityrnaseq-Trinity-v${TRINITY_VERSION} && make


Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,12 @@ Path to input reads

### `--star-fusion`
If this flag is set to true then the pipeline will run STAR-Fusion.
True by default.

### `--fusioncatcher`

If this flag is set to true then the pipeline will run Fusion Catcher.
True by default.

More than one of these can be true at a time.

Expand Down
61 changes: 37 additions & 24 deletions bin/fusion_genes_compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import argparse
import sys

import pdb

def read_files_store_data(input_files,output_file):
fusion_dict={}
Expand All @@ -25,6 +26,7 @@ def read_files_store_data(input_files,output_file):

elif input_file.endswith("summary_candidate_fusions.txt"):
#We have a Fusion catcher file
pdb.set_trace()
with open(input_file, 'r') as f:
for line in f:
if line.startswith(" * "):
Expand All @@ -39,24 +41,33 @@ def read_files_store_data(input_files,output_file):
make_report(fusion_dict, output_file)


# Filename endings that identify which tool produced a result file.
_STAR_FUSION_SUFFIX = 'star-fusion.fusion_candidates.final.abridged'
_FUSIONCATCHER_SUFFIX = 'summary_candidate_fusions.txt'


def _sample_key(input_file):
    """Return the sample prefix of a recognised result file, else None."""
    for suffix in (_STAR_FUSION_SUFFIX, _FUSIONCATCHER_SUFFIX):
        if input_file.endswith(suffix):
            # Slice the exact suffix off. str.rstrip(suffix) is wrong here: it
            # strips a *set of characters*, so a sample called "tumor" would
            # become "tum" for STAR-Fusion and "" for FusionCatcher, and the
            # two files would never be grouped together.
            prefix = input_file[:-len(suffix)]
            # Trim a trailing separator so "sample." and "sample" yield the
            # same key and the report name does not get a doubled dot.
            return prefix.rstrip('._-')
    return None


def group_files(input_files, outputfile):
    """Group result files by sample and write one comparison per sample.

    Files ending in the STAR-Fusion or FusionCatcher suffix are grouped by
    the part of their path that precedes the suffix. For every sample found,
    ``read_files_store_data`` is called with that sample's files and the
    output name ``<sample>.fusion_comparison.txt``.

    Args:
        input_files: Iterable of result file paths. Paths that match
            neither known suffix are ignored.
        outputfile: Kept for interface compatibility; not used, because each
            report is named after its sample.

    Returns:
        None. Nothing is written when no input file is recognised.
    """
    sample_dict = {}
    for input_file in input_files:
        key = _sample_key(input_file)
        if key is None:
            continue
        sample_dict.setdefault(key, []).append(input_file)

    # Process every sample, in a stable order, instead of only the first
    # dictionary entry (indexing dict.keys() also fails on Python 3).
    for key in sorted(sample_dict):
        outfile = "{}.fusion_comparison.txt".format(key)
        read_files_store_data(sample_dict[key], outfile)



def make_report(fusion_dict, output_file):
Expand Down Expand Up @@ -84,16 +95,19 @@ def make_report(fusion_dict, output_file):
content+="## Number of Fusion genes detected with FusionCatcher: {} \n".format(len_fc)
content +="##FUSIONCATCHER\tSTAR-FUSION\tBOTH\n"
##cleanup


gene_in_both=[item.rstrip() for item in gene_in_both]
gene_star_only=[item.rstrip() for item in gene_star_only]
gene_fc_only=[item.rstrip() for item in gene_fc_only]

pdb.set_trace()
maxlen = max([len(l) for l in [gene_in_both,gene_star_only,gene_fc_only]])
for idx in range(0, maxlen-1):
both_str = gene_in_both[idx] if len(gene_in_both) > idx else ''
star_str = gene_star_only[idx] if len(gene_star_only) > idx else ''
fc_str = gene_fc_only[idx] if len(gene_fc_only) > idx else ''
content += "{}\t{}\t{}\n".format(fc_str, star_str, both_str)
for idx in range(0, maxlen):
both_str = gene_in_both[idx] if len(gene_in_both) > idx else ''
star_str = gene_star_only[idx] if len(gene_star_only) > idx else ''
fc_str = gene_fc_only[idx] if len(gene_fc_only) > idx else ''
content += "{}\t{}\t{}\n".format(fc_str, star_str, both_str)

with open(output_file, 'w') as f:
f.write(content)
Expand All @@ -106,5 +120,4 @@ def make_report(fusion_dict, output_file):
parser.add_argument("output_file", metavar='Output file', nargs='?', default='fusion_comparison.txt',
help="File to save output to. ")
args = parser.parse_args()
group_NGI_files(args.input_files,args.output_file)
read_files_store_data(args.input_files,args.output_file)
group_files(args.input_files,args.output_file)
2 changes: 0 additions & 2 deletions conf/uppmax.config
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,7 @@ $star_fusion.module = ['bioinfo-tools', 'star-fusion']
$fusioninspector.module = ['bioinfo-tools', 'star-fusion', 'trinity/2014-07-17', 'samtools/1.5', 'htslib/1.5', 'perl_modules']

params {
references{
fusioncatcer_data = '/sw/apps/bioinfo/FusionCatcher/1.00/data/human_v90'
star_fusion_reference = '/proj/sllstore2017079/nobackup/private/rickard/STAR_ref'
fusioncatcher_data_dir= '/sw/apps/bioinfo/FusionCatcher/1.00/data/human_v90'
}
}
125 changes: 76 additions & 49 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -24,19 +24,35 @@ version = '0.1'
params.project = false
params.reads = "data/*{1,2}.fastq.gz"
params.email = false
params.star-fusion = true
params.star_fusion = true
params.inspector = false
params.fusioncatcher = true
params.sensitivity = 'sensitive'
params.clusterOptions = false
params.outdir = './results'
params.fc_extra_options = ''

params.singleEnd = false
Channel
.fromFilePairs( params.reads, size: 2 )
.fromFilePairs( params.reads, size: params.singleEnd ? 1 : 2 )
.ifEmpty { exit 1, "Cannot find any reads matching: ${params.reads}" }
.into { read_files_star_fusion; fusion_inspector_reads; fusioncatcher_reads}


// Validate inputs
// Build the STAR-Fusion reference channel only when a reference path is given
// and STAR-Fusion is enabled; the run exits with an error if the path matches nothing.
if( params.star_fusion_reference && params.star_fusion ){
star_fusion_reference = Channel
.fromPath(params.star_fusion_reference)
.ifEmpty { exit 1, "STAR-fusion reference not found: ${params.star_fusion_reference}" }
}
// Split the reference channel in two: one copy is read by the star_fusion
// process, the other by the fusioninspector process.
// NOTE(review): this statement runs even when the condition above was false, in
// which case star_fusion_reference has not been assigned here - confirm that case is handled.
(star_fusion_reference,star_fusion_reference_fusioninspector) = star_fusion_reference.into(2)

// Same pattern for the FusionCatcher data directory: the channel is created only
// when a path is given and FusionCatcher is enabled, and an empty match aborts the run.
if( params.fusioncatcher_data_dir && params.fusioncatcher ){
fusioncatcher_data_dir = Channel
.fromPath(params.fusioncatcher_data_dir)
.ifEmpty { exit 1, "FusionCatcher data directory not found: ${params.fusioncatcher_data_dir}" }
}

/*
* STAR-Fusion
*/
Expand All @@ -46,58 +62,24 @@ process star_fusion{

input:
set val (name), file(reads) from read_files_star_fusion

file star_fusion_reference from star_fusion_reference.collect()
output:
file '*final.abridged*' into star_fusion_abridged
file '*star-fusion.fusion_candidates.final.abridged.FFPM' into fusion_candidates,fusion_candidates_list
file '*star_fusion.fusion_candidates.final.abridged.FFPM' into fusion_candidates,fusion_candidates_list

when: params.star-fusion
when: params.star_fusion

script:
"""
STAR-Fusion \\
--genome_lib_dir ${params.star_fusion_reference}\\
--left_fq ${reads[0]} \\
--right_fq ${reads[1]} \\
--output_dir .
for f in *
do
mv \$f $name\$f
done
"""
}


/*
* - FusionInspector
*/
process fusioninspector {
tag "$name"
publishDir "${params.outdir}/FusionInspector", mode: 'copy'
input:
set val (name), file(reads) from fusion_inspector_reads
file fusion_candidates

output:
file '*' into fusioninspector_results

when: params.inspector

script:
"""
FusionInspector \\
--fusions $fusion_candidates \\
--genome_lib ${params.star_fusion_reference} \\
--genome_lib_dir ${star_fusion_reference}\\
--left_fq ${reads[0]} \\
--right_fq ${reads[1]} \\
--out_dir . \\
--out_prefix finspector \\
--prep_for_IGV
--output_dir $name
"""
}



/*
* Fusion Catcher
*/
Expand All @@ -109,34 +91,50 @@ process fusioncatcher {

input:
set val (name), file(reads) from fusioncatcher_reads
file fusioncatcher_data_dir from fusioncatcher_data_dir.collect()

output:
file '*.{txt,log,zip}' into fusioncatcher

when: params.fusioncatcher

script:
if (params.singleEnd) {

"""
mkdir ${reads}_data
mv ${reads} ${reads}_data/
fusioncatcher \\
-d ${params.fusioncatcher_data_dir} \\
-d $fusioncatcher_data_dir \\
-i ${reads}_data \\
--threads ${task.cpus} \\
-o $name \\
--skip-blat \\
--single-end \\
${params.fc_extra_options}
"""
} else {

"""
fusioncatcher \\
-d $fusioncatcher_data_dir \\
-i ${reads[0]},${reads[1]} \\
--threads ${task.cpus} \\
--${params.sensitivity} \\
-o . \\
-o $name \\
--skip-blat \\
${params.fc_extra_options}
for f in *.{txt,log,zip}
do
mv \$f $name\$f
done
"""
}
}



process fusion_genes_compare {

publishDir "${params.outdir}/Comparative_shortlist", mode: 'copy'

input:
file ('*star-fusion.fusion_candidates.final.abridged') from fusion_candidates_list.collect()
file ('*star_fusion.fusion_candidates.final.abridged') from fusion_candidates_list.collect()
file ('*summary_candidate_fusions.txt') from fusioncatcher.collect()

output:
Expand All @@ -152,4 +150,33 @@ process fusion_genes_compare {
}


/*
* - FusionInspector
*
* Runs FusionInspector on each read set together with the STAR-Fusion
* candidate list and publishes the results under
* ${params.outdir}/FusionInspector. Only executed when params.inspector is set.
*/
process fusioninspector {
// Label each task with the name taken from the read tuple
tag "$name"
// Results are copied into the FusionInspector folder of the output directory
publishDir "${params.outdir}/FusionInspector", mode: 'copy'
input:
// Name plus its read files, taken from the fusion_inspector_reads channel
set val (name), file(reads) from fusion_inspector_reads
// Candidate list; the fusion_candidates channel is filled by the star_fusion process
file fusion_candidates
// Reference taken from the copy of the reference channel reserved for this process
file star_fusion_reference from star_fusion_reference_fusioninspector.collect()

output:
// Every file matching '*' in the task directory is captured
file '*' into fusioninspector_results

// Skipped unless params.inspector is truthy (it is set to false by default)
when: params.inspector

script:
// NOTE(review): reads[0]/reads[1] assumes two read files per sample; with
// params.singleEnd only one file is provided - confirm this process is not used in that mode.
"""
FusionInspector \\
--fusions $fusion_candidates \\
--genome_lib $star_fusion_reference \\
--left_fq ${reads[0]} \\
--right_fq ${reads[1]} \\
--out_dir . \\
--out_prefix finspector \\
--prep_for_IGV
"""
}


3 changes: 1 addition & 2 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@ vim: syntax=groovy

// Variable to specify the docker / singularity image tag to use
// Picks up on use of -r 1.3 in nextflow command
container_tag = workflow.revision ? workflow.revision : 'latest'
wf_container = "scilifelab/ngi-rnafusion:${container_tag}"
wf_container = { "scilifelab/ngi-rnafusion:${workflow.revision ? workflow.revision : 'latest'}" }



Expand Down

0 comments on commit fbe94df

Please sign in to comment.