-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merging two workflows with related functionality.
- Loading branch information
Showing
92 changed files
with
1,336,510 additions
and
31 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
# syntax=docker/dockerfile:1

# ---- build stage: create and pack the phageFinder conda environment ----
FROM continuumio/miniconda3:23.5.2-0 AS build

ENV container=docker

# add conda channels
RUN conda config --add channels conda-forge \
    && conda config --add channels bioconda

# -y makes every conda call explicitly non-interactive instead of relying on
# the prompt's default answer when stdin is closed.
RUN conda init bash \
    && . ~/.bashrc \
    && conda create -y --name phageFinder \
    && conda activate phageFinder

# install dependencies for phageFinder
RUN conda install -y -n phageFinder -c bioconda aragorn
RUN conda install -y -n phageFinder -c bioconda blast-legacy
RUN conda install -y -n phageFinder -c bioconda hmmer
RUN conda install -y -n phageFinder -c bioconda trnascan-se
RUN conda install -y -c conda-forge conda-pack

# The wildcard can match several files, so the destination must be a
# directory path ending in "/". COPY is used because these are plain local
# scripts and none of ADD's extra behaviour (URL fetch, tar extraction) is wanted.
COPY bin/*.pl /opt/conda/envs/phageFinder/bin/

# pack the environment so it can be relocated into the runtime image
RUN conda-pack -n phageFinder -o /tmp/env.tar && \
    mkdir /venv && cd /venv && tar xf /tmp/env.tar && \
    rm /tmp/env.tar

RUN /venv/bin/conda-unpack

#we need the version of phage_finder from EDGE's third-party database to handle some bugs
RUN wget https://ref-db.edgebioinformatics.org/EDGE/dev/edge_dev_thirdParty_softwares.tgz \
    && tar -xvzf edge_dev_thirdParty_softwares.tgz \
    && tar -xvzf thirdParty/phage_finder_v2.1.tar.gz -C .

RUN chmod -R a+rx phage_finder_v2.1/*

# ---- runtime stage: only the packed environment and phage_finder ----
FROM debian:latest AS runtime

COPY --from=build /venv /venv
COPY --from=build /phage_finder_v2.1 /venv/opt/phage_finder_v2.1
ENV PATH=/venv/opt/phage_finder_v2.1/bin:/venv/bin:$PATH

SHELL ["/bin/bash", "-c"]
# exec form: bash runs directly as the container's main process rather than
# being wrapped in an extra "bash -c".
CMD ["/bin/bash"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
#!/usr/bin/env perl
# Purpose: prepare files for phage finder.
# This script takes a GFF file from Prokka and the matching genome/contig
# fasta file as input, and writes three files into the output directory:
#   Assembly.con           copy of the fasta with sequences renamed SequenceNNNNNNN
#   id_map.txt             "<new id>\t<original id>", one line per sequence
#   phage_finder_info.txt  protein table, one line per CDS feature
# Written by Chien-Chi Lo
# 16 Oct 2014

use strict;
use warnings;
use Getopt::Long;
use File::Basename;

my $outDir;
my $version=0.1;
GetOptions(
    "o=s"     => \$outDir,
    "version" => sub{print "Version $version\n";exit;},
    "help|?"  => sub{Usage()} );

# Without -o the output paths used to be built from an undefined value
# (e.g. "/id_map.txt"); fall back to the current directory instead.
$outDir = "." unless (defined $outDir and length $outDir);

# Error paths pass 1 to Usage() as the requested exit status.
if (@ARGV != 2) { Usage(1); }
my ($gff_file, $fasta_file) = @ARGV;
unless ( -e $gff_file )   { print "GFF File not exist\n"; Usage(1); }
unless ( -e $fasta_file ) { print "Genome/Contig fasta file not exist\n"; Usage(1); }

my %len;      # sequence/region length keyed by ORIGINAL sequence id
my %id_map;   # original sequence id -> generated SequenceNNNNNNN id
my $id_map_file="$outDir/id_map.txt";
my $seq_id="Sequence0000001";

## rename fasta file to mapped id
my $new_fasta="$outDir/Assembly.con";
open(my $ofh,   '>', $new_fasta)   or die "Cannot write $new_fasta: $!\n";
open(my $ffh,   '<', $fasta_file)  or die "Cannot open Fasta file $fasta_file: $!\n";
open(my $id_fh, '>', $id_map_file) or die "Cannot write $id_map_file: $!\n";
my ($id,$seq);
while (my $line = <$ffh>)
{
    chomp $line;
    if ($line =~ /^>(\S+)/)
    {
        my $next_id = $1;
        # Store the length of the record just finished; testing the id (not the
        # sequence) keeps records whose sequence is empty.
        $len{$id}=length($seq) if (defined $id);
        $id = $next_id;
        $id_map{$id}=$seq_id;
        print $id_fh "$seq_id\t$id\n";
        print $ofh ">$seq_id\n";
        $seq_id++;      # string increment: Sequence0000001 -> Sequence0000002
        $seq="";
    }
    else
    {
        $seq .= $line;
        print $ofh $line,"\n";
    }
}
$len{$id}=length($seq) if (defined $id);

close $ffh;
close $id_fh or die "Cannot finish writing $id_map_file: $!\n";
close $ofh   or die "Cannot finish writing $new_fasta: $!\n";

## prepare phage_finder_info file
my $info_file="$outDir/phage_finder_info.txt";
open(my $fh,    '<', $gff_file)  or die "Cannot open GFF file $gff_file: $!\n";
open(my $ph_fh, '>', $info_file) or die "Cannot write $info_file: $!\n";
while (my $line = <$fh>)  # each LOCUS
{
    chomp $line;
    if ($line =~ /^#+sequence-region/)
    {
        # "##sequence-region <id> <start> <end>" overrides the fasta-derived length.
        my (undef, $region_id, $start, $end)=split /\s+/,$line;
        $len{$region_id}=$end-$start+1 if (defined $end);
        next;
    }

    my ($id,$source,$type,$start,$end,$score,$strand,$phase,$Attributes)=split /\t/,$line;
    next unless (defined $type and $type eq "CDS");

    # Parse "key=value;key=value". The limit of 2 keeps any '=' inside a value,
    # and entries without '=' are skipped so the hash cannot get out of step.
    my %annotations;
    $Attributes = "" unless defined $Attributes;
    foreach my $pair (split /;/,$Attributes)
    {
        my ($key,$value)=split /=/,$pair,2;
        next unless (defined $key and defined $value);
        $annotations{$key}=$value;
    }

    my $product = $annotations{"product"} || $annotations{"Note"} || $annotations{"function"} || "Unknown" ;
    my $locus_tag = $annotations{"locus_tag"} || $annotations{"Name"} || "";
    $product =~ s/\%2C/,/g;
    $product =~ s/\%3B/;/g;

    # A GFF sequence id that is absent from the fasta has no mapping; the row is
    # still written (with empty fields, as before) but the problem is reported.
    my $mapped_id = $id_map{$id};
    unless (defined $mapped_id)
    {
        warn "No fasta sequence found for GFF sequence id '$id'\n";
        $mapped_id = "";
    }
    my $region_len = defined $len{$id} ? $len{$id} : "";

    print $ph_fh join("\t",$mapped_id,$region_len,$locus_tag,$start,$end,$product),"\n";
}
close $ph_fh or die "Cannot finish writing $info_file: $!\n";
close $fh;
|
||
|
||
# Print the usage message and terminate the script.
# Arguments:
#   $status  (optional) exit status for the process; defaults to 0 so existing
#            no-argument calls behave as before. A non-zero status sends the
#            message to STDERR instead of STDOUT.
# Does not return.
sub Usage
{
    my ($status) = @_;
    $status = 0 unless defined $status;
    my $out = $status ? \*STDERR : \*STDOUT;
    print {$out} <<"END";
Usage: perl $0 -o outDir GFF_file Fasta_file
Version $version
-o Output directory.
END
    exit $status;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
#!/usr/bin/env perl
# Purpose: write phageFinder_summary.txt in the current directory.
# Reads an id map (-i, "<new id> <original id>" per line) and a result table
# (-t); every table line is split on whitespace, its first field is replaced by
# the original id when the map has one, and the fields are re-joined with tabs.

use strict;
use warnings;
use Getopt::Long;

my $id_file;
my $table;

my $usage = "Usage: perl $0 -t result_table -i id_map_file\n";
GetOptions(
    't=s' => \$table,
    'i=s' => \$id_file
) or die $usage;
die $usage unless (defined $table and defined $id_file);

# Progress information goes to STDERR, one item per line.
print STDERR "id map: $id_file\n";
print STDERR "table: $table\n";

# new (generated) id -> original id
my %id_map;
open(my $fh, '<', $id_file) or die "Cannot read $id_file: $!\n";
while (my $line = <$fh>)
{
    chomp $line;
    my ($new_id,$original_id)=split ' ',$line;
    next unless (defined $new_id and defined $original_id);   # blank/short line
    $id_map{$new_id}=$original_id;
}
close $fh;

my $summary_file="phageFinder_summary.txt";
open(my $ofh, '>', $summary_file) or die "Cannot write $summary_file: $!\n";
open(my $result_fh, '<', $table) or die "Cannot read $table: $!\n";
while (my $line = <$result_fh>)
{
    my @fields=split /\s+/,$line;
    # defined-ness (not truthiness) so an original id of "0" is still restored,
    # and blank lines no longer look up an undefined key.
    if (defined $fields[0] and defined $id_map{$fields[0]})
    {
        $fields[0]=$id_map{$fields[0]};
    }
    print $ofh join("\t",@fields),"\n";
}
close $result_fh;
close $ofh or die "Cannot finish writing $summary_file: $!\n";
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
// Default pipeline parameters. The three input files have no default (null).
params.outDir  = "."
params.gffFile = null
params.faaFile = null
params.fnaFile = null
params.numCPU  = 8

// Singularity settings.
singularity.enabled    = true
singularity.runOptions = "--compat"

// Container image applied through the process scope.
process {
    container = "apwat/phage_finder:noWrite"
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
#!/usr/bin/env nextflow | ||
|
||
|
||
// Runs phageFinder_prepare.pl on the GFF and nucleotide fasta, writing its
// output files into the task directory.
process phageFinderPrep {

    input:
    path gff
    path fna

    output:
    // id_map.txt is emitted on its own so the post-PF step can use it directly
    path 'id_map.txt', emit: idMap
    // every output file is handed to the next process
    path '*', emit: allPFoutput

    script:
    """
    phageFinder_prepare.pl -o . ${gff} ${fna}
    """
}
|
||
// Runs phage_finder_v2.1.sh on the prepared files; only log.txt is published.
process phageFinder {
    publishDir "${params.outDir}/AssemblyBasedAnalysis/Prophage", mode: 'copy', pattern: 'log.txt'

    input:
    path prepOut
    // the protein fasta is staged under the name Assembly.pep
    path faa, stageAs: 'Assembly.pep'

    output:
    path 'PFPR_tab.txt', emit: phageTable
    path 'log.txt'

    // phage_finder_v2.1.sh must be on PATH; stdout and stderr both go to log.txt
    script:
    """
    phage_finder_v2.1.sh Assembly ${params.numCPU} 1>log.txt 2>&1
    """
}
|
||
// Runs phageFinder_summary.pl with the result table and the id map, and
// publishes phageFinder_summary.txt.
process summary {
    publishDir "${params.outDir}/AssemblyBasedAnalysis/Prophage", mode: 'copy'

    input:
    path idMap
    path pfprTab

    output:
    path 'phageFinder_summary.txt'

    script:
    """
    phageFinder_summary.pl -t ${pfprTab} -i ${idMap}
    """
}
|
||
|
||
workflow {
    // One channel per input file parameter; checkIfExists is set on each.
    gffInput = channel.fromPath(params.gffFile, checkIfExists: true)
    fnaInput = channel.fromPath(params.fnaFile, checkIfExists: true)
    // Zero-size protein fasta files are filtered out of the channel.
    faaInput = channel
        .fromPath(params.faaFile, checkIfExists: true)
        .filter { proteins -> proteins.size() > 0 }

    // prepare -> phage finder -> summary
    phageFinderPrep(gffInput, fnaInput)
    phageFinder(phageFinderPrep.out.allPFoutput, faaInput)
    summary(phageFinderPrep.out.idMap, phageFinder.out.phageTable)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
{ | ||
"gffFile" : "test_files/phageProj.gff", | ||
"faaFile" : "test_files/phageProj.faa", | ||
"fnaFile" : "test_files/phageProj.fna", | ||
"numCPU" : 4, | ||
"outDir": "test_phageFinder" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
# syntax=docker/dockerfile:1

# ---- build stage: create and pack the readsTaxonomyAssignment environment ----
FROM continuumio/miniconda3:24.5.0-0 AS build

ENV container=docker

# add conda channels
RUN conda config --add channels conda-forge \
    && conda config --add channels bioconda

# -y makes every conda call explicitly non-interactive instead of relying on
# the prompt's default answer when stdin is closed.
RUN conda init bash \
    && . ~/.bashrc \
    && conda create -y --name readsTaxonomyAssignment \
    && conda activate readsTaxonomyAssignment

RUN conda install -y -n readsTaxonomyAssignment -c bioconda metaphlan=4.1.1
RUN conda install -y -n readsTaxonomyAssignment python=3.10
#the required version of diamond is 2.0.5, but this runs into conda installation problems
#we will install it here to handle any dependencies, then later replace diamond with the appropriate release
RUN conda install -y -n readsTaxonomyAssignment -c bioconda diamond
RUN conda install -y -n readsTaxonomyAssignment -c bioconda perl-json
RUN conda install -y -n readsTaxonomyAssignment -c bioconda perl-html-template
RUN conda install -y -n readsTaxonomyAssignment -c bioconda perl-xml-simple
RUN conda install -y -n readsTaxonomyAssignment -c bioconda perl-excel-writer-xlsx
RUN conda install -y -n readsTaxonomyAssignment -c bioconda kraken2
RUN conda install -y -n readsTaxonomyAssignment -c bioconda krona
RUN conda install -y -n readsTaxonomyAssignment -c bioconda perl-yaml
RUN conda install -y -n readsTaxonomyAssignment -c bioconda centrifuge
RUN conda install -y -n readsTaxonomyAssignment -c bioconda gottcha2
RUN conda install -y -n readsTaxonomyAssignment -c bioconda minimap2=2.17
RUN conda install -y -n readsTaxonomyAssignment -c bioconda pybedtools
RUN conda install -y -n readsTaxonomyAssignment -c conda-forge parallel
#conda does not have the most recent version of gottcha (1.0b instead of 1.0c),
#but we encounter errors when compiling splitrim.d in gottcha's latest source code release.
#we will install gottcha here and later replace the non-splitrim scripts with the latest source code.
RUN conda install -y -n readsTaxonomyAssignment -c bioconda gottcha
RUN conda install -y -n readsTaxonomyAssignment -c bioconda bowtie2=2.5.1
# perl-html-template, perl-xml-simple and perl-excel-writer-xlsx are already
# installed above, so the repeated install steps were dropped.
RUN conda install -y -n readsTaxonomyAssignment -c bioconda perl-bioperl
RUN conda install -y -n readsTaxonomyAssignment -c conda-forge perl-app-cpanminus
RUN conda install -y -n readsTaxonomyAssignment -c bioconda perl-bioperl-core
RUN conda install -y -n readsTaxonomyAssignment -c conda-forge cairosvg=2.7.1
RUN conda install -y -c conda-forge conda-pack

#download latest PanGIA
#ISSUE: differs from version in EDGE's third-party software, in ways that break scripts
#we can get EDGE's version from its third-party tarball, but it's a wastefully large download.
# mkdir -p plus the trailing slash guarantee pangia lands INSIDE opt/ rather
# than being renamed to "opt" if that directory does not exist yet.
RUN wget https://ref-db.edgebioinformatics.org/EDGE/dev/edge_dev_thirdParty_softwares.tgz \
    && tar -xvzf edge_dev_thirdParty_softwares.tgz \
    && tar -xvzf thirdParty/pangia-1.0.0.tar.gz -C . \
    && mkdir -p /opt/conda/envs/readsTaxonomyAssignment/opt \
    && mv pangia /opt/conda/envs/readsTaxonomyAssignment/opt/

#correct diamond version
RUN wget https://github.com/bbuchfink/diamond/releases/download/v2.0.5/diamond-linux64.tar.gz \
    && tar -xvzf diamond-linux64.tar.gz

#correct gottcha version
RUN wget https://github.com/LANL-Bioinformatics/GOTTCHA/archive/refs/tags/1.0c.tar.gz \
    && tar -xvzf 1.0c.tar.gz \
    && chmod 755 GOTTCHA-1.0c/src/*.pl

#add scripts from this project to bin
# The wildcard can match several files, so the destination must be a
# directory path ending in "/".
COPY bin/* /opt/conda/envs/readsTaxonomyAssignment/bin/

#pack environment for runtime image
RUN conda-pack -n readsTaxonomyAssignment -o /tmp/env.tar && \
    mkdir /venv && cd /venv && tar xf /tmp/env.tar && \
    rm /tmp/env.tar

RUN /venv/bin/conda-unpack

# ---- runtime stage ----
FROM debian:latest AS runtime

COPY --from=build /venv /venv
# Trailing slashes make it explicit that these are copied INTO /venv/bin,
# replacing the conda-installed diamond and GOTTCHA scripts of the same name.
COPY --from=build /diamond /venv/bin/
COPY --from=build /GOTTCHA-1.0c/src/*.pl /venv/bin/

#add environment, pangia base and vis-scripts to PATH
ENV PATH=/venv/bin:/venv/opt/pangia:/venv/opt/pangia/pangia-vis/scripts:/venv/opt/krona:$PATH
ENV PERL5LIB=/venv/lib/perl5/core_perl/

SHELL ["/bin/bash", "-c"]
# exec form: bash runs directly as the container's main process rather than
# being wrapped in an extra "bash -c".
CMD ["/bin/bash"]
Oops, something went wrong.