-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* initial commit * Prokka and RATT running * generating plots and KEGG pathway views * Full workflow, with explicit outputs and TODOs resolved. * Containerized * comments * Nf phage finder (#9) Merging two workflows with related functionality. * Image version bump and documentation for phageFinder
- Loading branch information
Showing
25 changed files
with
3,772 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
**/__pycache__/* | ||
.nextflow* | ||
*/work/* | ||
*/logs/* | ||
*/logs/* | ||
**/ec_info/* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
# syntax=docker/dockerfile:1
# Two-stage build: the "build" stage assembles a conda environment with the
# phage_finder dependencies and packs it with conda-pack; the "runtime" stage
# copies only the unpacked environment and phage_finder itself.
FROM continuumio/miniconda3:23.5.2-0 AS build

ENV container=docker

# add conda channels
RUN conda config --add channels conda-forge \
    && conda config --add channels bioconda

# Create the (empty) target environment. No `conda init` / `conda activate`
# here: shell state does not persist between RUN steps, and every install
# below addresses the environment explicitly with -n.
RUN conda create --yes --name phageFinder

# install dependencies for phageFinder
RUN conda install --yes -n phageFinder -c bioconda aragorn
RUN conda install --yes -n phageFinder -c bioconda blast-legacy
RUN conda install --yes -n phageFinder -c bioconda hmmer
RUN conda install --yes -n phageFinder -c bioconda trnascan-se
# conda-pack goes into the base environment; it is only needed at build time
RUN conda install --yes -c conda-forge conda-pack

# Helper scripts from this project. A wildcard source requires the
# destination to be a directory ending in "/"; COPY is used because no
# archive extraction or URL fetch is wanted.
COPY bin/*.pl /opt/conda/envs/phageFinder/bin/

# pack the environment and unpack it at its runtime location (/venv)
RUN conda-pack -n phageFinder -o /tmp/env.tar && \
    mkdir /venv && cd /venv && tar xf /tmp/env.tar && \
    rm /tmp/env.tar

# rewrite the path prefixes inside the unpacked environment
RUN /venv/bin/conda-unpack

# We need the version of phage_finder from EDGE's third-party bundle to
# handle some bugs. The bundle is removed once phage_finder is extracted.
RUN wget https://ref-db.edgebioinformatics.org/EDGE/dev/edge_dev_thirdParty_softwares.tgz \
    && tar -xvzf edge_dev_thirdParty_softwares.tgz \
    && tar -xvzf thirdParty/phage_finder_v2.1.tar.gz -C . \
    && rm -rf edge_dev_thirdParty_softwares.tgz thirdParty

RUN chmod -R a+rx phage_finder_v2.1/*

# NOTE(review): debian:latest is a moving tag; consider pinning a release
# for reproducible images once a known-good one has been confirmed.
FROM debian:latest AS runtime

COPY --from=build /venv /venv
COPY --from=build /phage_finder_v2.1 /venv/opt/phage_finder_v2.1
ENV PATH=/venv/opt/phage_finder_v2.1/bin:/venv/bin:$PATH

SHELL ["/bin/bash", "-c"]
# exec form: bash is started directly instead of through a wrapping shell
CMD ["/bin/bash"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
#!/usr/bin/env perl | ||
# Purpose: prepare files for phage finder. | ||
# This script takes a GFF file from Prokka as input, and produces a | ||
# phage_finder_info.txt (protein table) | ||
# Written by Chien-Chi Lo | ||
# 16 Oct 2014 | ||
|
||
use strict; | ||
use warnings; | ||
use Getopt::Long; | ||
use File::Basename; | ||
|
||
# ---------------------------------------------------------------------------
# Command line handling
# ---------------------------------------------------------------------------
my $outDir;
my $version=0.1;
GetOptions(
    "o=s"     => \$outDir,
    "version" => sub { print "Version $version\n"; exit; },
    "help|?"  => sub { Usage() },
) or Usage(1);

if (@ARGV != 2) { Usage(1); }
my ($gff_file, $fasta_file) = @ARGV;
unless ( -e $gff_file )   { print STDERR "GFF File not exist\n"; Usage(1); }
unless ( -e $fasta_file ) { print STDERR "Genome/Contig fasta file not exist\n"; Usage(1); }

# Without -o the output paths would be built from an undefined value and
# end up in the filesystem root; fall back to the current directory instead.
$outDir = "." unless defined $outDir && length $outDir;
unless ( -d $outDir ) {
    mkdir $outDir or die "Cannot create output directory $outDir: $!\n";
}

my %len;       # original sequence id -> sequence length
my %id_map;    # original sequence id -> generated id (Sequence0000001, ...)
my $id_map_file = "$outDir/id_map.txt";
my $seq_id      = "Sequence0000001";

## rename fasta file to mapped id
my $new_fasta = "$outDir/Assembly.con";
open(my $ofh,   '>', $new_fasta)   or die "Cannot write $new_fasta: $!\n";
open(my $ffh,   '<', $fasta_file)  or die "Cannot open Fasta file $fasta_file: $!\n";
open(my $id_fh, '>', $id_map_file) or die "Cannot write $id_map_file: $!\n";

my $id;             # original id of the record currently being read
my $seq_len = 0;    # residues seen so far for $id
while (my $line = <$ffh>) {
    chomp $line;
    if ($line =~ /^>(\S+)/) {
        my $next_id = $1;
        # Store the length of the record that just ended (if any).
        $len{$id} = $seq_len if defined $id && $seq_len;
        $id = $next_id;
        $id_map{$id} = $seq_id;
        print $id_fh "$seq_id\t$id\n";
        print $ofh ">$seq_id\n";
        $seq_id++;      # string increment: Sequence0000001 -> Sequence0000002
        $seq_len = 0;
    }
    else {
        # Only the length is needed, so the sequence itself is not kept.
        $seq_len += length $line;
        print $ofh $line, "\n";
    }
}
$len{$id} = $seq_len if defined $id && $seq_len;

close $ffh;
close $id_fh or die "Cannot write $id_map_file: $!\n";
close $ofh   or die "Cannot write $new_fasta: $!\n";

## prepare phage_finder_info file
my $info_file = "$outDir/phage_finder_info.txt";
open(my $fh,    '<', $gff_file)  or die "Cannot open GFF file $gff_file: $!\n";
open(my $ph_fh, '>', $info_file) or die "Cannot write $info_file: $!\n";
while (my $line = <$fh>) {
    chomp $line;

    # Everything after ##FASTA is embedded sequence data, not features.
    last if $line =~ /^##FASTA/;

    if ($line =~ /#sequence-region/) {
        # "##sequence-region <id> <start> <end>" overrides the FASTA length.
        my (undef, $region_id, $start, $end) = split /\s+/, $line;
        next unless defined $end;
        $len{$region_id} = $end - $start + 1;
        next;
    }

    # GFF columns: seqid, source, type, start, end, score, strand, phase, attributes
    my ($seqid, $type, $start, $end, $Attributes) = (split /\t/, $line)[0, 2, 3, 4, 8];
    next unless defined $type and $type eq "CDS";

    my $region_len = $len{$seqid};

    # Parse "key=value;key=value". The split limit keeps values that
    # themselves contain "=" intact, and fields without "=" are skipped so
    # they cannot shift the key/value pairing.
    my %annotations;
    for my $pair (split /;/, (defined $Attributes ? $Attributes : '')) {
        my ($key, $value) = split /=/, $pair, 2;
        next unless defined $key && length $key && defined $value;
        $annotations{$key} = $value;
    }

    my $product   = $annotations{"product"} || $annotations{"Note"} || $annotations{"function"} || "Unknown";
    my $locus_tag = $annotations{"locus_tag"} || $annotations{"Name"} || "";
    # Undo the URL-escaping GFF applies to "," and ";" inside values.
    $product =~ s/\%2C/,/g;
    $product =~ s/\%3B/;/g;
    print $ph_fh join("\t", $id_map{$seqid}, $region_len, $locus_tag, $start, $end, $product), "\n";
}
close $ph_fh or die "Cannot write $info_file: $!\n";
close $fh;

# Print the usage text and exit.
# Optional argument: exit status (default 0). A non-zero status sends the
# text to STDERR so that error exits are visible to the calling pipeline.
sub Usage
{
    my ($exit_code) = @_;
    $exit_code = 0 unless defined $exit_code;
    my $out = $exit_code ? \*STDERR : \*STDOUT;
    print {$out} <<"END";
Usage: perl $0 -o outDir GFF_file Fasta_file
Version $version
-o Output directory.
END
    exit $exit_code;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
#!/usr/bin/env perl | ||
|
||
use strict; | ||
use warnings; | ||
use Getopt::Long; | ||
|
||
# Rewrites the first column of a phage_finder result table, replacing the
# generated sequence ids with the original ids listed in the id map file,
# and writes the result to phageFinder_summary.txt in the current directory.
#   -t  result table to translate
#   -i  id map file: "<generated id> <original id>" per line
my $id_file;
my $table;
my $usage = "Usage: $0 -t result_table -i id_map_file\n";

GetOptions(
    't=s' => \$table,
    'i=s' => \$id_file,
) or die $usage;
die $usage unless defined $table && defined $id_file;

# generated id -> original id
my %id_map;
open(my $fh, '<', $id_file) or die "Cannot read $id_file: $!\n";
while (my $line = <$fh>) {
    chomp $line;
    my ($new_id, $original_id) = split ' ', $line;
    next unless defined $original_id;    # skip blank or incomplete lines
    $id_map{$new_id} = $original_id;
}
close $fh;

my $summary_file = "phageFinder_summary.txt";
open(my $ofh, '>', $summary_file) or die "Cannot write $summary_file: $!\n";
open(my $result_fh, '<', $table) or die "Cannot read $table: $!\n";
while (my $line = <$result_fh>) {
    # Splitting on whitespace also drops the trailing newline.
    my @fields = split /\s+/, $line;
    if (defined $fields[0] && $id_map{ $fields[0] }) {
        $fields[0] = $id_map{ $fields[0] };
    }
    print $ofh join("\t", @fields), "\n";
}
close $result_fh;
close $ofh or die "Cannot write $summary_file: $!\n";
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
// Default workflow parameters. The three input files have no default.
params.outDir  = "."
params.gffFile = null
params.faaFile = null
params.fnaFile = null
params.numCPU  = 8

// Run process containers through Singularity.
singularity.enabled    = true
singularity.runOptions = "--compat"

// Container image used by every process.
process.container = "apwat/phage_finder:1.0"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
#!/usr/bin/env nextflow | ||
|
||
|
||
// Runs phageFinder_prepare.pl on the GFF and FASTA inputs, writing its
// output into the task directory.
process phageFinderPrep {

    input:
    path gff
    path fna

    output:
    // id map on its own, for the post-phage_finder summary step
    path "id_map.txt", emit: idMap
    // everything in the task directory, handed to the next process
    path "*", emit: allPFoutput

    script:
    """
    phageFinder_prepare.pl -o . ${gff} ${fna}
    """
}
|
||
// Runs phage_finder on the prepared files; only log.txt is published.
process phageFinder {

    publishDir "${params.outDir}/AssemblyBasedAnalysis/Prophage", mode: 'copy', pattern: "log.txt"

    input:
    path prepOut
    // the protein FASTA is staged under the name Assembly.pep
    path faa, stageAs: "Assembly.pep"

    output:
    path "PFPR_tab.txt", emit: phageTable
    path "log.txt"

    // phage_finder_v2.1.sh must be on PATH; stdout and stderr both go to log.txt
    script:
    """
    phage_finder_v2.1.sh Assembly ${params.numCPU} 1>log.txt 2>&1
    """
}
|
||
|
||
// Runs phageFinder_summary.pl on the id map and the phage_finder table and
// publishes the resulting phageFinder_summary.txt.
process summary {

    publishDir "${params.outDir}/AssemblyBasedAnalysis/Prophage", mode: 'copy'

    input:
    path idMap
    path pfprTab

    output:
    path "phageFinder_summary.txt"

    script:
    """
    phageFinder_summary.pl -t ${pfprTab} -i ${idMap}
    """
}
|
||
|
||
// Entry workflow: prepare inputs, run phage_finder, summarise the results.
workflow {
    // Fail early with a clear message instead of passing a null path to
    // channel.fromPath.
    if (!params.gffFile || !params.faaFile || !params.fnaFile) {
        error "Missing required parameter(s): gffFile, faaFile and fnaFile must all be set"
    }

    gff_ch = channel.fromPath(params.gffFile, checkIfExists: true)
    // An empty protein file is filtered out, so phageFinder and summary
    // do not run in that case.
    faa_ch = channel.fromPath(params.faaFile, checkIfExists: true).filter { it.size() > 0 }
    fna_ch = channel.fromPath(params.fnaFile, checkIfExists: true)

    phageFinderPrep(gff_ch, fna_ch)
    phageFinder(phageFinderPrep.out.allPFoutput, faa_ch)
    summary(phageFinderPrep.out.idMap, phageFinder.out.phageTable)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
{ | ||
"gffFile" : "test_files/PROJECT.gff", | ||
"faaFile" : "test_files/PROJECT.faa", | ||
"fnaFile" : "test_files/PROJECT.fna", | ||
"numCPU" : 4, | ||
"outDir": "test_phageFinder" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
# syntax=docker/dockerfile:1
# Two-stage build: the "build" stage assembles the annotation conda
# environment (plus RATT and this project's scripts) and packs it with
# conda-pack; the "runtime" stage copies only the unpacked environment.
FROM continuumio/miniconda3:24.5.0-0 AS build

ENV container=docker

# add conda channels
RUN conda config --add channels conda-forge \
    && conda config --add channels bioconda

# Create the (empty) target environment. No `conda init` / `conda activate`
# here: shell state does not persist between RUN steps, and every install
# below addresses the environment explicitly with -n.
RUN conda create --yes --name annotation

RUN conda install --yes -n annotation -c bioconda perl-lwp-protocol-https
RUN conda install --yes -n annotation -c conda-forge r-base
RUN conda install --yes -n annotation -c bioconda prokka
RUN conda install --yes -n annotation -c bioconda mummer
RUN conda install --yes -n annotation -c bioconda blast=2.16
RUN conda install --yes -n annotation -c bioconda perl-yaml

# conda-pack goes into the base environment; it is only needed at build time
RUN conda install --yes -c conda-forge conda-pack

# Custom implementation of RATT from EDGE.
# mkdir -p guarantees that opt/ exists, so RATT is moved INTO it rather than
# being renamed to "opt" (which would break the /venv/opt/RATT PATH entry).
RUN wget https://ref-db.edgebioinformatics.org/EDGE/dev/edge_dev_thirdParty_softwares.tgz \
    && tar -xvzf edge_dev_thirdParty_softwares.tgz \
    && tar -xvzf thirdParty/RATT.tar.gz -C . \
    && mkdir -p /opt/conda/envs/annotation/opt \
    && mv RATT /opt/conda/envs/annotation/opt/ \
    && rm -rf edge_dev_thirdParty_softwares.tgz thirdParty

# Add scripts from this project to bin. A wildcard source requires the
# destination to be a directory ending in "/".
COPY bin/* /opt/conda/envs/annotation/bin/

# pack environment for runtime image
RUN conda-pack -n annotation -o /tmp/env.tar && \
    mkdir /venv && cd /venv && tar xf /tmp/env.tar && \
    rm /tmp/env.tar

# rewrite the path prefixes inside the unpacked environment
RUN /venv/bin/conda-unpack

# NOTE(review): debian:latest is a moving tag; consider pinning a release
# for reproducible images once a known-good one has been confirmed.
FROM debian:latest AS runtime

COPY --from=build /venv /venv

ENV PATH=/venv/bin:/venv/opt/RATT:$PATH

SHELL ["/bin/bash", "-c"]
# exec form: bash is started directly instead of through a wrapping shell
CMD ["/bin/bash"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
#!/usr/bin/env perl | ||
|
||
use FindBin qw($Bin); | ||
use Getopt::Long; | ||
use strict; | ||
use warnings; | ||
use LWP::UserAgent; | ||
|
||
# Checks that a URL answers an HTTP GET successfully.
#   --url  URL to probe (required)
# Finishes normally when the request succeeds; dies (non-zero exit status)
# with the HTTP status line otherwise.
my $url;

GetOptions(
    "url=s" => \$url,
) or die "Usage: $0 --url URL\n";

# Without this guard an undefined URL would be reported as a site being down.
die "Usage: $0 --url URL\n" unless defined $url && length $url;

my $ua = LWP::UserAgent->new;
$ua->timeout(10);    # seconds
$ua->env_proxy;      # honour proxy settings from the environment
my $response = $ua->get($url);

unless ($response->is_success) {
    die("$url is not up! (" . $response->status_line . ")\n");
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
#!/usr/bin/env perl | ||
use strict; | ||
use warnings; | ||
use Bio::SeqIO; | ||
|
||
# Converts an EMBL file to GenBank format, appending to the output file.
#   ARGV[0]  EMBL input file
#   ARGV[1]  GenBank output file (opened in append mode)
if (@ARGV != 2) { die "USAGE: embl2genbank.pl embl_Input Genbank_Output \n"; }
my ($embl_file, $genbank_file) = @ARGV;

my $seqio  = Bio::SeqIO->new('-format' => 'embl',    '-file' => $embl_file);
my $seqout = Bio::SeqIO->new('-format' => 'genbank', '-file' => ">>$genbank_file");
while (my $seq = $seqio->next_seq) {
    # Derive the locus name from the display id: drop a trailing ".final",
    # then drop everything up to and including the first ".".
    my $locus = $seq->display_id;
    $locus =~ s/\.final$//;
    $locus =~ s/^(\S+?)\.//;
    $seq->accession_number($locus);
    $seq->display_id($locus);
    $seqout->write_seq($seq);
}
Oops, something went wrong.