Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Nf annotation #10

Merged
merged 9 commits into from
Oct 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
**/__pycache__/*
.nextflow*
*/work/*
*/logs/*
*/logs/*
**/ec_info/*
46 changes: 46 additions & 0 deletions phageFinder/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# syntax=docker/dockerfile:1
# Two-stage image for the phageFinder workflow.
#   build   : creates a conda environment holding phage_finder's dependencies,
#             packs it with conda-pack and downloads EDGE's phage_finder v2.1.
#   runtime : plain Debian image that only receives the unpacked environment
#             and the phage_finder distribution.
FROM continuumio/miniconda3:23.5.2-0 AS build

ENV container=docker

# add conda channels
RUN conda config --add channels conda-forge \
    && conda config --add channels bioconda

# Create the target environment. Every later step addresses it with
# "-n phageFinder", so no shell activation is needed (an activation would not
# survive past its own RUN layer anyway).
RUN conda create --yes --name phageFinder

# install dependencies for phageFinder
RUN conda install --yes -n phageFinder -c bioconda aragorn
RUN conda install --yes -n phageFinder -c bioconda blast-legacy
RUN conda install --yes -n phageFinder -c bioconda hmmer
RUN conda install --yes -n phageFinder -c bioconda trnascan-se
RUN conda install --yes -c conda-forge conda-pack

# Project helper scripts go into the environment's bin so they are packed with
# it. The destination ends with "/" because a wildcard source may expand to
# several files, which requires the destination to be a directory.
COPY bin/*.pl /opt/conda/envs/phageFinder/bin/

# Pack the environment and unpack it under /venv for the runtime stage.
RUN conda-pack -n phageFinder -o /tmp/env.tar && \
    mkdir /venv && cd /venv && tar xf /tmp/env.tar && \
    rm /tmp/env.tar

# Rewrite the path prefixes recorded inside the packed environment.
RUN /venv/bin/conda-unpack

# we need the version of phage_finder from EDGE's third-party database to handle some bugs
RUN wget https://ref-db.edgebioinformatics.org/EDGE/dev/edge_dev_thirdParty_softwares.tgz \
    && tar -xvzf edge_dev_thirdParty_softwares.tgz \
    && tar -xvzf thirdParty/phage_finder_v2.1.tar.gz -C .

RUN chmod -R a+rx phage_finder_v2.1/*

# NOTE(review): "latest" is a moving tag; consider pinning a Debian release so
# rebuilds are reproducible.
FROM debian:latest AS runtime

COPY --from=build /venv /venv
COPY --from=build /phage_finder_v2.1 /venv/opt/phage_finder_v2.1
ENV PATH=/venv/opt/phage_finder_v2.1/bin:/venv/bin:$PATH

SHELL ["/bin/bash", "-c"]
# Exec form: bash is started directly instead of being wrapped in "bash -c".
CMD ["/bin/bash"]
101 changes: 101 additions & 0 deletions phageFinder/bin/phageFinder_prepare.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
#!/usr/bin/env perl
# Purpose: prepare files for phage finder.
# This script takes a GFF file from Prokka as input, and produces a
# phage_finder_info.txt (protein table)
# Written by Chien-Chi Lo
# 16 Oct 2014

use strict;
use warnings;
use Getopt::Long;
use File::Basename;

# ---- command-line handling ---------------------------------------------------
# -o defaults to the current directory. Without a default, the output paths
# built later as "$outDir/..." would start at the filesystem root when -o is
# omitted.
my $outDir = ".";
my $version = 0.1;
GetOptions(
    "o=s"     => \$outDir,
    "version" => sub { print "Version $version\n"; exit; },
    "help|?"  => sub { Usage() },
);

# Exactly two positional arguments are expected: the GFF file and the FASTA file.
if (@ARGV != 2) { Usage(); }
unless ( -e $ARGV[0] ) { print "GFF File not exist\n"; Usage(); }
unless ( -e $ARGV[1] ) { print "Genome/Contig fasta file not exist\n"; Usage(); }

# Three-argument open, so the file name can never be parsed as an open mode or
# a pipe. $fh is read later, when the protein table is written.
open(my $fh, '<', $ARGV[0]) or die "Cannot open GFF file $ARGV[0]: $!\n";

# ---- bookkeeping shared by the FASTA pass below and the GFF pass after it -----
my %len;                          # sequence length, keyed by original sequence ID
my $cds_count=0;                  # number of CDS rows written to the protein table
my %id_map;                       # original sequence ID -> generated ID
my $id_map_file="$outDir/id_map.txt";
my $seq_id="Sequence0000001";     # string increment yields Sequence0000002, ...

## rename fasta file to mapped id
# Writes two files into $outDir:
#   Assembly.con : the input FASTA with every header replaced by a generated ID
#   id_map.txt   : one "generated_id<TAB>original_id" line per sequence
my $new_fasta="$outDir/Assembly.con";
# Three-argument open keeps the paths from being parsed as a mode or a pipe.
open(my $ofh, '>', $new_fasta) or die "Cannot write $new_fasta: $!\n";
open(my $ffh, '<', $ARGV[1]) or die "Cannot open Fasta file $ARGV[1]: $!\n";
open(my $id_fh, '>', $id_map_file) or die "Cannot write $id_map_file: $!\n";
my ($id,$seq);
while (my $line = <$ffh>)
{
    # Remove LF or CRLF so a carriage return is neither counted in the sequence
    # length nor copied into Assembly.con.
    $line =~ s/\r?\n?\z//;
    if ($line =~ /^>(\S+)/)
    {
        my $header_id = $1;
        # A new header ends the previous record: store its length first.
        if (defined $id and defined $seq and length $seq) {
            $len{$id} = length($seq);
        }
        $id = $header_id;
        $id_map{$id} = $seq_id;
        print $id_fh "$seq_id\t$id\n";
        print $ofh ">$seq_id\n";
        $seq_id++;
        $seq = "";
    }
    else
    {
        $seq .= $line;
        print $ofh $line, "\n";
    }
}
# The last record has no following header, so store its length here.
if (defined $id and defined $seq and length $seq) {
    $len{$id} = length($seq);
}

close $ffh;
# Buffered write errors only surface at close, so check the output handles.
close $id_fh or die "Cannot finish writing $id_map_file: $!\n";
close $ofh or die "Cannot finish writing $new_fasta: $!\n";

## prepare phage_finder_info file
# One tab-separated row is written per CDS feature of the GFF file:
#   generated sequence ID, sequence length, locus tag, start, end, product
my $info_file = "$outDir/phage_finder_info.txt";
open (my $ph_fh, '>', $info_file) or die "Cannot write $info_file\n";
while (my $line = <$fh>) # each LOCUS
{
    chomp $line;

    # Everything after a ##FASTA directive is sequence data, not features.
    last if $line =~ /^##FASTA/;

    if ($line =~ /#sequence-region/)
    {
        # "##sequence-region <id> <start> <end>": record the region length.
        my (undef, $region_id, $start, $end) = split /\s+/, $line;
        if (defined $region_id and defined $start and defined $end) {
            $len{$region_id} = $end - $start + 1;
        }
        next;
    }

    my ($seqid,$source,$type,$start,$end,$score,$strand,$phase,$Attributes) = split /\t/, $line;
    next unless defined $type and $type eq "CDS";

    # Parse "key=value;key=value". The limit of 2 keeps '=' inside a value and
    # keeps empty values, so one odd attribute cannot shift all later pairs.
    my %annotations;
    for my $pair (split /;/, (defined $Attributes ? $Attributes : '')) {
        my ($key, $value) = split /=/, $pair, 2;
        next unless defined $value;
        $annotations{$key} = $value;
    }

    my $product = $annotations{"product"} || $annotations{"Note"} || $annotations{"function"} || "Unknown";
    my $locus_tag = $annotations{"locus_tag"} || $annotations{"Name"} || "";
    # Undo the percent-encoding of ',' and ';' in the product text.
    $product =~ s/\%2C/,/g;
    $product =~ s/\%3B/;/g;

    # A CDS on a sequence absent from the FASTA file has no generated ID; say so
    # and skip it rather than writing a row with an empty ID column.
    my $mapped_id = $id_map{$seqid};
    unless (defined $mapped_id) {
        warn "Skipping CDS on '$seqid': sequence not found in the FASTA file\n";
        next;
    }

    my $region_len = defined $len{$seqid} ? $len{$seqid} : '';
    print $ph_fh join("\t", $mapped_id, $region_len, $locus_tag, $start, $end, $product), "\n";
    $cds_count++;
}
# Buffered write errors only surface at close.
close $ph_fh or die "Cannot finish writing $info_file: $!\n";
close $fh;


# Print the command-line synopsis together with the script version on STDOUT,
# then end the program through a bare exit.
sub Usage
{
    my @help_lines = (
        "Usage: perl $0 -o outDir GFF_file Fasta_file",
        "Version $version",
        "-o Output directory.",
    );
    print map { "$_\n" } @help_lines;
    exit;
}
30 changes: 30 additions & 0 deletions phageFinder/bin/phageFinder_summary.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/usr/bin/env perl

use strict;
use warnings;
use Getopt::Long;

# Purpose: write phageFinder_summary.txt, a copy of the result table in which
# the first column is translated back through the ID map wherever that value is
# a known generated ID. Fields are re-joined with tabs.
my $id_file;    # -i: ID map, one "<new_id> <original_id>" pair per line
my $table;      # -t: result table whose first column is looked up in the map

GetOptions(
    't=s' => \$table,
    'i=s' => \$id_file
);

# Both inputs are required; fail with a clear message instead of letting an
# undefined file name reach open().
unless (defined $table and defined $id_file) {
    die "Usage: $0 -t <result table> -i <id map>\n";
}

my %id_map;
# Diagnostics go to STDERR, one per line, so STDOUT stays clean.
print STDERR "id map: $id_file\n";
print STDERR "table: $table\n";

open(my $fh, '<', $id_file) or die "Cannot read $id_file: $!\n";
while (my $line = <$fh>) {
    chomp $line;
    my ($new_id, $original_id) = split ' ', $line;
    next unless defined $original_id;    # blank or incomplete line
    $id_map{$new_id} = $original_id;
}
close $fh;

open(my $ofh, '>', "phageFinder_summary.txt") or die "Cannot write phageFinder_summary.txt: $!\n";
open(my $result_fh, '<', $table) or die "Cannot read $table: $!\n";
while (my $row = <$result_fh>)
{
    my @fields = split /\s+/, $row;
    # Blank lines have no first field; pass them through unchanged.
    if (@fields and defined $id_map{$fields[0]}) {
        $fields[0] = $id_map{$fields[0]};
    }
    print $ofh join("\t", @fields), "\n";
}
close $result_fh;
# Buffered write errors only surface at close.
close $ofh or die "Cannot finish writing phageFinder_summary.txt: $!\n";
15 changes: 15 additions & 0 deletions phageFinder/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// Default parameters of the phageFinder workflow.
params {
    outDir  = "."     // base directory for published results
    gffFile = null    // annotation GFF handed to phageFinderPrep
    faaFile = null    // protein FASTA, staged as Assembly.pep for phageFinder
    fnaFile = null    // genome/contig FASTA handed to phageFinderPrep
    numCPU  = 8       // passed as the second argument to phage_finder_v2.1.sh
}

// Processes run in Singularity; "--compat" is handed to the container runtime.
singularity {
    enabled    = true
    runOptions = "--compat"
}

// Container image used by every process.
process {
    container = "apwat/phage_finder:1.0"
}
77 changes: 77 additions & 0 deletions phageFinder/phageFinder.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#!/usr/bin/env nextflow


// Runs phageFinder_prepare.pl on the annotation GFF and the genome FASTA,
// writing its output files into the task directory.
process phageFinderPrep {

    input:
    path gff
    path fna

    output:
    // id_map.txt is also emitted on its own because the summary step needs it.
    path "id_map.txt", emit: idMap
    // Everything produced here is forwarded to the phageFinder process.
    path "*", emit: allPFoutput

    script:
    """
    phageFinder_prepare.pl -o . ${gff} ${fna}
    """
}

// Runs phage_finder on the prepared files. The protein FASTA is staged under
// the name Assembly.pep; only log.txt is published.
process phageFinder {
    publishDir(
        path: "${params.outDir}/AssemblyBasedAnalysis/Prophage",
        mode: 'copy',
        pattern: "log.txt"
    )

    input:
    path prepOut
    path faa, stageAs: "Assembly.pep"

    output:
    path "PFPR_tab.txt", emit: phageTable
    path "log.txt"

    // phage_finder_v2.1.sh must be on PATH; stdout and stderr both go to log.txt.
    script:
    """
    phage_finder_v2.1.sh Assembly ${params.numCPU} 1>log.txt 2>&1
    """
}


// Builds phageFinder_summary.txt from the result table and the ID map, then
// publishes it.
process summary {
    publishDir(
        path: "${params.outDir}/AssemblyBasedAnalysis/Prophage",
        mode: 'copy'
    )

    input:
    path idMap
    path pfprTab

    output:
    path "phageFinder_summary.txt"

    script:
    """
    phageFinder_summary.pl -t ${pfprTab} -i ${idMap}
    """
}


// Entry point: prepare the inputs, run phage_finder, then summarise its table.
workflow {
    // One channel per input file; checkIfExists rejects a missing path up front.
    annotationGff = channel.fromPath(params.gffFile, checkIfExists: true)
    genomeFasta   = channel.fromPath(params.fnaFile, checkIfExists: true)

    // An empty protein FASTA is filtered out, leaving phageFinder with no input.
    proteinFasta  = channel
        .fromPath(params.faaFile, checkIfExists: true)
        .filter { it.size() > 0 }

    phageFinderPrep(annotationGff, genomeFasta)
    phageFinder(phageFinderPrep.out.allPFoutput, proteinFasta)
    summary(phageFinderPrep.out.idMap, phageFinder.out.phageTable)
}
7 changes: 7 additions & 0 deletions phageFinder/test_files/parameters/basic.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"gffFile" : "test_files/PROJECT.gff",
"faaFile" : "test_files/PROJECT.faa",
"fnaFile" : "test_files/PROJECT.fna",
"numCPU" : 4,
"outDir": "test_phageFinder"
}
47 changes: 47 additions & 0 deletions runAnnotation/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# syntax=docker/dockerfile:1
# Two-stage image for the annotation workflow.
#   build   : creates a conda environment with the annotation tools, adds
#             EDGE's RATT and this project's scripts, then packs it.
#   runtime : plain Debian image that only receives the unpacked environment.
FROM continuumio/miniconda3:24.5.0-0 AS build

ENV container=docker

# add conda channels
RUN conda config --add channels conda-forge \
    && conda config --add channels bioconda

# Create the target environment. Every later step addresses it with
# "-n annotation", so no shell activation is needed (an activation would not
# survive past its own RUN layer anyway).
RUN conda create --yes --name annotation

RUN conda install --yes -n annotation -c bioconda perl-lwp-protocol-https
RUN conda install --yes -n annotation -c conda-forge r-base
RUN conda install --yes -n annotation -c bioconda prokka
RUN conda install --yes -n annotation -c bioconda mummer
RUN conda install --yes -n annotation -c bioconda blast=2.16
RUN conda install --yes -n annotation -c bioconda perl-yaml

RUN conda install --yes -c conda-forge conda-pack

#Custom implementation of RATT from EDGE
# mkdir -p plus the trailing slash guarantees RATT lands at <env>/opt/RATT;
# without them RATT would be renamed to "opt" if that directory were missing.
RUN wget https://ref-db.edgebioinformatics.org/EDGE/dev/edge_dev_thirdParty_softwares.tgz \
    && tar -xvzf edge_dev_thirdParty_softwares.tgz \
    && tar -xvzf thirdParty/RATT.tar.gz -C . \
    && mkdir -p /opt/conda/envs/annotation/opt \
    && mv RATT /opt/conda/envs/annotation/opt/

#add scripts from this project to bin
# The destination ends with "/" because a wildcard source may expand to several
# files, which requires the destination to be a directory.
COPY bin/* /opt/conda/envs/annotation/bin/

#pack environment for runtime image
RUN conda-pack -n annotation -o /tmp/env.tar && \
    mkdir /venv && cd /venv && tar xf /tmp/env.tar && \
    rm /tmp/env.tar

# Rewrite the path prefixes recorded inside the packed environment.
RUN /venv/bin/conda-unpack

# NOTE(review): "latest" is a moving tag; consider pinning a Debian release so
# rebuilds are reproducible.
FROM debian:latest AS runtime

COPY --from=build /venv /venv

ENV PATH=/venv/bin:/venv/opt/RATT:$PATH

SHELL ["/bin/bash", "-c"]
# Exec form: bash is started directly instead of being wrapped in "bash -c".
CMD ["/bin/bash"]
27 changes: 27 additions & 0 deletions runAnnotation/bin/check_server_up.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/usr/bin/env perl

use FindBin qw($Bin);
use Getopt::Long;
use strict;
use warnings;
use LWP::UserAgent;

# Purpose: exit normally when an HTTP GET of --url succeeds, die otherwise.
my $url;    # --url: address to probe

GetOptions(
    "url=s" => \$url
);

# A missing --url would otherwise reach the request as an undefined value.
unless (defined $url and length $url) {
    die "Usage: $0 --url <URL>\n";
}

my $ua = LWP::UserAgent->new;
$ua->timeout(10);    # seconds
$ua->env_proxy;      # pick up proxy settings from the *_proxy environment variables
my $response = $ua->get($url);

# Include the response status line so the reason for the failure is visible.
unless ($response->is_success) {
    die("$url is not up! (" . $response->status_line . ")");
}
17 changes: 17 additions & 0 deletions runAnnotation/bin/embl2genbank.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/usr/bin/env perl
use strict;
use warnings;
use Bio::SeqIO;

# Purpose: convert every record of an EMBL file to GenBank format, appending
# the records to the output file.
if (@ARGV != 2) { die "USAGE: embl2genbank.pl embl_Input Genbank_Output \n"; }

my ($embl_file, $genbank_file) = @ARGV;

# Direct method calls instead of the indirect-object form "new Bio::SeqIO(...)".
my $seqio = Bio::SeqIO->new('-format' => 'embl', '-file' => $embl_file);
# ">>" opens the GenBank file in append mode, so existing records are kept.
my $seqout = Bio::SeqIO->new('-format' => 'genbank', '-file' => ">>$genbank_file");

while (my $seq = $seqio->next_seq) {
    my $locus = $seq->display_id;
    $locus =~ s/\.final$//;       # drop a trailing ".final"
    $locus =~ s/^(\S+?)\.//;      # drop the leading text up to and including the first dot
    # The cleaned name is used both as accession number and as display ID.
    $seq->accession_number($locus);
    $seq->display_id($locus);
    $seqout->write_seq($seq);
}
Loading