Skip to content

Commit

Permalink
Better matches EDGE output
Browse files Browse the repository at this point in the history
  • Loading branch information
aw-watson committed Aug 7, 2024
1 parent 1a51cb3 commit 3645606
Show file tree
Hide file tree
Showing 4 changed files with 137 additions and 75 deletions.
60 changes: 41 additions & 19 deletions runAssembly/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,32 +1,54 @@
FROM continuumio/miniconda3:23.5.2-0
FROM continuumio/miniconda3:23.5.2-0 AS build

ENV container=docker

# add conda channels
RUN conda config --add channels conda-forge \
&& conda config --add channels bioconda

RUN conda init bash \
&& . ~/.bashrc \
&& conda create --name assembly \
&& conda activate assembly

# install dependencies
RUN conda install conda-libmamba-solver
RUN conda config --set solver libmamba
RUN conda install -c conda-forge python=3.9
RUN conda install -c bioconda samtools=1.17
RUN conda install -c bioconda racon=1.5
RUN conda install -c bioconda seqtk=1.3
RUN conda install -c bioconda spades=3.15.5
RUN conda install -c bioconda minimap2=2.26
RUN conda install -c bioconda megahit=1.2.9
RUN conda install -c bioconda idba=1.1.3
RUN conda install -c bioconda unicycler=0.5.0
RUN wget https://github.com/ruanjue/wtdbg2/releases/download/v2.5/wtdbg-2.5_x64_linux.tgz
RUN tar -xvzf wtdbg-2.5_x64_linux.tgz
RUN cp wtdbg-2.5_x64_linux/* /opt/conda/bin
RUN conda install -c bioconda flye=2.9.2
# Install every pinned tool into the named "assembly" environment (-n assembly)
# instead of the base environment, so the whole toolchain can be packed and
# moved as a single unit further down.
RUN conda install -n assembly -c conda-forge python=3.9
RUN conda install -n assembly -c bioconda samtools=1.17
RUN conda install -n assembly -c bioconda racon=1.5
RUN conda install -n assembly -c bioconda seqtk=1.3
RUN conda install -n assembly -c bioconda spades=3.15.5
RUN conda install -n assembly -c bioconda minimap2=2.26
RUN conda install -n assembly -c bioconda megahit=1.2.9
RUN conda install -n assembly -c bioconda idba=1.1.3
RUN conda install -n assembly -c bioconda unicycler=0.5.0
# wtdbg2 is fetched as a prebuilt release tarball (not via conda) and its
# contents are copied straight into the environment's bin directory.
RUN wget https://github.com/ruanjue/wtdbg2/releases/download/v2.5/wtdbg-2.5_x64_linux.tgz \
&& tar -xvzf wtdbg-2.5_x64_linux.tgz \
&& cp wtdbg-2.5_x64_linux/* /opt/conda/envs/assembly/bin
RUN conda install -n assembly -c bioconda flye=2.9.2
# git goes into the environment because the runtime stage below runs
# `git clone` with only /venv/bin added to PATH; no apt packages are
# installed in that stage.
RUN conda install -n assembly git
# conda-pack is installed without -n, i.e. outside the "assembly" environment;
# it is only needed at build time to archive that environment.
RUN conda install -c conda-forge conda-pack

# Helper Perl scripts are placed in the environment's bin directory so they
# are included when the environment is packed.
ADD bin/extractLongReads.pl /opt/conda/envs/assembly/bin
ADD bin/getAvgLen.pl /opt/conda/envs/assembly/bin
ADD bin/renameFilterFasta.pl /opt/conda/envs/assembly/bin

# Archive the "assembly" environment, extract it at /venv, and delete the
# tarball within the same layer so it does not persist in the image.
RUN conda-pack -n assembly -o /tmp/env.tar && \
mkdir /venv && cd /venv && tar xf /tmp/env.tar && \
rm /tmp/env.tar

# conda-unpack ships inside the packed environment; per the conda-pack docs it
# fixes up path prefixes after the environment has been moved to a new location.
RUN /venv/bin/conda-unpack

# Runtime stage: starts from a plain Debian image and receives only /venv from
# the build stage.
# NOTE(review): debian:latest is a floating tag, so rebuilds are not
# reproducible — consider pinning a specific release.
FROM debian:latest AS runtime

COPY --from=build /venv /venv
# Presumably needed so the relocated perl finds its core modules under /venv —
# TODO confirm this path matches the perl version that conda installed.
ENV PERL5LIB=/venv/lib/perl5/core_perl


# Put the packed environment's executables (including git) on PATH.
ENV PATH="/venv/bin:$PATH"
# No WORKDIR is set in this stage, so the clone is expected to land in
# /long_read_assembly, which is the directory added to PATH on the next line.
RUN git clone --depth 1 https://gitlab.com/chienchi/long_read_assembly.git
ENV PATH="/long_read_assembly:$PATH"

ADD bin/extractLongReads.pl /opt/conda/bin
ADD bin/getAvgLen.pl /opt/conda/bin
ADD bin/renameFilterFasta.pl /opt/conda/bin

CMD ["/bin/bash"]
SHELL ["/bin/bash", "-c"]
CMD /bin/bash
25 changes: 13 additions & 12 deletions runAssembly/bin/renameFilterFasta.pl
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,17 @@
my $id_mapping;
my $contig_size_for_annotation;
my $project_name;
my $do_annotation;

# Parse command-line options (Getopt::Long spec syntax):
#   "=s" / "=i"  option requires a string / integer value
#   ":s" / ":i"  value is optional ("" / 0 is stored when it is omitted)
#   "{1,}"       option accepts one or more values
# NOTE(review): this listing binds both 'ann=i' and 'ann_size=i' to
# $contig_size_for_annotation and lists 'n=s' twice — presumably the before
# and after sides of a diff shown together; confirm which entries are live.
GetOptions(
'u=s{1,}' => \$fasta,
'd=s' => \$outputDir,
'filt=i' => \$size_filter,
'maxseq=i' => \$max_seq_number,
'id:s' => \$id_mapping,
'ann=i' => \$contig_size_for_annotation,
'n=s' => \$project_name
'ann_size=i' => \$contig_size_for_annotation,
'n=s' => \$project_name,
# NOTE(review): with ':i' a bare "-ann" (no value) stores 0, which is false;
# annotation-style headers are only selected when an explicit non-zero value
# is passed — confirm the caller always supplies one.
'ann:i' => \$do_annotation #default to false (0)
);

my $output= "$outputDir/${project_name}_contigs.fa";
Expand Down Expand Up @@ -56,18 +58,17 @@
chomp $seq;
my $fasta_header;
$id =~ s/\W/_/g;
#not doing any of the annotation-specific tasks for this modularized version of runAssembly
#if($configuration->{DoAnnotation}){
# Choose the FASTA header for the current contig. The scheme depends on whether
# annotation was requested ($do_annotation) and whether an id mapping was
# requested ($id_mapping).
if($do_annotation){
# genbank format limit the LOCUS name length
#if ($id_mapping){
#$fasta_header = ( length($id) > 20 ) ? "contig_$serial_id $id" : "$id contig_$serial_id";
#print $idmap_ofh "contig_$serial_id\t$id\n";
#}else{
#$fasta_header = ( length($project_name) > 20 || $project_name =~/^Assembly/i ) ? "contig_$serial_id $id_info GC_content_$GC_content": "${project_name}_$serial_id $id_info GC_content_$GC_content";
#}
#}else{
if ($id_mapping){
# Ids longer than 20 characters are moved behind a short "contig_N" token so
# the first word of the header stays short; shorter ids keep first position.
$fasta_header = ( length($id) > 20 ) ? "contig_$serial_id $id" : "$id contig_$serial_id";
# Record the contig_N -> original id pair as one tab-separated line.
print $idmap_ofh "contig_$serial_id\t$id\n";
}else{
# Without id mapping the header is built from the project name, unless that
# name is longer than 20 characters or starts with "Assembly"
# (case-insensitive); then the generic "contig_N" prefix is used instead.
$fasta_header = ( length($project_name) > 20 || $project_name =~/^Assembly/i ) ? "contig_$serial_id $id_info GC_content_$GC_content": "${project_name}_$serial_id $id_info GC_content_$GC_content";
}
}else{
# No annotation: keep the id unchanged when id mapping is on, otherwise use
# "<project>_<serial>" followed by the id info and GC content.
$fasta_header = ($id_mapping)? "$id" : "${project_name}_$serial_id $id_info GC_content_$GC_content";
#}
}
if ($len >= $contig_size_for_annotation)
{
print $ofh2 ">$fasta_header\n" . $seq."\n";
Expand Down
11 changes: 7 additions & 4 deletions runAssembly/nextflow.config
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
// Container image used for every process in this pipeline (pinned by tag).
process.container = 'apwat/run_assembly:1.4.5'
// Run the containers through Singularity.
singularity {
enabled = true
// Extra flags passed to the singularity command line.
// NOTE(review): --compat is documented by Singularity/Apptainer as making
// container behaviour closer to Docker/OCI runtimes — confirm that the
// installed version supports this flag.
runOptions = "--compat"
}
params {

assembler = "IDBA_UD"
outDir = '.'
threads = 8
projName = "project"
annotation = false
contigSizeForAnnotation = 700
pairedFiles = "nf_assets/NO_FILE"
unpairedFile = "nf_assets/NO_FILE2"
Expand Down Expand Up @@ -37,6 +42,4 @@ params {
}
workflow.onComplete = {
"rm -rf nf_assets".execute().text
}
process.container = 'apwat/run_assembly:1.2.5'
docker.enabled = true
}
Loading

0 comments on commit 3645606

Please sign in to comment.