-
Notifications
You must be signed in to change notification settings - Fork 126
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Update medaka to version 2.0.0 (#1053)
* adding medaka verison 2.0.0 * adding cpu version of 2.0.0 * Update Dockerfile * Adding MEDAKA_VER and PYABPOA_VER to pip install lines * Added pyabpoa
- Loading branch information
Showing
3 changed files
with
194 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,158 @@ | ||
# Global build arguments. They are declared ahead of the first FROM, so each
# stage that needs one re-declares it by name to pick up the default set here.

# medaka and pyabpoa versions installed with pip in the app stage
ARG MEDAKA_VER=2.0.0
ARG PYABPOA_VER=1.5.3

# bcftools and htslib default to the same version as samtools
ARG SAMTOOLS_VER=1.21
ARG BCFTOOLS_VER=$SAMTOOLS_VER
ARG HTSLIB_VER=$SAMTOOLS_VER

# prebuilt minimap2 release downloaded in the builder stage
ARG MINIMAP2_VER=2.28
|
||
### start of builder stage ###
# Compiles bcftools, samtools, and htslib from source and downloads the
# prebuilt minimap2 binary. The app stage copies the results out of
# /usr/local/bin, so nothing else from this stage reaches the final image.
FROM ubuntu:jammy AS builder

# re-declare the global ARGs so their defaults are visible inside this stage
ARG SAMTOOLS_VER
ARG BCFTOOLS_VER
ARG HTSLIB_VER
ARG MINIMAP2_VER

# build-time only: keeps apt from prompting without persisting into the image env
ARG DEBIAN_FRONTEND=noninteractive

# with pipefail, a failed download in a `curl | tar` pipeline fails the build
# instead of being masked by the exit status of the last command in the pipe
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# install dependencies required for compiling bcftools, samtools, and htslib
# (each package listed once, in alphabetical order)
RUN apt-get update && apt-get install --no-install-recommends -y \
    autoconf \
    automake \
    bzip2 \
    ca-certificates \
    curl \
    gcc \
    gnuplot \
    libbz2-dev \
    libcurl4-gnutls-dev \
    libdeflate-dev \
    libgsl0-dev \
    liblzma-dev \
    libncurses5-dev \
    libperl-dev \
    libssl-dev \
    make \
    perl \
    procps \
    wget \
    zlib1g-dev && \
    rm -rf /var/lib/apt/lists/* && apt-get autoclean

# download, compile, and install bcftools
RUN wget -q https://github.com/samtools/bcftools/releases/download/${BCFTOOLS_VER}/bcftools-${BCFTOOLS_VER}.tar.bz2 && \
    tar -xjf bcftools-${BCFTOOLS_VER}.tar.bz2 && \
    rm -v bcftools-${BCFTOOLS_VER}.tar.bz2 && \
    cd bcftools-${BCFTOOLS_VER} && \
    ./configure --enable-libgsl --enable-perl-filters && \
    make && \
    make install

# download, compile, and install samtools
# (the tarball is removed after extraction, matching the bcftools and htslib steps)
RUN wget -q https://github.com/samtools/samtools/releases/download/${SAMTOOLS_VER}/samtools-${SAMTOOLS_VER}.tar.bz2 && \
    tar -xjf samtools-${SAMTOOLS_VER}.tar.bz2 && \
    rm -v samtools-${SAMTOOLS_VER}.tar.bz2 && \
    cd samtools-${SAMTOOLS_VER} && \
    ./configure && \
    make && \
    make install

# download, compile, and install htslib
RUN wget -q https://github.com/samtools/htslib/releases/download/${HTSLIB_VER}/htslib-${HTSLIB_VER}.tar.bz2 && \
    tar -vxjf htslib-${HTSLIB_VER}.tar.bz2 && \
    rm -v htslib-${HTSLIB_VER}.tar.bz2 && \
    cd htslib-${HTSLIB_VER} && \
    ./configure && \
    make && \
    make install

# download the prebuilt minimap2 release and put the binary on the PATH;
# -f makes curl exit non-zero on an HTTP error rather than piping an error page into tar
RUN curl -fL https://github.com/lh3/minimap2/releases/download/v${MINIMAP2_VER}/minimap2-${MINIMAP2_VER}_x64-linux.tar.bz2 | tar -jxvf - --no-same-owner && \
    mv minimap2-${MINIMAP2_VER}_x64-linux/minimap2 /usr/local/bin
|
||
### start of app stage ###
# Runtime image: the tools produced by the builder stage plus medaka and
# pyabpoa installed with pip.
FROM ubuntu:jammy AS app

# re-declare the global ARGs so their defaults are visible inside this stage
ARG MEDAKA_VER
ARG PYABPOA_VER

LABEL base.image="ubuntu:jammy"
LABEL dockerfile.version=1
LABEL container.version="1"
LABEL software="Medaka"
LABEL software.version="${MEDAKA_VER}"
LABEL description="Consensus sequence correction provided by ONT Research"
LABEL website="https://github.com/nanoporetech/medaka"
LABEL license="https://github.com/nanoporetech/medaka/blob/master/LICENSE.md"
LABEL maintainer="Curtis Kapsak"
LABEL maintainer.email="[email protected]"
LABEL maintainer1="Erin Young"
LABEL maintainer1.email="[email protected]"

# build-time only: keeps apt from prompting without persisting into the image env
ARG DEBIAN_FRONTEND=noninteractive

# install the libraries needed by the copied-in binaries, plus python3/pip and
# the compiler and headers made available to the pip installs below
RUN apt-get update && apt-get install --no-install-recommends -y \
    build-essential \
    bzip2 \
    libcurl4-gnutls-dev \
    libdeflate-dev \
    libffi-dev \
    libgsl-dev \
    liblzma-dev \
    libncurses5 \
    libssl-dev \
    libxml2-dev \
    libxslt1-dev \
    perl \
    python-is-python3 \
    python3 \
    python3-dev \
    python3-pip \
    zlib1g \
    zlib1g-dev \
    && apt-get autoclean && rm -rf /var/lib/apt/lists/*

# copy in everything the builder stage placed in /usr/local/bin
# (bcftools, samtools, htslib tools, and minimap2)
COPY --from=builder /usr/local/bin/* /usr/local/bin/

# install medaka (CPU build) and pyabpoa via pip, pinned to the versions in the ARGs;
# the extra index serves the CPU-only PyTorch wheels
RUN pip install medaka-cpu==${MEDAKA_VER} --extra-index-url https://download.pytorch.org/whl/cpu -vv --no-cache-dir && \
    pip install pyabpoa==${PYABPOA_VER} -vv --no-cache-dir

ENV LC_ALL=C

# final working directory is /data
WORKDIR /data

# default command is to pull up help options
# (exec form: medaka runs directly rather than under `/bin/sh -c`)
CMD ["medaka", "--help"]
|
||
### start of test stage ###
# Smoke tests run on top of the app image: checks that the tools are on the
# PATH, then polishes a real assembly with medaka_consensus.
FROM app AS test

# wget is only needed to fetch the test data; the apt lists are removed in the
# same layer that created them
RUN apt-get update && apt-get install --no-install-recommends -y wget && \
    rm -rf /var/lib/apt/lists/*

# making sure dependencies are in path
RUN samtools --help && minimap2 --help && tabix --help && bgzip --help

RUN medaka --help && \
    medaka --version

# set working directory so that all test inputs & outputs are kept in /test
WORKDIR /test

# using on real data (CRPA isolate)
RUN wget -q https://www.ebi.ac.uk/ena/browser/api/fasta/GCA_021601745.3 -O GCA_021601745.3.fasta && \
    wget -q ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR234/068/SRR23473168/SRR23473168_1.fastq.gz && \
    medaka_consensus -i SRR23473168_1.fastq.gz -d GCA_021601745.3.fasta -o testing -t 4

# listing available models
RUN medaka tools list_models
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
# medaka container | ||
|
||
Main tool : [medaka](https://github.com/nanoporetech/medaka) | ||
|
||
Additional tools: | ||
- bcftools version 1.21 | ||
- samtools version 1.21 | ||
- htslib version 1.21 | ||
- minimap2 version 2.28 | ||
- pyabpoa version 1.5.3 | ||
|
||
Full documentation: [https://github.com/nanoporetech/medaka](https://github.com/nanoporetech/medaka) | ||
|
||
> medaka is a tool to create consensus sequences and variant calls from nanopore sequencing data. This task is performed using neural networks applied a pileup of individual sequencing reads against a draft assembly. It provides state-of-the-art results outperforming sequence-graph based methods and signal-based methods, whilst also being faster. | ||
## Example Usage | ||
|
||
```bash | ||
|
||
# listing models | ||
medaka tools list_models | ||
|
||
# polishing | ||
medaka_consensus -i sample.fastq.gz -d sample.fasta -o medaka/sample -t 4 | ||
|
||
``` | ||
|
||
## Medaka models | ||
|
||
Medaka updates frequently. These are the medaka models in this image: | ||
``` | ||
Available: r103_fast_g507, r103_fast_snp_g507, r103_fast_variant_g507, r103_hac_g507, r103_hac_snp_g507, r103_hac_variant_g507, r103_sup_g507, r103_sup_snp_g507, r103_sup_variant_g507, r1041_e82_260bps_fast_g632, r1041_e82_260bps_fast_variant_g632, r1041_e82_260bps_hac_g632, r1041_e82_260bps_hac_v4.0.0, r1041_e82_260bps_hac_v4.1.0, r1041_e82_260bps_hac_variant_g632, r1041_e82_260bps_hac_variant_v4.1.0, r1041_e82_260bps_joint_apk_ulk_v5.0.0, r1041_e82_260bps_sup_g632, r1041_e82_260bps_sup_v4.0.0, r1041_e82_260bps_sup_v4.1.0, r1041_e82_260bps_sup_variant_g632, r1041_e82_260bps_sup_variant_v4.1.0, r1041_e82_400bps_bacterial_methylation, r1041_e82_400bps_fast_g615, r1041_e82_400bps_fast_g632, r1041_e82_400bps_fast_variant_g615, r1041_e82_400bps_fast_variant_g632, r1041_e82_400bps_hac_g615, r1041_e82_400bps_hac_g632, r1041_e82_400bps_hac_v4.0.0, r1041_e82_400bps_hac_v4.1.0, r1041_e82_400bps_hac_v4.2.0, r1041_e82_400bps_hac_v4.3.0, r1041_e82_400bps_hac_v5.0.0, r1041_e82_400bps_hac_variant_g615, r1041_e82_400bps_hac_variant_g632, r1041_e82_400bps_hac_variant_v4.1.0, r1041_e82_400bps_hac_variant_v4.2.0, r1041_e82_400bps_hac_variant_v4.3.0, r1041_e82_400bps_hac_variant_v5.0.0, r1041_e82_400bps_sup_g615, r1041_e82_400bps_sup_v4.0.0, r1041_e82_400bps_sup_v4.1.0, r1041_e82_400bps_sup_v4.2.0, r1041_e82_400bps_sup_v4.3.0, r1041_e82_400bps_sup_v5.0.0, r1041_e82_400bps_sup_variant_g615, r1041_e82_400bps_sup_variant_v4.1.0, r1041_e82_400bps_sup_variant_v4.2.0, r1041_e82_400bps_sup_variant_v4.3.0, r1041_e82_400bps_sup_variant_v5.0.0, r104_e81_fast_g5015, r104_e81_fast_variant_g5015, r104_e81_hac_g5015, r104_e81_hac_variant_g5015, r104_e81_sup_g5015, r104_e81_sup_g610, r104_e81_sup_variant_g610, r941_e81_fast_g514, r941_e81_fast_variant_g514, r941_e81_hac_g514, r941_e81_hac_variant_g514, r941_e81_sup_g514, r941_e81_sup_variant_g514, r941_min_fast_g507, r941_min_fast_snp_g507, r941_min_fast_variant_g507, r941_min_hac_g507, r941_min_hac_snp_g507, r941_min_hac_variant_g507, 
r941_min_sup_g507, r941_min_sup_snp_g507, r941_min_sup_variant_g507, r941_prom_fast_g507, r941_prom_fast_snp_g507, r941_prom_fast_variant_g507, r941_prom_hac_g507, r941_prom_hac_snp_g507, r941_prom_hac_variant_g507, r941_prom_sup_g507, r941_prom_sup_snp_g507, r941_prom_sup_variant_g507, r941_sup_plant_g610, r941_sup_plant_variant_g610 | ||
Default consensus: r1041_e82_400bps_sup_v5.0.0 | ||
Default variant: r1041_e82_400bps_sup_variant_v5.0.0 | ||
``` |