-
Notifications
You must be signed in to change notification settings - Fork 126
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
updating the emmtyper blast database
- Loading branch information
Showing
3 changed files
with
123 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
# Stage "app": micromamba base image that will hold emmtyper and the CDC emm database helper script
FROM mambaorg/micromamba:1.5.8 AS app

# emmtyper version to install; also recorded in the software.version label
ARG EMMTYPER_VER="0.2.0"
# commit of Daniel-VM/cdc-utilities whose archive is downloaded further down
ARG SCRIPT_HASH="c0d1c26625cfe9ac458306089358dc26edad06f0"

# build and run as root, since the micromamba image has 'mambauser' set as the $USER
USER root
# set workdir to default for building
WORKDIR /

LABEL base.image="mambaorg/micromamba:1.5.8"
LABEL dockerfile.version="1"
LABEL software="emmtyper"
LABEL software.version="${EMMTYPER_VER}"
LABEL description="Conda environment for emmtyper. emmtyper is a command line tool for emm-typing of Streptococcus pyogenes using a de novo or complete assembly."
LABEL website="https://github.com/MDU-PHL/emmtyper"
LABEL license="GNU General Public License v3.0"
LABEL license.url="https://github.com/MDU-PHL/emmtyper/blob/master/LICENSE"
LABEL maintainer="Erin Young"
LABEL maintainer.email="[email protected]"
|
||
# OS-level dependencies; wget and unzip are used further down to fetch and
# unpack the helper script. The apt package lists are removed in the same layer.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      ca-certificates \
      procps \
      unzip \
      wget \
 && apt-get autoclean \
 && rm -rf /var/lib/apt/lists/*
|
||
# create a dedicated 'emmtyper' conda environment (emmtyper at the pinned
# version, plus pip) and purge micromamba's caches in the same layer
RUN micromamba create \
      -n emmtyper \
      -c conda-forge \
      -c bioconda \
      -c defaults \
      emmtyper=${EMMTYPER_VER} \
      pip \
 && micromamba clean -a -y -f
|
||
# fetch cdc-utilities at the pinned commit and install emm_download_makedb.py
# into /usr/local/bin; a python3 shebang line is written ahead of the script's
# contents and the file is made executable so it can be invoked directly
RUN wget -q https://github.com/Daniel-VM/cdc-utilities/archive/${SCRIPT_HASH}.zip \
 && unzip ${SCRIPT_HASH}.zip \
 && { echo '#!/usr/bin/env python3' && cat cdc-utilities*/emm_download_makedb.py; } > /usr/local/bin/emm_download_makedb.py \
 && chmod +x /usr/local/bin/emm_download_makedb.py \
 && rm -rf ${SCRIPT_HASH}.zip cdc-utilities*
|
||
# put the emmtyper conda environment first on PATH so its executables resolve
# without activating the environment. 'emmtyper' is the only environment this
# Dockerfile creates, so no other environment directory is listed.
ENV PATH="/opt/conda/envs/emmtyper/bin:${PATH}" \
    LC_ALL=C.UTF-8

# extra python packages, installed with the pip that is now first on PATH
# NOTE(review): presumably required by emm_download_makedb.py - confirm
RUN pip install --no-cache-dir beautifulsoup4 requests

# default command prints the help text of both tools; exec form with an
# explicit shell because the two commands are chained with &&
CMD ["/bin/sh", "-c", "emmtyper --help && emm_download_makedb.py -h"]
|
||
# the downloaded CDC emm database is kept in its own directory
WORKDIR /cdc_emm_database

# download the latest emm fasta from the CDC server into that directory
RUN emm_download_makedb.py \
      --ftp_url 'https://ftp.cdc.gov/' \
      --remote_path 'pub/infectious_diseases/biotech/tsemm/' \
      --local_path /cdc_emm_database

# build a nucleotide BLAST database under the fixed name 'cdc_emm',
# so the database path does not carry a date
RUN makeblastdb \
      -in /cdc_emm_database/cdc_emm_database*fasta \
      -dbtype nucl \
      -out /cdc_emm_database/cdc_emm

# final working directory of the app stage
WORKDIR /data
|
||
# Stage "test": runs emmtyper against a public assembly to check the image works
FROM app AS test

# both executables must be found on PATH and print their help
RUN emmtyper --help && emm_download_makedb.py -h

# download a test assembly from NCBI over HTTPS (same host and path as the FTP
# URL; plain FTP is frequently blocked on build hosts), then decompress and rename it
RUN wget -q https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/006/785/GCA_000006785.2_ASM678v2/GCA_000006785.2_ASM678v2_genomic.fna.gz && \
    gunzip GCA_000006785.2_ASM678v2_genomic.fna.gz && \
    mv GCA_000006785.2_ASM678v2_genomic.fna test_data.fasta

# run with default settings, then in PCR mode writing to a file, and show the first lines of that file
RUN emmtyper test_data.fasta && \
    emmtyper -w pcr test_data.fasta -o test_out && \
    head -10 test_out

# testing new database
RUN emmtyper --blast_db /cdc_emm_database/cdc_emm test_data.fasta -o test3 && \
    head -10 test3

RUN emmtyper --version
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
# emmtyper container | ||
|
||
Main tool: [emmtyper](https://github.com/MDU-PHL/emmtyper) | ||
|
||
Code repository: [emmtyper](https://github.com/MDU-PHL/emmtyper) | ||
|
||
Additional tools: | ||
- [Daniel-VM/cdc-utilities](https://github.com/Daniel-VM/cdc-utilities): c0d1c26625cfe9ac458306089358dc26edad06f0 | ||
|
||
Basic information on how to use this tool: | ||
- executable: emmtyper | ||
- help: --help | ||
- version: --version | ||
- description: | | ||
'emmtyper' is a command line tool for emm-typing of _Streptococcus pyogenes_ using a _de novo_ or complete assembly. | ||
|
||
Additional information: | ||
|
||
This image also contains `emm_download_makedb.py` from https://github.com/Daniel-VM/cdc-utilities for downloading the most recent FASTA file for emm typing. | ||
|
||
Full documentation: https://github.com/MDU-PHL/emmtyper | ||
|
||
## Example Usage | ||
|
||
```bash | ||
# run emmtyper in BLAST (default) mode: | ||
emmtyper sample.fasta -o outfile | ||
|
||
# or with output written in verbose format: | ||
emmtyper sample.fasta -o outfile -f verbose | ||
|
||
# run emmtyper in PCR mode (useful for troubleshooting, see documentation) | ||
emmtyper -w pcr sample.fasta -o outfile | ||
|
||
# downloading a new fasta file for the most-current emm types | ||
emm_download_makedb.py \ | ||
--ftp_url 'https://ftp.cdc.gov/' \ | ||
--remote_path 'pub/infectious_diseases/biotech/tsemm/' \ | ||
--local_path ./out_fasta | ||
|
||
# using the database in the image downloaded via emm_download_makedb.py | ||
emmtyper --blast_db /cdc_emm_database/cdc_emm sample.fasta -o outfile | ||
``` |