forked from StaPH-B/docker-builds
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add virulencefinder 2.0.4 (StaPH-B#669)
* added README.md for virulencefinder 2.0.4 * added dockerfile for virulencefinder 2.0.4. builds successfully but needs some minor improvements * added virulencefinder to main README.md and Program_Licenses.md * updated LABEL; slightly adjusted virulencefinder test stage layer * added a CMD layer at end of app stage for virulencefinder dockerfile --------- Co-authored-by: kapsakcj <[email protected]>
- Loading branch information
1 parent
83df83f
commit 41e5882
Showing
4 changed files
with
226 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
# VirulenceFinder release tag to install; this value is used for the git clone --branch | ||
# and for the software.version LABEL, so it must match the version this image is published as | ||
ARG VIRULENCEFINDER_VER="2.0.4" | ||
# Database not properly versioned, so using most recent commit made on 2023-05-03 | ||
# see here: https://bitbucket.org/genomicepidemiology/virulencefinder_db/commits/f678bdc15283aed3a45f66050d2eb3a6c9651f3f | ||
ARG VIRULENCEFINDER_DB_COMMIT_HASH="f678bdc15283aed3a45f66050d2eb3a6c9651f3f" | ||
|
||
FROM ubuntu:focal AS app | ||
|
||
# ARGs declared before FROM are not visible inside a build stage; redeclare them for the app stage | ||
ARG VIRULENCEFINDER_VER | ||
ARG VIRULENCEFINDER_DB_COMMIT_HASH | ||
|
||
# image metadata, declared as key/value pairs of a single LABEL instruction | ||
LABEL base.image="ubuntu:focal" \ | ||
dockerfile.version="1" \ | ||
software="VirulenceFinder" \ | ||
software.version="${VIRULENCEFINDER_VER}" \ | ||
description="Tool for identifying the virulence genes in E. coli, Enterococcus, Staphylococcus aureus, & Listeria from reads or assemblies" \ | ||
website="https://bitbucket.org/genomicepidemiology/virulencefinder/src/master/" \ | ||
license="https://bitbucket.org/genomicepidemiology/virulencefinder/src/master/" \ | ||
maintainer="Curtis Kapsak" \ | ||
maintainer.email="[email protected]" | ||
|
||
# OS-level dependencies (listed alphabetically); apt caches are removed in the same layer | ||
# ncbi-blast+ v2.9.0 (ubuntu:focal), min required version is 2.8.1 | ||
# python3 v3.8.10, min required version is 3.5 | ||
RUN apt-get update && \ | ||
apt-get install -y --no-install-recommends \ | ||
ca-certificates \ | ||
dos2unix \ | ||
gcc \ | ||
git \ | ||
libz-dev \ | ||
make \ | ||
ncbi-blast+ \ | ||
procps \ | ||
python3 \ | ||
python3-dev \ | ||
python3-pip \ | ||
python3-setuptools \ | ||
unzip \ | ||
wget && \ | ||
apt-get autoclean && \ | ||
rm -rf /var/lib/apt/lists/* | ||
|
||
# install pinned python dependencies | ||
# --no-cache-dir keeps pip's download/wheel cache out of the image layer | ||
RUN pip3 install --no-cache-dir biopython==1.73 tabulate==0.7.7 cgecore==1.5.5 | ||
|
||
# Install kma from the 1.0.1 tag (shallow clone of just that tag) | ||
# apt deps: libz-dev (for compiling) | ||
# after 'make', everything in the repo root matching 'kma*' is moved into /usr/local/bin | ||
RUN git clone --branch 1.0.1 --depth 1 https://bitbucket.org/genomicepidemiology/kma.git && \ | ||
cd kma && \ | ||
make && \ | ||
mv -v kma* /usr/local/bin/ | ||
|
||
# download VIRULENCEFINDER database using a specific commit hash to aid in reproducibility | ||
# index database w/ kma | ||
# NOTE: files HAVE to go into '/database' since that is the default location expected by virulencefinder.py | ||
# dos2unix on the FASTA files to ensure they have LF line endings | ||
RUN mkdir /database && \ | ||
git clone https://bitbucket.org/genomicepidemiology/virulencefinder_db.git /database && \ | ||
cd /database && \ | ||
git checkout ${VIRULENCEFINDER_DB_COMMIT_HASH} && \ | ||
dos2unix *.fsa && \ | ||
python3 INSTALL.py kma_index | ||
|
||
# make /data, then fetch VirulenceFinder at the requested tag/version | ||
RUN mkdir /data && \ | ||
git clone --branch ${VIRULENCEFINDER_VER} https://bitbucket.org/genomicepidemiology/virulencefinder.git | ||
|
||
# locale setting for singularity compatibility; put the virulencefinder checkout on $PATH | ||
ENV LC_ALL=C.UTF-8 \ | ||
PATH="/virulencefinder:${PATH}" | ||
|
||
# final working directory of the production docker image (app layer only) | ||
WORKDIR /data | ||
|
||
# with no command given, the container prints the virulencefinder help options | ||
CMD ["virulencefinder.py", "-h"] | ||
|
||
### START OF TEST STAGE ### | ||
FROM app AS test | ||
|
||
# all test commands below run from /test | ||
WORKDIR /test | ||
|
||
# download an example assembly; test with VirulenceFinder | ||
# Escherichia coli complete genome (Unicycler assembly) | ||
# GenBank Nucleotide entry: https://www.ncbi.nlm.nih.gov/nuccore/CP113091.1/ | ||
# BioSample:SAMN08799860 | ||
# the output directory is created first because it is passed to -o | ||
# wget saves into the current WORKDIR (/test), which is why the input path is /test/<file>.fna | ||
# -x requests extended output; results_tab.tsv is printed to the build log | ||
RUN mkdir -v /test/asm-input && \ | ||
wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/012/224/845/GCA_012224845.2_ASM1222484v2/GCA_012224845.2_ASM1222484v2_genomic.fna.gz && \ | ||
gunzip GCA_012224845.2_ASM1222484v2_genomic.fna.gz && \ | ||
virulencefinder.py -i /test/GCA_012224845.2_ASM1222484v2_genomic.fna -x -o /test/asm-input && \ | ||
cat /test/asm-input/results_tab.tsv | ||
|
||
# download Illumina reads for the same sample ^ and test reads as input into VirulenceFinder | ||
# only the R1 file (SRR6903006_1.fastq.gz) is downloaded and queried | ||
# '-mp kma' selects kma as the method; -x requests extended output so results_tab.tsv can be printed | ||
RUN mkdir /test/reads-input && \ | ||
wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR690/006/SRR6903006/SRR6903006_1.fastq.gz && \ | ||
virulencefinder.py -i SRR6903006_1.fastq.gz -mp kma -x -o /test/reads-input && \ | ||
cat /test/reads-input/results_tab.tsv | ||
|
||
# test using FASTA supplied with VirulenceFinder code; print help options | ||
# expect to see hits to astA and 2 stx genes; unfortunately it finds astA and 3 stx genes (that don't match) | ||
# issue created here: https://bitbucket.org/genomicepidemiology/virulencefinder/issues/11/test-results-do-not-match-expected-results | ||
# runs inside the cloned repo's test/ directory with blastn as the method; output is written to that same directory (-o .) | ||
RUN cd /virulencefinder/test && \ | ||
virulencefinder.py -i test.fsa -o . -mp blastn -x -q && \ | ||
virulencefinder.py --help |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
# VirulenceFinder Docker Image | ||
|
||
A docker image that contains VirulenceFinder, a tool for identifying virulence factors in E. coli isolates from reads or assemblies | ||
|
||
[Link to StaPH-B DockerHub repository](https://hub.docker.com/r/staphb/virulencefinder) | ||
|
||
Main tool: | ||
|
||
- Main Code Repo: [https://bitbucket.org/genomicepidemiology/virulencefinder/src/master/](https://bitbucket.org/genomicepidemiology/virulencefinder/src/master/) | ||
- VirulenceFinder database: [https://bitbucket.org/genomicepidemiology/virulencefinder_db/src/master/](https://bitbucket.org/genomicepidemiology/virulencefinder_db/src/master/) | ||
- You may be familiar with the web version of VirulenceFinder: [https://cge.food.dtu.dk/services/VirulenceFinder/](https://cge.food.dtu.dk/services/VirulenceFinder/) | ||
|
||
Additional tools: | ||
|
||
- python 3.8.10 | ||
- biopython 1.73 | ||
- [kma](https://bitbucket.org/genomicepidemiology/kma/src/master/) 1.0.1 | ||
- ncbi-blast+ 2.9.0 | ||
|
||
## Version information | ||
|
||
VirulenceFinder version: 2.0.4 [https://bitbucket.org/genomicepidemiology/virulencefinder/src/2.0.4/](https://bitbucket.org/genomicepidemiology/virulencefinder/src/2.0.4/) made on 2020-02-06 | ||
|
||
VirulenceFinder database version: commit `f678bdc15283aed3a45f66050d2eb3a6c9651f3f` made on 2023-05-03. [Link to commit history](https://bitbucket.org/genomicepidemiology/virulencefinder_db/commits/) | ||
|
||
## Requirements | ||
|
||
- Docker or Singularity | ||
- E. coli raw reads (fastq.gz) or assembly (fasta) | ||
- Illumina, Ion Torrent, Roche 454, SOLiD, Oxford Nanopore, and PacBio reads are supported. (I've only tested Illumina reads) | ||
|
||
## Usage | ||
|
||
```bash | ||
usage: virulencefinder.py [-h] -i INFILE [INFILE ...] [-o OUTDIR] [-tmp TMP_DIR] [-mp METHOD_PATH] [-p DB_PATH] [-d DATABASES] [-l MIN_COV] [-t THRESHOLD] [-x] [-q] | ||
|
||
optional arguments: | ||
-h, --help show this help message and exit | ||
-i INFILE [INFILE ...], --infile INFILE [INFILE ...] | ||
FASTA or FASTQ input files. | ||
-o OUTDIR, --outputPath OUTDIR | ||
Path to blast output | ||
-tmp TMP_DIR, --tmp_dir TMP_DIR | ||
Temporary directory for storage of the results from the external software. | ||
-mp METHOD_PATH, --methodPath METHOD_PATH | ||
Path to method to use (kma or blastn) | ||
-p DB_PATH, --databasePath DB_PATH | ||
Path to the databases | ||
-d DATABASES, --databases DATABASES | ||
Databases chosen to search in - if non is specified all is used | ||
-l MIN_COV, --mincov MIN_COV | ||
Minimum coverage | ||
-t THRESHOLD, --threshold THRESHOLD | ||
Minimum threshold for identity | ||
-x, --extented_output | ||
Give extented output with allignment files, template and query hits in fasta and a tab seperated file with gene profile results | ||
-q, --quiet | ||
``` | ||
## Notes and Recommendations | ||
- You do not need to supply a database or use the `-p` or `-d` flags | ||
- Database is included in the image and is in the default/expected location within the image filesystem: `/database` | ||
- (*NOT RECOMMENDED*) If you do need to use your own database, you will need to first index it with `kma` and use the `virulencefinder.py -p` flag. You can find instructions for this on the VirulenceFinder Bitbucket README. `kma` is included in this docker image for database indexing. | ||
- VirulenceFinder does **NOT** create an output directory when you use the `-o` flag. You MUST create it beforehand or it will throw an error. | ||
- **Default % Identity threshold: 90%**. Adjust with `-t 0.95` | ||
- **Default % coverage threshold: 60%**. Adjust with `-l 0.70` | ||
- Use the `-x` flag (extended output) if you want the traditional/legacy VirulenceFinder output files `results_tab.tsv results.txt Virulence_genes.fsa Hit_in_genome_seq.fsa`. Otherwise you will need to parse the default output file `data.json` for results | ||
- (*RECOMMENDED*) Use raw reads due to the increased sensitivity (without loss of specificity) and the additional information gleaned from KMA output (specifically the depth metric). You also save time from having to assemble the genome first. [CITATION NEEDED, PROBABLY THE KMA PAPER] | ||
- Querying reads: | ||
- This will run VirulenceFinder with `kma` (instead of ncbi-blast+) | ||
- Only one of the PE read files is necessary. There is likely little benefit to using both R1 and R2. It will take longer to run if you use both R1 and R2 files. | ||
- Querying assemblies: | ||
- This will run VirulenceFinder with `ncbi-blast+` | ||
- VirulenceFinder does not clean up after itself. `tmp/` (which contains 7 different `.xml` files) will exist in the specified output directory | ||
## Example Usage: Docker | ||
```bash | ||
# download the image | ||
$ docker pull staphb/virulencefinder:2.0.4 | ||
# input files are in my PWD | ||
$ ls | ||
E-coli.skesa.fasta E-coli.R1.fastq.gz E-coli.R2.fastq.gz | ||
# make an output directory | ||
$ mkdir output-dir-reads output-dir-asm | ||
# query reads, mount PWD to /data inside container (broken into two lines for readability) | ||
$ docker run --rm -u $(id -u):$(id -g) -v $PWD:/data staphb/virulencefinder:2.0.4 \ | ||
virulencefinder.py -i /data/E-coli.R1.fastq.gz -o /data/output-dir-reads | ||
# query assembly | ||
$ docker run --rm -u $(id -u):$(id -g) -v $PWD:/data staphb/virulencefinder:2.0.4 \ | ||
virulencefinder.py -i /data/E-coli.skesa.fasta -o /data/output-dir-asm | ||
``` | ||
## Example Usage: Singularity | ||
```bash | ||
# download the image | ||
$ singularity build virulencefinder.2.0.4.sif docker://staphb/virulencefinder:2.0.4 | ||
# files are in my PWD | ||
$ ls | ||
E-coli.skesa.fasta E-coli.R1.fastq.gz E-coli.R2.fastq.gz | ||
# make an output directory | ||
$ mkdir output-dir-reads output-dir-asm | ||
# query reads; mount PWD to /data inside container | ||
$ singularity exec --no-home -B $PWD:/data virulencefinder.2.0.4.sif \ | ||
virulencefinder.py -i /data/E-coli.R1.fastq.gz -o /data/output-dir-reads | ||
# assembly | ||
$ singularity exec --no-home -B $PWD:/data virulencefinder.2.0.4.sif \ | ||
virulencefinder.py -i /data/E-coli.skesa.fasta -o /data/output-dir-asm | ||
``` |