-
Notifications
You must be signed in to change notification settings - Fork 126
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
36 changed files
with
1,973 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
# Default artic release to build. An ARG declared before FROM is only visible to FROM, | ||
# so it is re-declared (without a value) inside the stage to inherit this default. | ||
ARG ARTIC_VER=1.2.4 | ||
|
||
FROM mambaorg/micromamba:1.4.9 AS app | ||
|
||
ARG ARTIC_VER | ||
ARG MEDAKA_VER=1.11.1 | ||
|
||
# base.image records the image named in the FROM instruction above | ||
LABEL base.image="mambaorg/micromamba:1.4.9" | ||
LABEL dockerfile.version="1" | ||
LABEL software="artic" | ||
LABEL software.version="${ARTIC_VER}" | ||
LABEL software1="medaka" | ||
LABEL software1.version="${MEDAKA_VER}" | ||
LABEL description="A bioinformatics pipeline for working with virus sequencing data sequenced with nanopore" | ||
LABEL website="https://github.com/artic-network/fieldbioinformatics" | ||
LABEL license="https://github.com/artic-network/fieldbioinformatics/blob/master/LICENSE" | ||
LABEL sop="https://artic.network/ncov-2019/ncov2019-bioinformatics-sop.html" | ||
LABEL maintainer="Erin Young" | ||
LABEL maintainer.email="[email protected]" | ||
|
||
# Run the following build steps as root, starting from the filesystem root | ||
USER root | ||
WORKDIR / | ||
|
||
# Install download tools (wget, ca-certificates), procps, a C build toolchain, | ||
# compression/curl/SSL development headers, and the system Python with pip. | ||
# The apt package lists are removed in the same layer. | ||
RUN apt-get update && apt-get install -y --no-install-recommends \ | ||
wget \ | ||
ca-certificates \ | ||
procps \ | ||
gcc \ | ||
make \ | ||
pkg-config \ | ||
zlib1g-dev \ | ||
libbz2-dev \ | ||
liblzma-dev \ | ||
libcurl4-gnutls-dev \ | ||
libssl-dev \ | ||
python3-dev \ | ||
python3-pip \ | ||
python-is-python3 && \ | ||
apt-get autoclean && rm -rf /var/lib/apt/lists/* | ||
|
||
# Download and unpack the artic (fieldbioinformatics) release for ARTIC_VER, create the | ||
# conda environment described by its environment.yml, install the package with setup.py, | ||
# check that `artic` runs, and create /data for use as a working directory | ||
RUN wget -q https://github.com/artic-network/fieldbioinformatics/archive/refs/tags/v${ARTIC_VER}.tar.gz && \ | ||
tar -xzf v${ARTIC_VER}.tar.gz && \ | ||
micromamba env create -y -f /fieldbioinformatics-${ARTIC_VER}/environment.yml && \ | ||
rm v${ARTIC_VER}.tar.gz && \ | ||
cd fieldbioinformatics-${ARTIC_VER} && \ | ||
python setup.py install && \ | ||
artic -v && \ | ||
mkdir /data | ||
|
||
# micromamba-docker settings (see that image's documentation): the environment to | ||
# activate, and activation of it during the RUN steps that follow | ||
ENV ENV_NAME="artic" | ||
ARG MAMBA_DOCKERFILE_ACTIVATE=1 | ||
|
||
# Install medaka and its build helpers into the artic environment, check that medaka runs, | ||
# then install the pinned pyabpoa. --no-cache-dir keeps pip's download cache out of the layer. | ||
RUN /opt/conda/envs/artic/bin/pip install --no-cache-dir setuptools wheel cython medaka==${MEDAKA_VER} && \ | ||
medaka --version && \ | ||
/opt/conda/envs/artic/bin/pip install --no-cache-dir pyabpoa==1.2.4 | ||
|
||
# Append the environment's executables to PATH and set a UTF-8 locale | ||
ENV PATH="${PATH}:/opt/conda/envs/artic/bin/" \ | ||
LC_ALL=C.UTF-8 | ||
|
||
# Default command prints the artic help text (exec form, so no /bin/sh -c wrapper) | ||
CMD [ "artic", "--help" ] | ||
|
||
WORKDIR /data | ||
|
||
##### ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ##### | ||
##### Step 2. Set up the testing stage. ##### | ||
##### The docker image is built to the 'test' stage before merging, but ##### | ||
##### the test stage (or any stage after 'app') will be lost. ##### | ||
##### ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ##### | ||
|
||
# A second FROM instruction creates a new stage | ||
# new base for testing | ||
FROM app AS test | ||
# re-declared so ${ARTIC_VER} can be used inside this stage | ||
ARG ARTIC_VER | ||
|
||
# print help and version info; check dependencies (not all software has these options available) | ||
# Mostly this ensures the tool of choice is in path and is executable | ||
RUN artic --help && \ | ||
artic --version | ||
|
||
# listing available models | ||
RUN medaka tools list_models | ||
|
||
# switch to the artic source tree (/fieldbioinformatics-${ARTIC_VER}) so its bundled test runner can be called | ||
WORKDIR /fieldbioinformatics-${ARTIC_VER} | ||
|
||
# tests that came with artic: test-runner.sh is run once for medaka and once for nanopolish | ||
RUN bash ./test-runner.sh medaka && bash ./test-runner.sh nanopolish | ||
|
||
# keep all inputs & outputs of the real-data test in /test | ||
WORKDIR /test | ||
|
||
# run on "real" data (sample files were not sequenced with version 5.3.2 primers) | ||
# steps: download the reads, length-filter them with guppyplex, lay out the V5.3.2 scheme files | ||
# as dir/name/V5/name.*, index the reference, run artic minion with medaka, then list the final* outputs | ||
RUN wget -q ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR224/050/SRR22452250/SRR22452250_1.fastq.gz && \ | ||
artic guppyplex --min-length 400 --max-length 700 --directory . --prefix SRR22452250_1.fastq.gz --output SRR22452250_1_filtered.fastq.gz && \ | ||
mkdir -p dir/name/V5 && \ | ||
wget -q https://raw.githubusercontent.com/artic-network/primer-schemes/master/nCoV-2019/V5.3.2/SARS-CoV-2.primer.bed -O dir/name/V5/name.primer.bed && \ | ||
wget -q https://raw.githubusercontent.com/artic-network/primer-schemes/master/nCoV-2019/V5.3.2/SARS-CoV-2.reference.fasta -O dir/name/V5/name.reference.fasta && \ | ||
wget -q https://raw.githubusercontent.com/artic-network/primer-schemes/master/nCoV-2019/V5.3.2/SARS-CoV-2.scheme.bed -O dir/name/V5/name.scheme.bed && \ | ||
samtools faidx dir/name/V5/name.reference.fasta && \ | ||
artic minion --normalise 200 --skip-nanopolish --medaka --medaka-model r941_min_high_g360 --threads 4 --read-file SRR22452250_1_filtered.fastq.gz --scheme-directory ./dir --scheme-version 5 name final && \ | ||
ls final* | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
# artic fieldbioinformatics container | ||
|
||
Main tool : [artic](https://github.com/artic-network/fieldbioinformatics) | ||
|
||
Additional tools: | ||
|
||
- medaka=1.11.1 | ||
|
||
Full documentation: [https://github.com/artic-network/fieldbioinformatics](https://github.com/artic-network/fieldbioinformatics) | ||
|
||
There is also a very useful SOP: [https://artic.network/ncov-2019/ncov2019-bioinformatics-sop.html](https://artic.network/ncov-2019/ncov2019-bioinformatics-sop.html) | ||
And additional documentation: [https://artic.readthedocs.io/en/latest/](https://artic.readthedocs.io/en/latest/) | ||
|
||
> A bioinformatics pipeline for working with virus sequencing data sequenced with nanopore. | ||
WARNING : This container does not contain the primer schemes found at [https://github.com/artic-network/primer-schemes](https://github.com/artic-network/primer-schemes). Those will have to be downloaded and mounted separately. | ||
|
||
## Example Usage with the artic primers | ||
|
||
```bash | ||
# get primers | ||
git clone https://github.com/artic-network/primer-schemes | ||
|
||
# download reads for example | ||
wget -q ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR224/050/SRR22452250/SRR22452250_1.fastq.gz | ||
|
||
# read filtering | ||
docker run -v $(pwd):/data staphb/artic:latest artic guppyplex --min-length 400 --max-length 700 --directory . --prefix SRR22452250_1.fastq.gz --output SRR22452250_1_filtered.fastq.gz | ||
|
||
# running the artic minion workflow with medaka | ||
docker run -v $(pwd):/data staphb/artic:latest artic minion --normalise 200 --skip-nanopolish --medaka --medaka-model r941_min_high_g360 --threads 4 --read-file SRR22452250_1_filtered.fastq.gz --scheme-directory primer-schemes --scheme-version 5.3.2 nCoV-2019 test | ||
|
||
# the result files will all start with test* | ||
``` | ||
|
||
In general, any primer scheme can be used as long as it meets [artic's requirements](https://github.com/artic-network/primer-schemes). | ||
|
||
This is the recommended directory structure with corresponding files: | ||
|
||
```bash | ||
${directory}/${name}/V${version}/${name}.primer.bed | ||
${directory}/${name}/V${version}/${name}.scheme.bed | ||
${directory}/${name}/V${version}/${name}.reference.fasta | ||
${directory}/${name}/V${version}/${name}.reference.fasta.fai | ||
``` | ||
|
||
The command to use this primer scheme would be | ||
|
||
```bash | ||
artic minion --normalise 200 --skip-nanopolish --medaka --medaka-model r941_min_high_g360 --threads 4 --read-file input.fastq.gz --scheme-directory ${directory} --scheme-version ${version} ${name} outputprefix | ||
``` | ||
|
||
Different primer schemes can be validated with artic-tools (already in PATH): | ||
|
||
```bash | ||
artic-tools validate_scheme ${basename}.primer.bed --outputInserts ${basename}.insert.bed | ||
``` | ||
|
||
## Medaka models | ||
|
||
Medaka updates frequently, and artic can throw errors when corresponding ONT models are not found. | ||
|
||
These are the medaka models in this image: | ||
``` | ||
Available: r103_fast_g507, r103_fast_snp_g507, r103_fast_variant_g507, r103_hac_g507, r103_hac_snp_g507, r103_hac_variant_g507, r103_min_high_g345, r103_min_high_g360, r103_prom_high_g360, r103_prom_snp_g3210, r103_prom_variant_g3210, r103_sup_g507, r103_sup_snp_g507, r103_sup_variant_g507, r1041_e82_260bps_fast_g632, r1041_e82_260bps_fast_variant_g632, r1041_e82_260bps_hac_g632, r1041_e82_260bps_hac_v4.0.0, r1041_e82_260bps_hac_v4.1.0, r1041_e82_260bps_hac_variant_g632, r1041_e82_260bps_hac_variant_v4.1.0, r1041_e82_260bps_sup_g632, r1041_e82_260bps_sup_v4.0.0, r1041_e82_260bps_sup_v4.1.0, r1041_e82_260bps_sup_variant_g632, r1041_e82_260bps_sup_variant_v4.1.0, r1041_e82_400bps_fast_g615, r1041_e82_400bps_fast_g632, r1041_e82_400bps_fast_variant_g615, r1041_e82_400bps_fast_variant_g632, r1041_e82_400bps_hac_g615, r1041_e82_400bps_hac_g632, r1041_e82_400bps_hac_v4.0.0, r1041_e82_400bps_hac_v4.1.0, r1041_e82_400bps_hac_v4.2.0, r1041_e82_400bps_hac_variant_g615, r1041_e82_400bps_hac_variant_g632, r1041_e82_400bps_hac_variant_v4.1.0, r1041_e82_400bps_hac_variant_v4.2.0, r1041_e82_400bps_sup_g615, r1041_e82_400bps_sup_v4.0.0, r1041_e82_400bps_sup_v4.1.0, r1041_e82_400bps_sup_v4.2.0, r1041_e82_400bps_sup_variant_g615, r1041_e82_400bps_sup_variant_v4.1.0, r1041_e82_400bps_sup_variant_v4.2.0, r104_e81_fast_g5015, r104_e81_fast_variant_g5015, r104_e81_hac_g5015, r104_e81_hac_variant_g5015, r104_e81_sup_g5015, r104_e81_sup_g610, r104_e81_sup_variant_g610, r10_min_high_g303, r10_min_high_g340, r941_e81_fast_g514, r941_e81_fast_variant_g514, r941_e81_hac_g514, r941_e81_hac_variant_g514, r941_e81_sup_g514, r941_e81_sup_variant_g514, r941_min_fast_g303, r941_min_fast_g507, r941_min_fast_snp_g507, r941_min_fast_variant_g507, r941_min_hac_g507, r941_min_hac_snp_g507, r941_min_hac_variant_g507, r941_min_high_g303, r941_min_high_g330, r941_min_high_g340_rle, r941_min_high_g344, r941_min_high_g351, r941_min_high_g360, r941_min_sup_g507, r941_min_sup_snp_g507, 
r941_min_sup_variant_g507, r941_prom_fast_g303, r941_prom_fast_g507, r941_prom_fast_snp_g507, r941_prom_fast_variant_g507, r941_prom_hac_g507, r941_prom_hac_snp_g507, r941_prom_hac_variant_g507, r941_prom_high_g303, r941_prom_high_g330, r941_prom_high_g344, r941_prom_high_g360, r941_prom_high_g4011, r941_prom_snp_g303, r941_prom_snp_g322, r941_prom_snp_g360, r941_prom_sup_g507, r941_prom_sup_snp_g507, r941_prom_sup_variant_g507, r941_prom_variant_g303, r941_prom_variant_g322, r941_prom_variant_g360, r941_sup_plant_g610, r941_sup_plant_variant_g610 | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
FROM ubuntu:focal AS app | ||
|
||
# BLAST+ release to download; used below in the download URL, the PATH entry, and the version label | ||
ARG BLAST_VER="2.14.1" | ||
|
||
# LABEL instructions tag the image with metadata that might be important to the user | ||
LABEL base.image="ubuntu:focal" | ||
LABEL dockerfile.version="1" | ||
LABEL software="blast+" | ||
LABEL software.version="${BLAST_VER}" | ||
LABEL description="Finds matches in sequencing reads" | ||
LABEL website="https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastDocs&DOC_TYPE=Download" | ||
LABEL license="https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE" | ||
LABEL maintainer="Erin Young" | ||
LABEL maintainer.email="[email protected]" | ||
|
||
# Install wget and CA certificates (used for the HTTPS download below) plus libgomp1; | ||
# the apt package lists are removed in the same layer | ||
RUN apt-get update && apt-get install -y --no-install-recommends \ | ||
wget \ | ||
ca-certificates \ | ||
libgomp1 && \ | ||
apt-get autoclean && rm -rf /var/lib/apt/lists/* | ||
|
||
# Download and unpack the BLAST+ ${BLAST_VER} x64 Linux release, remove the archive, | ||
# and make /data for use as a working dir | ||
RUN wget https://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/${BLAST_VER}/ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz && \ | ||
tar -xzf ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz && \ | ||
rm ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz && \ | ||
mkdir /data | ||
|
||
# ENV instructions set environment variables that persist from the build into the resulting image | ||
# Use for e.g. $PATH and locale settings for compatibility with Singularity | ||
ENV PATH="/ncbi-blast-${BLAST_VER}+/bin:$PATH" \ | ||
LC_ALL=C | ||
|
||
# WORKDIR sets working directory | ||
WORKDIR /data | ||
|
||
# default command is to pull up help options for blastn | ||
# yes, there are more tools than blastn, but it's likely the most common one used | ||
CMD [ "blastn", "-help" ] | ||
|
||
|
||
|
||
# A second FROM instruction creates a new stage | ||
# We use `test` for the test image | ||
FROM app AS test | ||
|
||
# listing all the executables in bin | ||
RUN ls /ncbi-blast-*/bin/ | ||
|
||
# getting a genome and building a nucleotide BLAST database from it | ||
RUN mkdir db && \ | ||
wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/005/845/GCF_000005845.2_ASM584v2/GCF_000005845.2_ASM584v2_genomic.fna.gz -P db && \ | ||
gunzip db/GCF_000005845.2_ASM584v2_genomic.fna.gz && \ | ||
makeblastdb -dbtype nucl -in db/GCF_000005845.2_ASM584v2_genomic.fna | ||
|
||
# getting a query file (dnaA.fasta) | ||
RUN wget https://raw.githubusercontent.com/rrwick/Unicycler/main/unicycler/gene_data/dnaA.fasta | ||
|
||
# getting some blast results: tabular (outfmt 6) hits of the query against the genome database | ||
RUN tblastn -query dnaA.fasta \ | ||
-db db/GCF_000005845.2_ASM584v2_genomic.fna \ | ||
-outfmt '6' \ | ||
-out blast_hits.txt && \ | ||
head blast_hits.txt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
# blast+ container | ||
|
||
Main tools: | ||
|
||
- [blast+](https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastDocs&DOC_TYPE=Download) | ||
|
||
This is meant to assist in local blast searches. No blast databases will be maintained in this container. Be sure to mount your relevant volumes with `--volume` or `-v` when using the command line. | ||
|
||
blast+ is actually a suite of tools. blast+ v.2.14.1 includes: | ||
|
||
```bash | ||
$ ls /ncbi-blast-2.14.1+/bin | ||
blast_formatter | ||
blast_formatter_vdb | ||
blast_vdb_cmd | ||
blastdb_aliastool | ||
blastdbcheck | ||
blastdbcmd | ||
blastn | ||
blastn_vdb | ||
blastp | ||
blastx | ||
cleanup-blastdb-volumes.py | ||
convert2blastmask | ||
deltablast | ||
dustmasker | ||
get_species_taxids.sh | ||
legacy_blast.pl | ||
makeblastdb | ||
makembindex | ||
makeprofiledb | ||
psiblast | ||
rpsblast | ||
rpstblastn | ||
segmasker | ||
tblastn | ||
tblastn_vdb | ||
tblastx | ||
update_blastdb.pl | ||
windowmasker | ||
``` | ||
|
||
Currently not supported, but could be: | ||
|
||
```bash | ||
get_species_taxids.sh # requires E-direct | ||
update_blastdb.pl # requires perl | ||
``` | ||
|
||
## Example Usage | ||
|
||
```bash | ||
# making a blast database | ||
makeblastdb -dbtype nucl -in fasta.fa | ||
|
||
# query | ||
tblastn -query query.fasta -db fasta.fa -outfmt '6' -out blast_hits.txt | ||
``` | ||
|
||
More documentation can be found at [https://www.ncbi.nlm.nih.gov/books/NBK569856/](https://www.ncbi.nlm.nih.gov/books/NBK569856/) and [https://www.ncbi.nlm.nih.gov/books/NBK279690/](https://www.ncbi.nlm.nih.gov/books/NBK279690/) |
Oops, something went wrong.