diff --git a/README.md b/README.md index a9b69bd3e..de141d594 100644 --- a/README.md +++ b/README.md @@ -236,7 +236,7 @@ To learn more about the docker pull rate limits and the open source software pro | [skani](https://github.com/bluenote-1577/skani)
[![docker pulls](https://badgen.net/docker/pulls/staphb/skani)](https://hub.docker.com/r/staphb/skani) | | https://github.com/bluenote-1577/skani | | [SKESA](https://hub.docker.com/r/staphb/skesa)
[![docker pulls](https://badgen.net/docker/pulls/staphb/skesa)](https://hub.docker.com/r/staphb/skesa) | | https://github.com/ncbi/SKESA | | [Smalt](https://hub.docker.com/r/staphb/smalt)
[![docker pulls](https://badgen.net/docker/pulls/staphb/smalt)](https://hub.docker.com/r/staphb/smalt) | | https://www.sanger.ac.uk/tool/smalt-0/ | -| [snpeff](https://hub.docker.com/r/staphb/snpeff)
[![docker pulls](https://badgen.net/docker/pulls/staphb/snpeff)](https://hub.docker.com/r/staphb/snpeff) | | https://pcingola.github.io/SnpEff | +| [snpeff](https://hub.docker.com/r/staphb/snpeff)
[![docker pulls](https://badgen.net/docker/pulls/staphb/snpeff)](https://hub.docker.com/r/staphb/snpeff) | | https://pcingola.github.io/SnpEff | | [Snippy](https://hub.docker.com/r/staphb/snippy)
[![docker pulls](https://badgen.net/docker/pulls/staphb/snippy)](https://hub.docker.com/r/staphb/snippy) | | https://github.com/tseemann/snippy | | [snp-dists](https://hub.docker.com/r/staphb/snp-dists)
[![docker pulls](https://badgen.net/docker/pulls/staphb/snp-dists)](https://hub.docker.com/r/staphb/snp-dists) | | https://github.com/tseemann/snp-dists | | [SNP-sites](https://hub.docker.com/r/staphb/snp-sites)
[![docker pulls](https://badgen.net/docker/pulls/staphb/snp-sites)](https://hub.docker.com/r/staphb/snp-sites) | | https://github.com/sanger-pathogens/snp-sites |
diff --git a/snpeff/5.2a/Dockerfile b/snpeff/5.2a/Dockerfile
new file mode 100644
index 000000000..c69ea7989
--- /dev/null
+++ b/snpeff/5.2a/Dockerfile
@@ -0,0 +1,116 @@
+ARG SNPEFF_VER="5.2a"
+ARG SNPEFF_JAR_VER="5.2"
+ARG SNPSIFT_VER="5.1"
+
+## Builder ##
+FROM ubuntu:focal AS builder
+
+ARG SNPEFF_VER
+ARG SNPEFF_JAR_VER
+ARG SNPSIFT_VER
+
+# Install OpenJDK, Ant, Maven and the download tools using apt
+RUN apt-get update && apt-get install -y software-properties-common && \
+  apt-get update && apt-get install -y \
+  openjdk-11-jre \
+  ant \
+  maven \
+  curl \
+  wget && apt-get autoclean && rm -rf /var/lib/apt/lists/*
+
+# Install dependencies and snpEff executables from the source code using Maven.
+RUN wget https://github.com/pcingola/SnpEff/archive/refs/tags/v${SNPEFF_VER}.tar.gz && \
+  tar -xvf v${SNPEFF_VER}.tar.gz && \
+  rm v${SNPEFF_VER}.tar.gz && \
+  cd /SnpEff-${SNPEFF_VER}/lib && \
+  # install Antlr
+  mvn install:install-file -Dfile=antlr-4.5.1-complete.jar -DgroupId=org.antlr -DartifactId=antlr -Dversion=4.5.1 -Dpackaging=jar && \
+  # install BioJava core
+  mvn install:install-file -Dfile=biojava3-core-3.0.7.jar -DgroupId=org.biojava -DartifactId=biojava3-core -Dversion=3.0.7 -Dpackaging=jar && \
+  # install BioJava structure
+  mvn install:install-file -Dfile=biojava3-structure-3.0.7.jar -DgroupId=org.biojava -DartifactId=biojava3-structure -Dversion=3.0.7 -Dpackaging=jar && \
+  cd /SnpEff-${SNPEFF_VER} && \
+  # skipping scripts_build/make.sh and instead putting relevant commands here
+  mvn clean compile assembly:single jar:jar && \
+  cp /SnpEff-${SNPEFF_VER}/target/SnpEff-${SNPEFF_JAR_VER}-jar-with-dependencies.jar /snpEff.jar && \
+  # The SnpEff jar is installed under the SnpSift version because that is the version the SnpSift build below is given
+  mvn install:install-file -Dfile=/SnpEff-${SNPEFF_VER}/target/SnpEff-${SNPEFF_JAR_VER}.jar -DgroupId=org.snpeff -DartifactId=SnpEff -Dversion=${SNPSIFT_VER} -Dpackaging=jar -DgeneratePom=true --quiet && \
+  mkdir /data
+
+# Install dependencies and SnpSift executables from the source code using Maven.
+RUN wget https://github.com/pcingola/SnpSift/archive/refs/tags/v${SNPSIFT_VER}.tar.gz && \
+  tar -xvf v${SNPSIFT_VER}.tar.gz && \
+  rm v${SNPSIFT_VER}.tar.gz && \
+  cd /SnpSift-${SNPSIFT_VER} && \
+  mvn clean compile assembly:single jar:jar && \
+  cp target/SnpSift-${SNPSIFT_VER}-jar-with-dependencies.jar /SnpSift.jar && \
+  mvn install:install-file -Dfile=target/SnpSift-${SNPSIFT_VER}.jar -DgroupId=org.snpsift -DartifactId=SnpSift -Dversion=${SNPSIFT_VER} -Dpackaging=jar -DgeneratePom=true
+
+# Collect the jars in /snpEff and write the snpeff/snpsift wrappers; the single quotes keep "$@" literal so arguments are forwarded unsplit
+RUN mv SnpEff-${SNPEFF_VER} snpEff && \
+  mv snpEff.jar snpEff && \
+  mv SnpSift.jar snpEff && \
+  chmod +x /snpEff/snpEff.jar && \
+  echo "#!/bin/bash" >> /snpEff/snpeff && \
+  chmod +x /snpEff/SnpSift.jar && \
+  echo "#!/bin/bash" >> /snpEff/snpsift && \
+  echo 'exec java -jar /snpEff/snpEff.jar "$@"' >> /snpEff/snpeff && \
+  chmod +x /snpEff/snpeff && \
+  echo 'exec java -jar /snpEff/SnpSift.jar "$@"' >> /snpEff/snpsift && \
+  chmod +x /snpEff/snpsift
+
+# Point the helper scripts at the absolute jar paths; grep -l lists each file once so no file is rewritten twice
+RUN for file in $(grep -ilw "snpEff.jar" /snpEff/scripts/*sh) ; do sed -i 's|snpEff\.jar|/snpEff/snpEff.jar|g' "$file" ; done && \
+  for file in $(grep -ilw "SnpSift.jar" /snpEff/scripts/*sh) ; do sed -i 's|SnpSift\.jar|/snpEff/SnpSift.jar|g' "$file" ; done && \
+  chmod +x /snpEff/scripts/*sh
+
+## App ##
+FROM ubuntu:focal AS app
+ARG SNPEFF_VER
+ARG SNPEFF_JAR_VER
+ARG SNPSIFT_VER
+
+# Metadata
+LABEL base.image="ubuntu:focal"
+LABEL dockerfile.version="1"
+LABEL software="SnpEff & SnpSift"
+LABEL software.version=${SNPEFF_VER}
+LABEL snpeff.jarfile.version=${SNPEFF_JAR_VER}
+LABEL snpsift.software.version=${SNPSIFT_VER}
+LABEL description="Genetic variant annotation and effect prediction toolbox."
+LABEL description.SnpSift="Used after SnpEff annotation to filter and manipulate annotated files."
+LABEL website="https://pcingola.github.io/SnpEff"
+LABEL license="https://github.com/pcingola/SnpEff/blob/master/LICENSE.md"
+LABEL maintainer="Tom Iverson"
+LABEL maintainer.email="tiverson@utah.gov"
+
+ARG DEBIAN_FRONTEND=noninteractive
+
+# perl, python2, and r are required for the scripts
+RUN apt-get update && apt-get install --no-install-recommends -y \
+  openjdk-11-jre-headless \
+  perl \
+  r-base \
+  python-is-python2 \
+  curl \
+  && apt-get autoclean && rm -rf /var/lib/apt/lists/*
+
+COPY --from=builder /snpEff/snpeff /snpEff/snpsift /snpEff/snpEff.jar /snpEff/SnpSift.jar /snpEff/snpEff.config /snpEff/
+COPY --from=builder /snpEff/scripts/ /snpEff/scripts/
+
+ENV PATH="${PATH}:/snpEff:/snpEff/scripts"
+
+CMD snpEff -h
+
+WORKDIR /data
+
+# Run test of annotation
+FROM app AS test
+RUN apt-get update && apt-get install -y wget
+
+COPY test_snpeff_snpsift.sh .
+
+RUN bash test_snpeff_snpsift.sh
+
+# from issue https://github.com/StaPH-B/docker-builds/issues/760
+RUN buildDbNcbi.sh CP014866.1
diff --git a/snpeff/5.2a/README.md b/snpeff/5.2a/README.md
new file mode 100644
index 000000000..f57f8810c
--- /dev/null
+++ b/snpeff/5.2a/README.md
@@ -0,0 +1,50 @@
+# snpeff container
+Main tool: [snpeff](https://pcingola.github.io/SnpEff/)
+
+Additional tools:
+SNPSift version 5.1
+
+> Genomic variant annotations, and functional effect prediction toolbox.
+
+This Dockerfile has made an attempt to allow users to use the scripts included with SnpEff. Not all the scripts are tested before deployment, so please [submit an issue](https://github.com/StaPH-B/docker-builds/issues) for any use-cases that aren't working with the corresponding error message.
+
+Command line options: [https://pcingola.github.io/SnpEff/se_commandline/](https://pcingola.github.io/SnpEff/se_commandline/)
+# Example Usage
+### This example was taken from the test written for this image using human reference genome hg19 and demo.1kg.vcf.
+
+```{bash}
+
+# To view the list of available SnpEff databases that may be used for annotation.
+snpeff databases
+
+# Get annotation vcf file data.
+wget -nv https://sourceforge.net/projects/snpeff/files/demo.1kg.vcf -O demo.vcf
+
+# Inspect the first five lines of the demo.vcf file.
+grep -v "^#" demo.vcf | head -5 | tr "\t" "\ " | sed -e "s/.\{75\}/&\n/g"
+
+# Using SnpEff, download human genome reference hg19.
+snpeff download -v hg19
+
+# Annotate the demo.vcf file with the hg19 data.
+snpeff hg19 demo.vcf > annotated.vcf
+
+# Inspect the first five lines of the annotated.vcf file.
+grep -v "^#" annotated.vcf | head -5 | tr "\t" "\ " | sed -e "s/.\{75\}/&\n/g"
+
+# SnpSift is a toolbox that allows you to filter and manipulate annotated files.
+# Using SnpSift, filter out variants in the annotated.vcf file with quality less than 30.
+cat annotated.vcf | snpsift filter "(QUAL>=30)" > filtered.vcf
+
+# Inspect the first five lines of the filtered.vcf file.
+grep -v "^#" filtered.vcf | head -5 | tr "\t" "\ " | sed -e "s/.\{75\}/&\n/g"
+
+```
+
+Additional usage examples are available at [https://pcingola.github.io/SnpEff/examples/](https://pcingola.github.io/SnpEff/examples/)
+
+Additional SnpEff options and features are available at [https://pcingola.github.io/SnpEff/se_introduction/](https://pcingola.github.io/SnpEff/se_introduction/)
+
+Additional SnpSift options and features are available at [https://pcingola.github.io/SnpEff/ss_introduction/](https://pcingola.github.io/SnpEff/ss_introduction/)
+
+SnpEff and SnpSift can perform annotation, primary impact assessment and variant filtering, as well as many other tasks beyond the scope of this protocol.
The software developers highly recommend reading their comprehensive documentation available [here](https://pcingola.github.io/SnpEff/adds/VCFannotationformat_v1.0.pdf)
diff --git a/snpeff/5.2a/test_snpeff_snpsift.sh b/snpeff/5.2a/test_snpeff_snpsift.sh
new file mode 100644
index 000000000..a47b3c619
--- /dev/null
+++ b/snpeff/5.2a/test_snpeff_snpsift.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+# Smoke test for the image: annotate a demo VCF with SnpEff, then filter the result with SnpSift.
+set -eu  # abort on the first failing command so a broken step fails the Docker build (no pipefail: head may close the pipe on grep)
+# Get annotation vcf file data.
+wget -nv https://sourceforge.net/projects/snpeff/files/demo.1kg.vcf -O demo.vcf
+
+# Inspect the first five lines of the demo.vcf file.
+echo "*****TAKE A LOOK AT THE FIRST 5 LINES OF THE DEMO.VCF FILE.*****"
+grep -v "^#" demo.vcf | head -5 | tr "\t" "\ " | sed -e "s/.\{75\}/&\n/g"
+
+# Download human genome reference hg19.
+snpeff download -v hg19
+
+# Annotate the demo.vcf file with the hg19 data.
+snpeff hg19 demo.vcf > annotated.vcf
+# More SnpEff options and features are available at https://pcingola.github.io/SnpEff/se_introduction/
+
+# Inspect the first five lines of the annotated.vcf file.
+echo "*****TAKE A LOOK AT THE FIRST 5 LINES OF THE ANNOTATED.VCF FILE.*****"
+grep -v "^#" annotated.vcf | head -5 | tr "\t" "\ " | sed -e "s/.\{75\}/&\n/g"
+
+# SnpSift is a toolbox that allows you to filter and manipulate annotated files.
+# Using SnpSift, filter out variants in the annotated.vcf file with quality less than 30.
+cat annotated.vcf | snpsift filter "(QUAL>=30)" > filtered.vcf
+# More SnpSift options and features are available at https://pcingola.github.io/SnpEff/ss_introduction/
+
+# Inspect the first five lines of the filtered.vcf file.
+echo "*****TAKE A LOOK AT THE FIRST 5 LINES OF THE FILTERED.VCF FILE.*****"
+grep -v "^#" filtered.vcf | head -5 | tr "\t" "\ " | sed -e "s/.\{75\}/&\n/g"
+
+echo "*****TEST IS COMPLETE.*****"
+
+