From 4eeb635070c9e67323ad531cddc118ce02df49d0 Mon Sep 17 00:00:00 2001
From: Alex Petty
Date: Wed, 21 Aug 2024 12:56:58 -0400
Subject: [PATCH] Link htslib to libdeflate, improving performance. (#1022)

---
 htslib/1.20.c/Dockerfile | 97 ++++++++++++++++++++++++++++++++++++++++
 htslib/1.20.c/README.md  | 26 +++++++++++
 2 files changed, 123 insertions(+)
 create mode 100644 htslib/1.20.c/Dockerfile
 create mode 100644 htslib/1.20.c/README.md

diff --git a/htslib/1.20.c/Dockerfile b/htslib/1.20.c/Dockerfile
new file mode 100644
index 000000000..bc1b1d8ed
--- /dev/null
+++ b/htslib/1.20.c/Dockerfile
@@ -0,0 +1,97 @@
+# for easy upgrade later. ARG variables only persist during build time
+ARG HTSLIB_VER="1.20"
+
+FROM ubuntu:jammy AS builder
+
+ARG HTSLIB_VER
+
+# install build dependencies (sorted, one per line), cleanup apt garbage
+# It's helpful when they're all listed on https://github.com/samtools/htslib/blob/develop/INSTALL
+RUN apt-get update && apt-get install --no-install-recommends -y \
+  autoconf \
+  automake \
+  bzip2 \
+  ca-certificates \
+  gcc \
+  libbz2-dev \
+  libcurl4-gnutls-dev \
+  libdeflate-dev \
+  liblzma-dev \
+  libssl-dev \
+  make \
+  perl \
+  procps \
+  wget \
+  zlib1g-dev \
+  && apt-get autoclean && rm -rf /var/lib/apt/lists/*
+
+# get htslib, compile, install, run test suite
+# --with-libdeflate makes ./configure fail if libdeflate is missing instead of silently falling back to zlib
+RUN wget -q https://github.com/samtools/htslib/releases/download/${HTSLIB_VER}/htslib-${HTSLIB_VER}.tar.bz2 && \
+  tar -vxjf htslib-${HTSLIB_VER}.tar.bz2 && \
+  rm -v htslib-${HTSLIB_VER}.tar.bz2 && \
+  cd htslib-${HTSLIB_VER} && \
+  ./configure --with-libdeflate && \
+  make && \
+  make install && \
+  make test
+
+### start of app stage ###
+FROM ubuntu:jammy AS app
+
+ARG HTSLIB_VER
+
+LABEL base.image="ubuntu:jammy"
+LABEL dockerfile.version="1"
+LABEL software="htslib"
+LABEL software.version="${HTSLIB_VER}"
+LABEL description="A C library for reading/writing high-throughput sequencing data"
+LABEL website="https://github.com/samtools/htslib"
+LABEL license="https://github.com/samtools/htslib/blob/develop/LICENSE"
+LABEL maintainer="Erin Young"
+LABEL maintainer.email="eriny@utah.gov"
+LABEL maintainer2="Curtis Kapsak"
+LABEL maintainer2.email="kapsakcj@gmail.com"
+
+# install runtime dependencies & cleanup apt garbage
+# installed as recommend here: https://github.com/samtools/htslib/blob/develop/INSTALL#L31
+RUN apt-get update && apt-get install --no-install-recommends -y \
+  bzip2 \
+  ca-certificates \
+  libbz2-1.0 \
+  libcurl3-gnutls \
+  libdeflate0 \
+  liblzma5 \
+  zlib1g \
+  && apt-get autoclean && rm -rf /var/lib/apt/lists/*
+
+# copy in htslib executables from builder stage
+COPY --from=builder /usr/local/bin/* /usr/local/bin/
+COPY --from=builder /usr/local/lib/ /usr/local/lib/
+COPY --from=builder /usr/local/include/ /usr/local/include/
+
+# set locale settings for singularity compatibility
+ENV LC_ALL=C
+
+# set working directory
+WORKDIR /data
+
+# default command is to show help options
+CMD ["htsfile", "--help"]
+
+### start of test stage ###
+FROM app AS test
+
+# check that these three executables are available
+RUN bgzip --help && tabix --help && htsfile --help
+
+RUN apt-get update && apt-get install --no-install-recommends -y wget
+
+# use on actual files
+RUN wget -q https://github.com/StaPH-B/docker-builds/raw/master/tests/SARS-CoV-2/SRR13957123_1.fastq.gz && \
+  gunzip SRR13957123_1.fastq.gz && \
+  bgzip SRR13957123_1.fastq
+
+# FYI Test suite "make test" now performed in the builder stage since app and
+# test stages do not include htslib source code.
+# This is to avoid having to re-download source code simply to run test suite
diff --git a/htslib/1.20.c/README.md b/htslib/1.20.c/README.md
new file mode 100644
index 000000000..7d03b6333
--- /dev/null
+++ b/htslib/1.20.c/README.md
@@ -0,0 +1,26 @@
+# htslib container
+
+Main tool:
+
+* [https://www.htslib.org/](https://www.htslib.org/)
+* [GitHub](https://github.com/samtools/htslib)
+
+Additional tools:
+
+* perl 5.34.0
+
+## Example Usage
+
+```bash
+# determine file formats for various BAM and SAM files
+$ htsfile tests/SARS-CoV-2/SRR13957123.primertrim.sorted.bam
+tests/SARS-CoV-2/SRR13957123.primertrim.sorted.bam: BAM version 1 compressed sequence data
+
+$ htsfile ce_tag_padded.sam
+ce_tag_padded.sam: SAM version 1.4 sequence text
+
+# compresses sample.fastq to sample.fastq.gz in BGZF format (blocked GNU Zip Format)
+$ bgzip sample.fastq
+```
+
+Better documentation for `bgzip`, `tabix` and `htsfile` can be found at [https://www.htslib.org/doc/](https://www.htslib.org/doc/)