-
Notifications
You must be signed in to change notification settings - Fork 125
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* adding rasusa version 2.1.0 * gnu
- Loading branch information
Showing
3 changed files
with
87 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
FROM ubuntu:jammy as app | ||
|
||
ARG RASUSA_VER="2.1.0" | ||
|
||
LABEL base.image="ubuntu:jammy" | ||
LABEL dockerfile.version="1" | ||
LABEL software="rasusa" | ||
LABEL software.version="${RASUSA_VER}" | ||
LABEL description="Randomly subsample sequencing reads to a specified coverage." | ||
LABEL website="https://github.com/mbhall88/rasusa" | ||
LABEL license="https://github.com/mbhall88/rasusa/blob/master/LICENSE" | ||
LABEL maintainer="Jake Garfin" | ||
LABEL maintainer.email="[email protected]" | ||
LABEL maintainer2="Curtis Kapsak" | ||
LABEL maintainer2.email="[email protected]" | ||
LABEL maintainer3="Erin Young" | ||
LABEL maintainer3.email="[email protected]" | ||
|
||
# install wget and cleanup apt garbage | ||
RUN apt-get update && apt-get -y install --no-install-recommends \ | ||
wget \ | ||
ca-certificates \ | ||
procps && \ | ||
apt-get autoclean && \ | ||
rm -rf /var/lib/apt/lists/* | ||
|
||
# install rasusa | ||
RUN wget -q https://github.com/mbhall88/rasusa/releases/download/${RASUSA_VER}/rasusa-${RASUSA_VER}-x86_64-unknown-linux-gnu.tar.gz && \ | ||
tar -xvf rasusa-${RASUSA_VER}-x86_64-unknown-linux-gnu.tar.gz && \ | ||
rm -rf rasusa-${RASUSA_VER}-x86_64-unknown-linux-gnu.tar.gz && \ | ||
mkdir /data | ||
|
||
# set PATH and perl locale settings | ||
ENV PATH="${PATH}:/rasusa-${RASUSA_VER}-x86_64-unknown-linux-gnu/" \ | ||
LC_ALL=C | ||
|
||
WORKDIR /data | ||
|
||
CMD rasusa --help | ||
|
||
FROM app as test | ||
|
||
# print version and help options | ||
RUN rasusa --version && rasusa --help | ||
|
||
# cannot run rust-based test, since rust isn't installed, but just testing basic command on SC2 reads in our repo | ||
# inputs FASTQs each have 80867 reads | ||
# output FASTQs should have 40434 reads in each (about 1/2 of input) | ||
# -s 1 is for setting a specific seed | ||
# -O is for gzip compression on output FASTQs | ||
RUN wget -q https://github.com/StaPH-B/docker-builds/raw/master/tests/SARS-CoV-2/SRR13957123_1.fastq.gz && \ | ||
wget -q https://github.com/StaPH-B/docker-builds/raw/master/tests/SARS-CoV-2/SRR13957123_2.fastq.gz && \ | ||
rasusa reads --num 40434 -s 1 -O g -o SRR13957123_downsampled_1.fastq.gz -o SRR13957123_downsampled_2.fastq.gz SRR13957123_1.fastq.gz SRR13957123_2.fastq.gz && \ | ||
ls -lh |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
# Rasusa container | ||
|
||
Main tool : [Rasusa](https://github.com/mbhall88/rasusa) | ||
|
||
Additional tools: | ||
|
||
- none | ||
|
||
Full documentation: https://github.com/mbhall88/rasusa | ||
|
||
Randomly subsample sequencing reads to a specified coverage | ||
|
||
## Example Usage | ||
|
||
```bash | ||
# sars-cov-2 example, paired-end illumina | ||
rasusa reads \ | ||
-n 40434 \ # downsample to specific number of reads per FASTQ file | ||
-s 1 \ # set seed | ||
-O g \ # set output file compression format as gzip | ||
-o SRR13957123_downsampled_1.fastq.gz -o SRR13957123_downsampled_2.fastq.gz \ | ||
SRR13957123_1.fastq.gz SRR13957123_2.fastq.gz | ||
|
||
# Salmonella enterica example, paired-end illumina | ||
rasusa reads \ | ||
--coverage 100 \ # use 100X coverage for downsampling | ||
--genome-size 5M \ # downsample to specific coverage based on genome size (5 million bases) | ||
-s 1 \ # set seed | ||
-O g \ # set output file compression format as gzip | ||
-o SRR10992628_downsampled_1.fastq.gz -o SRR10992628_downsampled_2.fastq.gz \ | ||
SRR10992628_1.fastq.gz SRR10992628_2.fastq.gz | ||
``` |