-
Notifications
You must be signed in to change notification settings - Fork 125
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #803 from HarryHung/poppunk-2.6.2
Add v2.6.2 to PopPUNK
- Loading branch information
Showing
5 changed files
with
249 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
# Runtime stage: PopPUNK installed into its own micromamba environment.
# The keyword casing of AS matches FROM so BuildKit's FromAsCasing check passes.
FROM mambaorg/micromamba:1.5.3 AS app

# Version arguments
# ARG variables only persist during build time
ARG POPPUNK_VERSION="2.6.2"

# build and run as root since the micromamba image has 'mambauser' set as the $USER
USER root
# set workdir to default for building; set to /data at the end
WORKDIR /

LABEL base.image="mambaorg/micromamba:1.5.3"
LABEL dockerfile.version="2"
LABEL software="PopPUNK"
LABEL software.version="${POPPUNK_VERSION}"
LABEL description="POPulation Partitioning Using Nucleotide Kmers"
LABEL website="https://github.com/bacpop/PopPUNK"
LABEL license="https://github.com/bacpop/PopPUNK/blob/master/LICENSE"
LABEL maintainer="Curtis Kapsak"
# NOTE(review): both e-mail values look redacted in this copy of the file - confirm the real addresses
LABEL maintainer.email="[email protected]"
LABEL maintainer2="Harry Hung"
LABEL maintainer2.email="[email protected]"

# install dependencies (sorted alphabetically for easier diffs); cleanup apt garbage in the same layer
# wget is used by test.sh in the test stage to download the assemblies
RUN apt-get update && apt-get install -y --no-install-recommends \
    ca-certificates \
    procps \
    wget && \
    apt-get autoclean && rm -rf /var/lib/apt/lists/*

# Create PopPUNK conda environment called poppunk-env from bioconda recipe
# -y answers the confirmation prompt explicitly so the build never waits on stdin
# clean up conda garbage in the same layer
RUN micromamba create -y -n poppunk-env -c conda-forge -c bioconda -c defaults "poppunk=${POPPUNK_VERSION}" && \
    micromamba clean -a -y

# set the environment, put new conda env in PATH by default; set locales to UTF-8
ENV PATH="/opt/conda/envs/poppunk-env/bin:${PATH}" \
    LC_ALL=C.UTF-8

# set working directory to /data
WORKDIR /data
|
||
# new base for testing; built on top of the app stage so the tests run against the shipped image contents
# (AS is uppercase to match FROM and satisfy BuildKit's FromAsCasing check)
FROM app AS test

# so that mamba/conda env is active when running below commands
ENV ENV_NAME="poppunk-env"
ARG MAMBA_DOCKERFILE_ACTIVATE=1

# print out various help options and version
# the && chain makes the layer fail as soon as any entry point is missing or broken
RUN poppunk --help && \
    poppunk_assign --help && \
    poppunk_visualise --help && \
    poppunk_mst --help && \
    poppunk_references --help && \
    poppunk_info --help && \
    poppunk_mandrake --help && \
    poppunk --version

# Download 100 S. Pneumo assemblies from GPS Public Data on ENA
# Build PopPUNK database from the assemblies
# Assign clusters on the same assemblies using the built database
# Compare the database clusters and assigned clusters of the assemblies
COPY test.sh ftps.txt /data/
# test.sh and ftps.txt are referenced by relative path, so this relies on the
# WORKDIR /data inherited from the app stage
RUN bash test.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
# PopPUNK container | ||
|
||
Main tool : | ||
- [PopPUNK](https://github.com/bacpop/PopPUNK) | ||
|
||
Additional tools: | ||
- biopython 1.81 | ||
- pp-sketchlib 2.1.1 | ||
- python 3.10.13 | ||
- rapidnj 2.3.2 | ||
- treeswift 1.1.38 | ||
|
||
Full documentation: [https://poppunk.readthedocs.io/en/latest/](https://poppunk.readthedocs.io/en/latest/) | ||
|
||
PopPUNK is also available as a webtool: [https://www.poppunk.net/](https://www.poppunk.net/) | ||
|
||
PopPUNK is a tool for clustering genomes. | ||
|
||
*NOTE: This docker image is intended for the CLI usage of the PopPUNK tool. It has not been built with the full web-interface functionality in mind.* | ||
|
||
## Example Usage | ||
|
||
This example uses PopPUNK to cluster *Streptococcus pneumoniae* genomes with a database and reference files provided by the [Global Pneumococcal Sequencing Project](https://www.pneumogen.net/gps/training_command_line.html). An example *S. pneumoniae* genome can be obtained from [here](https://github.com/rpetit3/pbptyper/blob/main/test/SRR2912551.fna.gz). | ||
|
||
```bash | ||
# poppunk requires an input File Of File Names (FOFN): a headerless TSV with the sample name in the first column, followed by the path to the input FASTA | ||
$ echo -e "SRR2912551\t/data/SRR2912551.fna.gz" > poppunk_input.tsv | ||
|
||
# showing reference files, FASTA input, and poppunk_input.tsv | ||
$ ls | ||
GPS_v6/ GPS_v6_external_clusters.csv SRR2912551.fna.gz poppunk_input.tsv | ||
|
||
# run the docker container interactively | ||
# followed by poppunk command run inside the container | ||
$ docker run --rm -ti -v ${PWD}:/data -u $(id -u):$(id -g) staphb/poppunk:2.6.2 | ||
$ poppunk_assign --db GPS_v6 --distances GPS_v6/GPS_v6.dists --query /data/poppunk_input.tsv --output docker_test --external-clustering GPS_v6_external_clusters.csv | ||
PopPUNK: assign | ||
(with backend: sketchlib v2.0.0 | ||
sketchlib: /opt/conda/envs/poppunk-env/lib/python3.10/site-packages/pp_sketchlib.cpython-310-x86_64-linux-gnu.so) | ||
|
||
Graph-tools OpenMP parallelisation enabled: with 1 threads | ||
Mode: Assigning clusters of query sequences | ||
|
||
Loading previously refined model | ||
Completed model loading | ||
Sketching 1 genomes using 1 thread(s) | ||
Progress (CPU): 1 / 1 | ||
Writing sketches to file | ||
WARNING: versions of input databases sketches are different, results may not be compatible | ||
Calculating distances using 1 thread(s) | ||
Progress (CPU): 100.0% | ||
Selected type isolate for distance QC is 10050_2#1 | ||
Network loaded: 42163 samples | ||
|
||
Done | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
ftp.sra.ebi.ac.uk/vol1/ERZ322/ERZ3224520/SAMEA3171250.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ319/ERZ3198719/SAMEA2554210.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ322/ERZ3225470/SAMEA3175912.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ321/ERZ3213352/SAMEA2696388.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ319/ERZ3198630/SAMEA2554162.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ321/ERZ3217126/SAMEA2783707.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ342/ERZ3423083/SAMEA3447953.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ321/ERZ3218603/SAMEA2797493.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ304/ERZ3040777/SAMEA104035490.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ304/ERZ3041170/SAMEA104035895.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ320/ERZ3206617/SAMEA2658361.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ321/ERZ3218818/SAMEA2814082.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ937/ERZ9377441/SAMEA4763391.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ320/ERZ3206566/SAMEA2658309.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ319/ERZ3195033/SAMEA2467770.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ307/ERZ3079299/SAMEA104154757.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ319/ERZ3193270/SAMEA2434815.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ307/ERZ3079844/SAMEA104155118.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ289/ERZ2890096/SAMEA102263668.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ307/ERZ3079332/SAMEA104154777.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ321/ERZ3218215/SAMEA2797058.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ322/ERZ3225910/SAMEA3176187.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ319/ERZ3197406/SAMEA2521772.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ316/ERZ3164266/SAMEA2204200.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ319/ERZ3192969/SAMEA2434607.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ930/ERZ9307577/SAMEA3232684.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ313/ERZ3136035/SAMEA2051001.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ321/ERZ3218069/SAMEA2796905.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ304/ERZ3041120/SAMEA104035851.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ307/ERZ3078938/SAMEA104154484.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ320/ERZ3207298/SAMEA2659051.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ319/ERZ3197214/SAMEA2521572.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ307/ERZ3078796/SAMEA104154345.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ321/ERZ3217801/SAMEA2796638.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ325/ERZ3251997/SAMEA3309548.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ314/ERZ3148576/SAMEA2066281.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ319/ERZ3192932/SAMEA2434566.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ320/ERZ3206876/SAMEA2658626.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ342/ERZ3425910/SAMEA3486806.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ321/ERZ3219036/SAMEA2814305.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ342/ERZ3423073/SAMEA3447941.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ315/ERZ3157278/SAMEA2160059.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ347/ERZ3470667/SAMEA3504771.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ320/ERZ3206664/SAMEA2658409.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ321/ERZ3219281/SAMEA2814555.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ321/ERZ3217894/SAMEA2796732.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ319/ERZ3198788/SAMEA2554243.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ307/ERZ3078812/SAMEA104154360.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ937/ERZ9377474/SAMEA4763408.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ307/ERZ3079689/SAMEA104155019.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ321/ERZ3217835/SAMEA2796676.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ319/ERZ3198813/SAMEA2554255.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ355/ERZ3557099/SAMEA4732546.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ304/ERZ3041014/SAMEA104035733.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ319/ERZ3193000/SAMEA2434629.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ307/ERZ3079342/SAMEA104154791.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ318/ERZ3180205/SAMEA2298232.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ320/ERZ3204423/SAMEA2627391.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ347/ERZ3470928/SAMEA3504807.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ325/ERZ3255898/SAMEA3354185.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ937/ERZ9378367/SAMEA4763819.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ313/ERZ3137747/SAMEA2057315.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ318/ERZ3180296/SAMEA2298295.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ320/ERZ3206874/SAMEA2658623.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ349/ERZ3499256/SAMEA3714360.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ304/ERZ3040602/SAMEA104035309.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ321/ERZ3218412/SAMEA2797262.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ318/ERZ3181632/SAMEA2335756.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ325/ERZ3255160/SAMEA3353584.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ320/ERZ3204680/SAMEA2627527.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ322/ERZ3225180/SAMEA3175678.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ930/ERZ9308145/SAMEA3233336.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ342/ERZ3421572/SAMEA3431627.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ325/ERZ3256127/SAMEA3354364.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ320/ERZ3204642/SAMEA2627509.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ321/ERZ3218686/SAMEA2813951.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ319/ERZ3198971/SAMEA2554336.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ355/ERZ3557464/SAMEA4732913.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ326/ERZ3260947/SAMEA3389675.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ321/ERZ3213543/SAMEA2696586.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ320/ERZ3204451/SAMEA2627406.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ325/ERZ3258030/SAMEA3373712.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ307/ERZ3079114/SAMEA104154622.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ318/ERZ3180293/SAMEA2298296.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ930/ERZ9303942/SAMEA3209083.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ930/ERZ9308103/SAMEA3233306.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ288/ERZ2889920/SAMEA102184918.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ930/ERZ9302272/SAMEA3206695.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ930/ERZ9303776/SAMEA3208988.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ319/ERZ3192979/SAMEA2434614.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ320/ERZ3206542/SAMEA2658288.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ319/ERZ3198331/SAMEA2553822.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ316/ERZ3164201/SAMEA2204129.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ325/ERZ3254708/SAMEA3353251.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ304/ERZ3041104/SAMEA104035825.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ307/ERZ3079033/SAMEA104154554.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ321/ERZ3213369/SAMEA2696405.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ319/ERZ3194930/SAMEA2467335.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ937/ERZ9379100/SAMEA4764119.contigs.fa.gz | ||
ftp.sra.ebi.ac.uk/vol1/ERZ304/ERZ3040936/SAMEA104035654.contigs.fa.gz |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
#!/bin/bash
# End-to-end check of the PopPUNK installation:
#   1. download the assemblies listed in ftps.txt
#   2. build a PopPUNK database from them and fit a model
#   3. assign the same assemblies (under renamed sample IDs) against that database
#   4. fail unless the assigned clusters match the database clusters
# Expects ftps.txt in the current working directory; exits non-zero on any failure.
set -euxo pipefail

# Download 100 S. Pneumo assemblies from GPS Public Data on ENA
# -p keeps the script rerunnable when the directory already exists
mkdir -p assemblies
# read -r keeps backslashes literal; the `|| [[ -n ... ]]` guard still handles a
# final line that has no trailing newline
while IFS= read -r link || [[ -n "$link" ]]; do
    # skip blank lines instead of calling wget without a URL
    if [[ -z "$link" ]]; then
        continue
    fi
    wget -q -P assemblies "$link"
done < ftps.txt

# Generate r-file for creating PopPUNK database: <sample name><TAB><path to assembly>
# A fixed printf format keeps '%' or '\' in a file name from being interpreted,
# and redirecting the whole loop truncates rfile.txt so reruns do not duplicate rows
for FILE in assemblies/*; do
    printf '%s\t%s\n' "$(basename -s .contigs.fa.gz "$FILE")" "$FILE"
done > rfile.txt

# Build PopPUNK database from the assemblies
poppunk --create-db --output database --r-files rfile.txt --threads "$(nproc)"
poppunk --fit-model bgmm --ref-db database

# Assign clusters on the same assemblies using the built database
# the sample names are modified because PopPUNK rejects samples whose names are already in the database
sed 's/^/prefix_/' rfile.txt > qfile.txt
poppunk_assign --db database --query qfile.txt --output output --threads "$(nproc)"

# Compare the database clusters and assigned clusters of the assemblies
# the prefix is stripped again; in both files the header line is kept first and the remaining rows are sorted
sed 's/^prefix_//' output/output_clusters.csv | awk 'NR == 1; NR > 1 { print $0 | "sort" }' > assigned.csv
awk 'NR == 1; NR > 1 { print $0 | "sort" }' database/database_clusters.csv > database.csv
# cmp exits non-zero on the first difference, which fails the script (and the image build)
cmp assigned.csv database.csv