diff --git a/Program_Licenses.md b/Program_Licenses.md index d57e19474..b7e3f7419 100644 --- a/Program_Licenses.md +++ b/Program_Licenses.md @@ -69,6 +69,7 @@ The licenses of the open-source software that is contained in these Docker image | geNomad | ACADEMIC, INTERNAL, RESEARCH & DEVELOPMENT, NON-COMMERCIAL USE ONLY | https://github.com/apcamargo/genomad/blob/main/LICENSE | | GenoVi | BY-NC-SA Creative Commons License | https://github.com/robotoD/GenoVi/blob/main/LICENSE.txt | | gfastats | MIT | https://github.com/vgl-hub/gfastats/blob/main/LICENSE | +| Grandeur | GNU General Public License v3.0 | https://github.com/UPHL-BioNGS/Grandeur/blob/main/LICENSE | | Gubbins | GNU GPLv2 | https://github.com/nickjcroucher/gubbins/blob/master/LICENSE | | HeatCluster | GPL-3.0 | https://github.com/DrB-S/heatcluster/blob/main/LICENSE | | Hmmer | BSD-3 | http://eddylab.org/software/hmmer/Userguide.pdf | diff --git a/README.md b/README.md index 363353eed..9cf6d4dd3 100644 --- a/README.md +++ b/README.md @@ -178,6 +178,7 @@ To learn more about the docker pull rate limits and the open source software pro | [geNomad](https://hub.docker.com/r/staphb/genomad)
[![docker pulls](https://badgen.net/docker/pulls/staphb/genomad)](https://hub.docker.com/r/staphb/genomad) | | https://github.com/apcamargo/genomad | | [GenoVi](https://hub.docker.com/r/staphb/genovi)
[![docker pulls](https://badgen.net/docker/pulls/staphb/genovi)](https://hub.docker.com/r/staphb/genovi) | | https://github.com/robotoD/GenoVi | | [gfastats](https://hub.docker.com/r/staphb/gfastats)
[![docker pulls](https://badgen.net/docker/pulls/staphb/gfastats)](https://hub.docker.com/r/staphb/gfastats) | | https://github.com/vgl-hub/gfastats | +| [grandeur_ref](https://hub.docker.com/r/staphb/grandeur_ref)
[![docker pulls](https://badgen.net/docker/pulls/staphb/grandeur_ref)](https://hub.docker.com/r/staphb/grandeur_ref) | | Part of https://github.com/UPHL-BioNGS/Grandeur | | [Gubbins](https://hub.docker.com/r/staphb/gubbins)
[![docker pulls](https://badgen.net/docker/pulls/staphb/gubbins)](https://hub.docker.com/r/staphb/gubbins) | | https://github.com/nickjcroucher/gubbins | | [heatcluster](https://hub.docker.com/r/staphb/heatcluster)
[![docker pulls](https://badgen.net/docker/pulls/staphb/heatcluster)](https://hub.docker.com/r/staphb/heatcluster) | | https://github.com/DrB-S/heatcluster/tree/main | | [hmmer](https://hub.docker.com/r/staphb/hmmer)
[![docker pulls](https://badgen.net/docker/pulls/staphb/hmmer)](https://hub.docker.com/r/staphb/hmmer) | | http://hmmer.org/ |
diff --git a/grandeur_ref/4.5/Dockerfile b/grandeur_ref/4.5/Dockerfile
new file mode 100644
index 000000000..22416aad7
--- /dev/null
+++ b/grandeur_ref/4.5/Dockerfile
@@ -0,0 +1,46 @@
+FROM staphb/ncbi-datasets:16.35.0 AS fasta
+
+WORKDIR /ref
+
+COPY accessions.txt rename.sh /ref/
+
+# accessions.txt labels each organism with a '#' line; keep only the accession ids
+RUN grep -v '^#' accessions.txt > ids.txt
+
+RUN datasets download genome accession --inputfile ids.txt
+
+RUN unzip *zip
+
+# rename.sh writes the renamed, gzipped fasta files into prep/
+RUN mkdir prep && bash rename.sh
+
+FROM ubuntu:jammy AS app
+
+LABEL base.image="ubuntu:jammy"
+LABEL dockerfile.version="1"
+LABEL software="References for UPHL-BioNGS/Grandeur"
+LABEL software.version="4.5"
+LABEL description="Reference genomes for Grandeur"
+LABEL website="https://github.com/UPHL-BioNGS/Grandeur"
+LABEL maintainer="Erin Young"
+LABEL maintainer.email="eriny@utah.gov"
+
+WORKDIR /ref
+
+# only the contents of prep/ are carried over from the download stage
+COPY --from=fasta /ref/prep /ref
+
+# install dependencies; cleanup apt garbage
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    procps && \
+    apt-get autoclean && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /data
+
+# exec form: lists the references without going through /bin/sh -c
+CMD ["ls", "/ref"]
+
+FROM app AS test
+
+# ls exits non-zero when the glob matches nothing, so an empty /ref fails the build
+RUN ls /ref/*.fna.gz
diff --git a/grandeur_ref/4.5/README.md b/grandeur_ref/4.5/README.md
new file mode 100644
index 000000000..ef91443c2
--- /dev/null
+++ b/grandeur_ref/4.5/README.md
@@ -0,0 +1,11 @@
+# Grandeur References container
+
+Main tool: Part of [UPHL-BioNGS/Grandeur](https://github.com/UPHL-BioNGS/Grandeur)
+
+Code repository: https://github.com/UPHL-BioNGS/Grandeur
+
+Basic information on how to use this tool:
+- executable: NA
+- help: NA
+- version: NA
+- description: Contains reference sequences that Grandeur uses by default. Not intended to be run separately.
diff --git a/grandeur_ref/4.5/accessions.txt b/grandeur_ref/4.5/accessions.txt new file mode 100644 index 000000000..b032a469b --- /dev/null +++ b/grandeur_ref/4.5/accessions.txt @@ -0,0 +1,380 @@ +#Acinetobacter_baumannii +GCF_008632635.1 +GCF_009035845.1 +#Acinetobacter_bereziniae +GCF_016576965.1 +#Acinetobacter_calcoaceticus +GCF_002055515.1 +#Acinetobacter_guillouiae +GCF_002370525.2 +#Acinetobacter_haemolyticus +GCF_003323815.1 +#Acinetobacter_junii +GCF_018336855.1 +#Acinetobacter_lactucae +GCF_013122135.1 +#Acinetobacter_nosocomialis_M2 +GCF_005281455.1 +#Acinetobacter_pittii_PHEA-2 +GCF_000191145.1 +#Acinetobacter_proteolyticus +GCF_000367945.1 +#Acinetobacter_radioresistens +GCF_003258335.1 +#Acinetobacter_schindleri +GCF_010918895.1 +#Acinetobacter_seifertii +GCF_016064815.1 +#Acinetobacter_variabilis +GCF_018409485.1 +#Alcaligenes_faecalis +GCF_000967305.2 +#Burkholderia_cepacia +GCF_009586235.1 +#Burkholderia_multivorans +GCF_003019965.1 +#Campylobacter_coli +GCA_008011635.1 +#Campylobacter_coli +GCF_009730395.1 +#Campylobacter_fetus +GCF_011600945.2 +#Campylobacter_fetus +GCF_000015085.1 +GCF_000495505.1 +GCF_000759515.1 +#Campylobacter_hyointestinalis +GCF_001643955.1 +GCF_013372165.1 +#Campylobacter_jejuni_subsp._doylei_269.97 +GCF_000017485.1 +#Campylobacter_jejuni_subsp._jejuni +GCA_008011525.1 +#Campylobacter_jejuni_subsp._jejuni_NCTC_11168_ATCC_700819 +GCF_000009085.1 +#Campylobacter_lari +GCF_000019205.1 +GCF_000816225.1 +#Campylobacter_peloridis +GCF_014931075.1 +#Campylobacter_subantarcticus_LMG_24377 +GCF_000816305.1 +#Campylobacter_upsaliensis +GCA_008011615.1 +GCF_916098265.1 +#Citrobacter_amalonaticus +GCF_001558935.2 +#Citrobacter_braakii +GCF_009648935.1 +#Citrobacter_farmeri +GCF_003938205.1 +#Citrobacter_freundii +GCF_003812345.1 +#Citrobacter_gillenii +GCF_003429605.1 +#Citrobacter_koseri +GCF_000018045.1 +#Citrobacter_murliniae +GCF_004801125.1 +#Citrobacter_pasteurii +GCF_019047765.1 +#Citrobacter_portucalensis +GCF_008693605.1 
+#Citrobacter_sedlakii +GCF_018128425.1 +#Citrobacter_werkmanii +GCF_008693645.1 +#Citrobacter_youngae +GCF_900638065.1 +#Clostridium_botulinum +GCF_000063585.1 +#Clostridium_perfringens +GCF_020138775.1 +#Clostridium_sporogenes +GCF_020450145.1 +#Cronobacter_dublinensis +GCF_001277235.1 +#Cronobacter_malonaticus +GCF_001277215.2 +#Cronobacter_muytjensii +GCF_001277195.1 +#Cronobacter_sakazakii +GCF_003516125.1 +#Cronobacter_turicensis +GCF_011605535.1 +#Cronobacter_universalis +GCF_001277175.1 +#Elizabethkingia_anophelis +GCF_002023665.2 +#Elizabethkingia_meningoseptica +GCF_002022145.1 +#Enterobacter_asburiae +GCF_007035805.1 +#Enterobacter_bugandensis +GCF_015137655.1 +#Enterobacter_bugandensis +GCF_020042625.1 +#Enterobacter_cancerogenus +GCF_019665745.1 +#Enterobacter_chengduensis +GCF_001984825.2 +#Enterobacter_chuandaensis +GCF_003594915.1 +#Enterobacter_cloacae +GCF_023702375.1 +GCF_905331265.2 +#Enterobacter_dykesii +GCF_018597265.1 +#Enterobacter_hormaechei +GCF_019048625.1 +#Enterobacter_huaxiensis +GCF_003594935.2 +#Enterobacter_kobei +GCF_000534275.1 +#Enterobacter_ludwigii +GCF_001750725.1 +#Enterobacter_mori +GCF_022014715.1 +#Enterobacter_oligotrophicus +GCF_009176645.1 +#Enterobacter_quasihormaechei +GCF_004331385.1 +#Enterobacter_quasimori +GCF_018597345.1 +#Enterobacter_quasiroggenkampii +GCF_003964805.1 +#Enterobacter_roggenkampii +GCF_001729805.1 +#Enterobacter_sichuanensis +GCF_009036245.1 +#Enterobacter_soli +GCF_000224675.1 +#Enterobacter_vonholyi +GCF_008364555.1 +#Enterobacter_wuhouensis +GCF_004331265.1 +#Enterococcus_faecalis +GCF_000393015.1 +#Enterococcus_faecium +GCF_009734005.1 +#Escherichia_albertii +GCF_016904755.1 +GCF_000512125.1 +#Escherichia_coli_O157:H7_str._Sakai +GCF_000008865.2 +#Escherichia_coli_O27:H7 +GCF_002741475.1 +#Escherichia_coli_str._K-12_substr._MG1655 +GCF_000005845.2 +#Escherichia_fergusonii +GCF_000026225.1 +GCF_020097475.1 +#Grimontia_hollisae +GCF_009665295.1 +#Haemophilus_aegyptius +GCF_900475885.1 
+#Haemophilus_haemolyticus +GCF_900477945.1 +#Haemophilus_influenzae +GCF_000931575.1 +#Haemophilus_parainfluenzae_ATCC_33392 +GCF_000191405.1 +#Hafnia_alvei +GCF_011617105.1 +#Hafnia_paralvei +GCF_020150375.1 +#Klebsiella_aerogenes +GCF_007632255.1 +#Klebsiella_michiganensis +GCF_015139575.1 +#Klebsiella_oxytoca +GCF_003812925.1 +#Klebsiella_pasteurii +GCF_018139045.1 +#Klebsiella_pneumoniae +GCF_022869665.1 +GCF_000240185.1 +GCF_022699345.1 +#Klebsiella_quasipneumoniae +GCF_016415705.1 +GCF_020099175.1 +#Klebsiella_variicola +GCF_009648975.1 +#Kluyvera_ascorbata +GCF_023195735.1 +#Leclercia_adecarboxylata +GCF_001518835.1 +#Legionella_pneumophila +GCF_001753085.1 +#Lelliottia_amnigena +GCF_019355955.1 +#Listeria_innocua +GCF_009648575.1 +#Listeria_innocua +GCF_017363615.1 +GCF_017363655.1 +#Listeria_ivanovii +GCF_000252975.1 +#Listeria_marthii +GCF_017363645.1 +#Listeria_monocytogenes +GCF_000196035.1 +#Listeria_monocytogenes +GCF_001466295.1 +GCF_013625895.1 +GCF_013625995.1 +GCF_013626145.1 +GCF_014526935.1 +#Listeria_seeligeri +GCF_017363605.1 +#Listeria_welshimeri +GCF_002489005.1 +#Mixta_calida +GCA_007681265.1 +#Morganella_morganii +GCF_902387845.1 +#Mycobacterium_avium_subsp_hominissuis +GCF_022175585.1 +#Mycobacterium_leprae +GCF_003253775.1 +#Mycobacterium_marinum_E11 +GCF_000723425.2 +#Mycobacterium_tuberculosis_H37Rv +GCF_000195955.2 +#Mycobacterium_ulcerans +GCF_020616615.1 +#Neisseria_gonorrhoeae +GCF_013030075.1 +#Pantoea_ananatis +GCF_000233595.1 +#Photobacterium_damselae +GCF_009665375.1 +#Pluralibacter_gergoviae +GCF_003019925.1 +#Proteus_hauseri +GCF_004116975.1 +#Proteus_mirabilis +GCF_000069965.1 +#Proteus_vulgaris +GCF_000754995.1 +#Providencia_huaxiensis +GCF_002843235.3 +#Providencia_rettgeri +GCF_003204135.1 +#Providencia_stuartii +GCF_023547145.1 +GCF_029277985.1 +#Pseudescherichia_vulneris +GCF_902164725.1 +#Pseudomonas_aeruginosa +GCF_000006765.1 +GCF_001457615.1 +GCF_000981825.1 +#Pseudomonas_alcaligenes +GCF_001597285.1 
+#Pseudomonas_fluorescens +GCF_900215245.1 +#Pseudomonas_fulva +GCF_001186195.1 +#Pseudomonas_furukawaii +GCF_002355475.1 +#Pseudomonas_multiresinivorans +GCF_012971725.1 +#Pseudomonas_nitroreducens +GCF_012986205.1 +#Pseudomonas_otitidis +GCF_011397855.1 +#Pseudomonas_paraeruginosa +GCF_003025345.2 +#Pseudomonas_putida +GCF_000412675.1 +#Ralstonia_pickettii +GCF_902374465.1 +#Raoultella_ornithinolytica +GCF_901421005.1 +#Raoultella_planticola +GCF_022637595.1 +#Salmonella_bongori +GCF_000439255.1 +#Salmonella_enterica +GCA_011388235.1 +#Salmonella_enterica_subsp._enterica_serovar_Typhimurium +GCF_000006945.2 +#Salmonella_enterica_subsp._houtenae +GCA_013588055.1 +#Serratia_marcescens +GCF_003516165.1 +GCF_017654245.1 +GCF_017298695.1 +#Serratia_nematodiphila +GCF_004768745.1 +#Shigella_boydii +GCF_002290485.1 +#Shigella_dysenteriae +GCF_022354085.1 +#Shigella_flexneri +GCF_000006925.2 +#Shigella_sonnei +GCF_013374815.1 +#Staphylococcus_aureus_subsp._aureus +GCF_000013425.1 +#Stenotrophomonas_maltophilia +GCF_900475405.1 +#Streptococcus_anginosus +GCF_001412635.1 +#Streptococcus_dysgalactiae +GCF_016724885.1 +#Streptomyces_iconiensis +GCF_028657195.3 +#Streptococcus_pneumoniae +GCF_002076835.1 +#Streptococcus_pseudopneumoniae +GCF_000221985.1 +#Streptococcus_pyogenes +GCF_002055535.1 +GCF_900475035.1 +#Vibrio_alginolyticus +GCA_023650915.1 +#Vibrio_alginolyticus +GCF_009665435.1 +#Vibrio_cholerae +GCA_009665515.2 +GCF_008369605.1 +#Vibrio_cidicii +GCA_009665415.1 +#Vibrio_cincinnatiensis +GCF_009665395.1 +#Vibrio_fluvialis +GCF_009665355.1 +#Vibrio_furnissii +GCF_009665335.1 +#Vibrio_harveyi +GCF_009665315.1 +#Vibrio_metoecus +GCF_009665255.1 +#Vibrio_metoecus +GCF_009665275.1 +#Vibrio_metschnikovii +GCF_009665235.1 +#Vibrio_mimicus +GCF_009665195.1 +GCF_000176375.1 +#Vibrio_navarrensis +GCF_009665215.1 +GCF_012275065.1 +#Vibrio_paracholerae +GCA_003311965.1 +#Vibrio_parahaemolyticus +GCF_009665495.1 +GCF_000196095.1 +#Vibrio_vulnificus +GCF_002204915.1 
+GCF_009665455.1
+GCF_009665475.1
+#Yersinia_pestis
+GCF_000222975.1
+GCF_000834755.1
+#Yersinia_enterocolitica
+GCF_025758635.1
+#Yersinia_pseudotuberculosis
+GCF_000834295.1
diff --git a/grandeur_ref/4.5/rename.sh b/grandeur_ref/4.5/rename.sh
new file mode 100644
index 000000000..d44e430ac
--- /dev/null
+++ b/grandeur_ref/4.5/rename.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+# Compresses each fasta matching */data/*/*.fna to prep/<genus>_<species>_<accession>.fna.gz.
+# The genus and species are the 2nd and 3rd words of the first fasta header line;
+# the accession is the name of the directory that holds the fasta file.
+
+set -euo pipefail
+shopt -s nullglob
+
+mkdir -p prep
+
+files=( */data/*/*.fna )
+if [ "${#files[@]}" -eq 0 ]
+then
+  echo "No fasta files found matching */data/*/*.fna" >&2
+  exit 1
+fi
+
+for file in "${files[@]}"
+do
+  # strip punctuation and the UNVERIFIED_ORG flag so they do not end up in the file name
+  header=$(head -n 1 "$file" | sed -e 's/[][,:.]//g' -e 's/UNVERIFIED_ORG//g')
+  read -r _ genus species _ <<< "$header"
+  echo "The organism is $genus $species"
+  accession=$(basename "$(dirname "$file")")
+  echo "The accession is $accession"
+  gzip -c "$file" > "prep/${genus}_${species}_${accession}.fna.gz"
+done