Merge pull request #43 from UPHL-BioNGS/erin-dev

remove commas in mlst
UPHL-BioNGS · Mar 1, 2023 · 0e9bb28 · 0e9bb28
2 parents 61cc6a4 + 0dfbfa8
commit 0e9bb28
Show file tree

Hide file tree

Showing 7 changed files with 16 additions and 11 deletions.
diff --git a/assets/genome_sizes.json b/assets/genome_sizes.json
@@ -147,9 +147,13 @@
     "Stenotrophomonas_maltophilia" : 4481118,
     "Sutterella_wadsworthensis" : 3026517,
     "Variovorax_paradoxus" : 6664268,
+    "Vibrio_alginolyticus": 5146637,
     "Vibrio_cholerae" : 4138412,
+    "Vibrio_fluvialis" : 4787490,
+    "Vibrio_harveyi" : 5854100,
     "Vibrio_mimicus" : 4313453,
     "Vibrio_navarrensis" : 4287126,
+    "Vibrio_paracholerae" : 3953770,
     "Vibrio_parahaemolyticus" : 5165770,
     "Vibrio_vulnificus" : 5117890
   }

diff --git a/assets/genomes.txt b/assets/genomes.txt
@@ -149,6 +149,7 @@ GCF_013374815.1	Shigella_sonnei
 GCF_000013425.1	Staphylococcus_aureus_subsp._aureus_NCTC_8325
 GCF_900475405.1	Stenotrophomonas_maltophilia
 GCF_009665435.1	Vibrio_alginolyticus
+GCA_023650915.1	Vibrio_alginolyticus
 GCF_009665515.2	Vibrio_cholerae
 GCF_008369605.1	Vibrio_cholerae
 GCF_009665415.1	Vibrio_cidicii
@@ -163,6 +164,7 @@ GCF_009665195.1	Vibrio_mimicus
 GCF_000176375.1	Vibrio_mimicus_MB451
 GCF_009665215.1	Vibrio_navarrensis
 GCF_012275065.1	Vibrio_navarrensis
+GCA_003311965.1	Vibrio_paracholerae
 GCF_009665495.1	Vibrio_parahaemolyticus
 GCF_000196095.1	Vibrio_parahaemolyticus_RIMD_2210633
 GCF_009665455.1	Vibrio_vulnificus

diff --git a/bin/summary.py b/bin/summary.py
@@ -23,7 +23,7 @@
 kleborate      = 'kleborate_results.tsv'
 kraken2        = 'kraken2_summary.csv'
 mash           = 'mash_summary.csv'
-mlst           = 'mlst_summary.csv'
+mlst           = 'mlst_summary.tsv'
 plasmidfinder  = 'plasmidfinder_result.tsv'
 quast          = 'quast_report.tsv'
 seqsero2       = 'seqsero2_results.txt'
@@ -38,8 +38,8 @@
 # grouping similar files                 #
 ##########################################
 
-csv_files = [ fastqscan, mlst ]
-tsv_files = [ quast, seqsero2, kleborate ]
+csv_files = [ fastqscan ]
+tsv_files = [ quast, seqsero2, kleborate, mlst ]
 
 top_hit    = [ fastani ]
 
@@ -116,8 +116,7 @@
     print("Adding results for " + file)
     analysis = "fastani"
     new_df = pd.read_csv(file, dtype = str, index_col= False)
-    new_df['ref1'] = new_df['reference'].str.split("/").str[1]
-    new_df['genome (ANI estimate)'] = new_df['ref1'].str.split('_').str[0] + " " + new_df['ref1'].str.split("_").str[1] + " " + new_df['ref1'].str.split('_').str[-2] + "_" + new_df['ref1'].str.split('_').str[-1] + " (" + new_df['ANI estimate'] + ")"
+    new_df['genome (ANI estimate)'] = new_df['reference'].str.split('_').str[0] + " " + new_df['reference'].str.split("_").str[1] + " " + new_df['reference'].str.split('_').str[-2] + "_" + new_df['reference'].str.split('_').str[-1] + " (" + new_df['ANI estimate'] + ")"
     new_df = new_df[['sample', 'genome (ANI estimate)']]
     new_df = new_df.groupby('sample', as_index=False).agg({'genome (ANI estimate)': lambda x: list(x)})
     new_df = new_df.add_prefix(analysis + '_')

diff --git a/modules/fastani.nf b/modules/fastani.nf
@@ -44,7 +44,7 @@ process fastani {
       | tee -a $log_file
 
     echo "sample,query,reference,ANI estimate,total query sequence fragments,fragments aligned as orthologous matches" > fastani/!{sample}_fastani.csv
-    cat fastani/!{sample}.txt | sed 's/,//g' | tr "\\t" "," | awk -v sample=!{sample} '{ print sample "," $0 }' >> fastani/!{sample}_fastani.csv
+    cat fastani/!{sample}.txt | sed 's/,//g' | sed 's/!{genomes}\\///g' | tr "\\t" "," | awk -v sample=!{sample} '{ print sample "," $0 }' >> fastani/!{sample}_fastani.csv
 
     top_hit=$(head -n 2 fastani/!{sample}_fastani.csv | tail -n 1 | cut -f 3 -d , )
     if [ -f "$top_hit" ]; then mkdir -p top_hit ; cp $top_hit top_hit/. ; fi

diff --git a/modules/mlst.nf b/modules/mlst.nf
@@ -13,7 +13,7 @@ process mlst {
   tuple val(sample), file(contig)
 
   output:
-  path "mlst/${sample}_mlst.csv"                                 , emit: collect
+  path "mlst/${sample}_mlst.tsv"                                 , emit: collect
   path "logs/${task.process}/${sample}.${workflow.sessionId}.log", emit: log
 
   shell:
@@ -28,11 +28,11 @@ process mlst {
     echo "Nextflow command : " >> $log_file
     cat .command.sh >> $log_file
 
-    echo "sample,filename,matching PubMLST scheme,ST,ID1,ID2,ID3,ID4,ID5,ID6,ID7,ID8,ID9,ID10,ID11,ID12,ID13,ID14,ID15" > mlst/!{sample}_mlst.csv
+    echo -e "sample\\tfilename\\tmatching PubMLST scheme\\tST\\tID1\\tID2\\tID3\\tID4\\tID5\\tID6\\tID7\\tID8\\tID9\\tID10\\tID11\\tID12\\tID13\\tID14\\tID15" > mlst/!{sample}_mlst.tsv
 
     mlst !{params.mlst_options} \
       --threads !{task.cpus} \
       !{contig} | \
-      awk -v sample=!{sample} '{print sample "," $1 "," $2 "," $3 "," $4 "," $5 "," $6 "," $7 "," $8 "," $9 "," $10 "," $11 "," $12 "," $13 "," $14 "," $15 "," $16 "," $17 "," $18}' >> mlst/!{sample}_mlst.csv
+      awk -v sample=!{sample} '{print sample "\\t" $1 "\\t" $2 "\\t" $3 "\\t" $4 "\\t" $5 "\\t" $6 "\\t" $7 "\\t" $8 "\\t" $9 "\\t" $10 "\\t" $11 "\\t" $12 "\\t" $13 "\\t" $14 "\\t" $15 "\\t" $16 "\\t" $17 "\\t" $18}' >> mlst/!{sample}_mlst.tsv
   '''
 }
diff --git a/nextflow.config b/nextflow.config
@@ -3,7 +3,7 @@ manifest {
   author                          = 'Erin Young'
   homePage                        = 'https://github.com/UPHL-BioNGS/Grandeur'
   mainScript                      = 'grandeur.nf'
-  version                         = '3.0.20230205'
+  version                         = '3.0.20230310'
   defaultBranch                   = 'main'
   description                     = 'Grandeur is short-read de novo assembly pipeline with serotyping.'
 }

diff --git a/subworkflows/information.nf b/subworkflows/information.nf
@@ -59,7 +59,7 @@ workflow information {
       .set { flag_summary }
 
     mlst.out.collect
-      .collectFile(name: "mlst_summary.csv",
+      .collectFile(name: "mlst_summary.tsv",
         keepHeader: true,
         sort: { file -> file.text },
         storeDir: "${params.outdir}/mlst")