Skip to content

Commit

Permalink
Merge pull request #43 from UPHL-BioNGS/erin-dev
Browse files (browse the repository at this point in the history)
remove commas in mlst
  • Loading branch information
erinyoung authored Mar 1, 2023
2 parents 61cc6a4 + 0dfbfa8 commit 0e9bb28
Show file tree
Hide file tree
Showing 7 changed files with 16 additions and 11 deletions.
4 changes: 4 additions & 0 deletions assets/genome_sizes.json
Original file line number Diff line number Diff line change
Expand Up @@ -147,9 +147,13 @@
"Stenotrophomonas_maltophilia" : 4481118,
"Sutterella_wadsworthensis" : 3026517,
"Variovorax_paradoxus" : 6664268,
"Vibrio_alginolyticus": 5146637,
"Vibrio_cholerae" : 4138412,
"Vibrio_fluvialis" : 4787490,
"Vibrio_harveyi" : 5854100,
"Vibrio_mimicus" : 4313453,
"Vibrio_navarrensis" : 4287126,
"Vibrio_paracholerae" : 3953770,
"Vibrio_parahaemolyticus" : 5165770,
"Vibrio_vulnificus" : 5117890
}
Expand Down
2 changes: 2 additions & 0 deletions assets/genomes.txt
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ GCF_013374815.1 Shigella_sonnei
GCF_000013425.1 Staphylococcus_aureus_subsp._aureus_NCTC_8325
GCF_900475405.1 Stenotrophomonas_maltophilia
GCF_009665435.1 Vibrio_alginolyticus
GCA_023650915.1 Vibrio_alginolyticus
GCF_009665515.2 Vibrio_cholerae
GCF_008369605.1 Vibrio_cholerae
GCF_009665415.1 Vibrio_cidicii
Expand All @@ -163,6 +164,7 @@ GCF_009665195.1 Vibrio_mimicus
GCF_000176375.1 Vibrio_mimicus_MB451
GCF_009665215.1 Vibrio_navarrensis
GCF_012275065.1 Vibrio_navarrensis
GCA_003311965.1 Vibrio_paracholerae
GCF_009665495.1 Vibrio_parahaemolyticus
GCF_000196095.1 Vibrio_parahaemolyticus_RIMD_2210633
GCF_009665455.1 Vibrio_vulnificus
Expand Down
9 changes: 4 additions & 5 deletions bin/summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
kleborate = 'kleborate_results.tsv'
kraken2 = 'kraken2_summary.csv'
mash = 'mash_summary.csv'
mlst = 'mlst_summary.csv'
mlst = 'mlst_summary.tsv'
plasmidfinder = 'plasmidfinder_result.tsv'
quast = 'quast_report.tsv'
seqsero2 = 'seqsero2_results.txt'
Expand All @@ -38,8 +38,8 @@
# grouping similar files #
##########################################

csv_files = [ fastqscan, mlst ]
tsv_files = [ quast, seqsero2, kleborate ]
csv_files = [ fastqscan ]
tsv_files = [ quast, seqsero2, kleborate, mlst ]

top_hit = [ fastani ]

Expand Down Expand Up @@ -116,8 +116,7 @@
print("Adding results for " + file)
analysis = "fastani"
new_df = pd.read_csv(file, dtype = str, index_col= False)
new_df['ref1'] = new_df['reference'].str.split("/").str[1]
new_df['genome (ANI estimate)'] = new_df['ref1'].str.split('_').str[0] + " " + new_df['ref1'].str.split("_").str[1] + " " + new_df['ref1'].str.split('_').str[-2] + "_" + new_df['ref1'].str.split('_').str[-1] + " (" + new_df['ANI estimate'] + ")"
new_df['genome (ANI estimate)'] = new_df['reference'].str.split('_').str[0] + " " + new_df['reference'].str.split("_").str[1] + " " + new_df['reference'].str.split('_').str[-2] + "_" + new_df['reference'].str.split('_').str[-1] + " (" + new_df['ANI estimate'] + ")"
new_df = new_df[['sample', 'genome (ANI estimate)']]
new_df = new_df.groupby('sample', as_index=False).agg({'genome (ANI estimate)': lambda x: list(x)})
new_df = new_df.add_prefix(analysis + '_')
Expand Down
2 changes: 1 addition & 1 deletion modules/fastani.nf
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ process fastani {
| tee -a $log_file
echo "sample,query,reference,ANI estimate,total query sequence fragments,fragments aligned as orthologous matches" > fastani/!{sample}_fastani.csv
cat fastani/!{sample}.txt | sed 's/,//g' | tr "\\t" "," | awk -v sample=!{sample} '{ print sample "," $0 }' >> fastani/!{sample}_fastani.csv
cat fastani/!{sample}.txt | sed 's/,//g' | sed 's/!{genomes}\\///g' | tr "\\t" "," | awk -v sample=!{sample} '{ print sample "," $0 }' >> fastani/!{sample}_fastani.csv
top_hit=$(head -n 2 fastani/!{sample}_fastani.csv | tail -n 1 | cut -f 3 -d , )
if [ -f "$top_hit" ]; then mkdir -p top_hit ; cp $top_hit top_hit/. ; fi
Expand Down
6 changes: 3 additions & 3 deletions modules/mlst.nf
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ process mlst {
tuple val(sample), file(contig)

output:
path "mlst/${sample}_mlst.csv" , emit: collect
path "mlst/${sample}_mlst.tsv" , emit: collect
path "logs/${task.process}/${sample}.${workflow.sessionId}.log", emit: log

shell:
Expand All @@ -28,11 +28,11 @@ process mlst {
echo "Nextflow command : " >> $log_file
cat .command.sh >> $log_file
echo "sample,filename,matching PubMLST scheme,ST,ID1,ID2,ID3,ID4,ID5,ID6,ID7,ID8,ID9,ID10,ID11,ID12,ID13,ID14,ID15" > mlst/!{sample}_mlst.csv
echo -e "sample\\tfilename\\tmatching PubMLST scheme\\tST\\tID1\\tID2\\tID3\\tID4\\tID5\\tID6\\tID7\\tID8\\tID9\\tID10\\tID11\\tID12\\tID13\\tID14\\tID15" > mlst/!{sample}_mlst.tsv
mlst !{params.mlst_options} \
--threads !{task.cpus} \
!{contig} | \
awk -v sample=!{sample} '{print sample "," $1 "," $2 "," $3 "," $4 "," $5 "," $6 "," $7 "," $8 "," $9 "," $10 "," $11 "," $12 "," $13 "," $14 "," $15 "," $16 "," $17 "," $18}' >> mlst/!{sample}_mlst.csv
awk -v sample=!{sample} '{print sample "\\t" $1 "\\t" $2 "\\t" $3 "\\t" $4 "\\t" $5 "\\t" $6 "\\t" $7 "\\t" $8 "\\t" $9 "\\t" $10 "\\t" $11 "\\t" $12 "\\t" $13 "\\t" $14 "\\t" $15 "\\t" $16 "\\t" $17 "\\t" $18}' >> mlst/!{sample}_mlst.tsv
'''
}
2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ manifest {
author = 'Erin Young'
homePage = 'https://github.com/UPHL-BioNGS/Grandeur'
mainScript = 'grandeur.nf'
version = '3.0.20230205'
version = '3.0.20230310'
defaultBranch = 'main'
description = 'Grandeur is short-read de novo assembly pipeline with serotyping.'
}
Expand Down
2 changes: 1 addition & 1 deletion subworkflows/information.nf
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ workflow information {
.set { flag_summary }

mlst.out.collect
.collectFile(name: "mlst_summary.csv",
.collectFile(name: "mlst_summary.tsv",
keepHeader: true,
sort: { file -> file.text },
storeDir: "${params.outdir}/mlst")
Expand Down

0 comments on commit 0e9bb28

Please sign in to comment.