From 68577853bdb2d48a29799f47adf68b288e49d33c Mon Sep 17 00:00:00 2001 From: Danny Park Date: Wed, 8 Jun 2022 11:30:00 -0400 Subject: [PATCH 1/7] rename task package_genbank_ftp_submission to package_sc2_genbank_ftp_submission because NCBI does not yet permit this for everything --- pipes/WDL/tasks/tasks_ncbi.wdl | 2 +- pipes/WDL/workflows/sarscov2_illumina_full.wdl | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pipes/WDL/tasks/tasks_ncbi.wdl b/pipes/WDL/tasks/tasks_ncbi.wdl index 3b07d9ec3..da4c5bf33 100644 --- a/pipes/WDL/tasks/tasks_ncbi.wdl +++ b/pipes/WDL/tasks/tasks_ncbi.wdl @@ -846,7 +846,7 @@ task prepare_genbank { } } -task package_genbank_ftp_submission { +task package_sc2_genbank_ftp_submission { meta { description: "Prepares a zip and xml file for FTP-based NCBI Genbank submission according to instructions at https://www.ncbi.nlm.nih.gov/viewvc/v1/trunk/submit/public-docs/genbank/SARS-CoV-2/." } diff --git a/pipes/WDL/workflows/sarscov2_illumina_full.wdl b/pipes/WDL/workflows/sarscov2_illumina_full.wdl index a5aad5b28..650c79e6f 100644 --- a/pipes/WDL/workflows/sarscov2_illumina_full.wdl +++ b/pipes/WDL/workflows/sarscov2_illumina_full.wdl @@ -329,7 +329,7 @@ workflow sarscov2_illumina_full { sequences = submittable_filter.filtered_fasta, keep_list = [biosample_to_genbank.sample_ids] } - call ncbi.package_genbank_ftp_submission { + call ncbi.package_sc2_genbank_ftp_submission { input: sequences_fasta = submit_genomes.filtered_fasta, source_modifier_table = biosample_to_genbank.genbank_source_modifier_table, @@ -464,9 +464,9 @@ workflow sarscov2_illumina_full { File assembly_stats_relineage_tsv = sarscov2_batch_relineage.assembly_stats_relineage_tsv File assembly_stats_final_relineage_tsv = sc2_meta_final.meta_tsv - File submission_zip = package_genbank_ftp_submission.submission_zip - File submission_xml = package_genbank_ftp_submission.submission_xml - File submit_ready = package_genbank_ftp_submission.submit_ready + File submission_zip = package_sc2_genbank_ftp_submission.submission_zip + File submission_xml = package_sc2_genbank_ftp_submission.submission_xml + File submit_ready = package_sc2_genbank_ftp_submission.submit_ready Array[File] vadr_outputs = select_all(vadr.outputs_tgz) File genbank_source_table = biosample_to_genbank.genbank_source_modifier_table From fe4289b64f58e8c1ebea8f480ac9b221250b659a Mon Sep 17 00:00:00 2001 From: Danny Park Date: Wed, 8 Jun 2022 11:32:06 -0400 Subject: [PATCH 2/7] lofreq bugfix --- pipes/WDL/tasks/tasks_intrahost.wdl | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pipes/WDL/tasks/tasks_intrahost.wdl b/pipes/WDL/tasks/tasks_intrahost.wdl index 90d18a051..6e70e3323 100644 --- a/pipes/WDL/tasks/tasks_intrahost.wdl +++ b/pipes/WDL/tasks/tasks_intrahost.wdl @@ -128,13 +128,16 @@ task lofreq { lofreq version | grep version | sed 's/.* \(.*\)/\1/g' | tee LOFREQ_VERSION - samtools faidx "~{reference_fasta}" - samtools index "~{aligned_bam}" + # make local copies because CWD is writeable but localization dir isn't always + cp "~{reference_fasta}" reference.fasta + cp "~{aligned_bam}" aligned.bam + samtools faidx reference.fasta + samtools index aligned.bam lofreq call \ - -f "~{reference_fasta}" \ + -f reference.fasta \ -o "~{out_basename}.vcf" \ - "~{aligned_bam}" + aligned.bam >>> output { From ac5d2b3717e15fe5220d03acafabeda220c463b7 Mon Sep 17 00:00:00 2001 From: Danny Park Date: Wed, 8 Jun 2022 11:49:31 -0400 Subject: [PATCH 3/7] rename inputs --- pipes/WDL/workflows/sarscov2_illumina_full.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pipes/WDL/workflows/sarscov2_illumina_full.wdl b/pipes/WDL/workflows/sarscov2_illumina_full.wdl index 83d069708..39fd19cd7 100644 --- a/pipes/WDL/workflows/sarscov2_illumina_full.wdl +++ b/pipes/WDL/workflows/sarscov2_illumina_full.wdl @@ -330,7 +330,7 @@ workflow sarscov2_illumina_full { sequences = submittable_filter.filtered_fasta, keep_list = [biosample_to_genbank.sample_ids] } - call ncbi.package_sc2_genbank_ftp_submission { + call ncbi.package_sc2_genbank_ftp_submission as package_genbank_ftp_submission { input: sequences_fasta = submit_genomes.filtered_fasta, source_modifier_table = biosample_to_genbank.genbank_source_modifier_table, @@ -465,9 +465,9 @@ workflow sarscov2_illumina_full { File assembly_stats_relineage_tsv = sarscov2_batch_relineage.assembly_stats_relineage_tsv File assembly_stats_final_relineage_tsv = sc2_meta_final.meta_tsv - File submission_zip = package_sc2_genbank_ftp_submission.submission_zip - File submission_xml = package_sc2_genbank_ftp_submission.submission_xml - File submit_ready = package_sc2_genbank_ftp_submission.submit_ready + File submission_zip = package_genbank_ftp_submission.submission_zip + File submission_xml = package_genbank_ftp_submission.submission_xml + File submit_ready = package_genbank_ftp_submission.submit_ready Array[File] vadr_outputs = select_all(vadr.outputs_tgz) File genbank_source_table = biosample_to_genbank.genbank_source_modifier_table From 252fcca69f17e461f91801bd764c2eab9705d5a8 Mon Sep 17 00:00:00 2001 From: Danny Park Date: Wed, 8 Jun 2022 11:54:16 -0400 Subject: [PATCH 4/7] fix --- pipes/WDL/workflows/sarscov2_genbank.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipes/WDL/workflows/sarscov2_genbank.wdl b/pipes/WDL/workflows/sarscov2_genbank.wdl index 130fa1411..a4a21ec42 100644 --- a/pipes/WDL/workflows/sarscov2_genbank.wdl +++ b/pipes/WDL/workflows/sarscov2_genbank.wdl @@ -123,7 +123,7 @@ workflow sarscov2_genbank { defaults_yaml = author_sbt_defaults_yaml, j2_template = author_sbt_j2_template } - call ncbi.package_genbank_ftp_submission as passing_package_genbank { + call ncbi.package_sc2_genbank_ftp_submission as passing_package_genbank { input: sequences_fasta = passing_fasta.combined, source_modifier_table = passing_source_modifiers.genbank_source_modifier_table, @@ -159,7 +159,7 @@ workflow sarscov2_genbank { assembly_stats_tsv = assembly_stats_tsv, filter_to_ids = weird_ids.ids_txt } - call ncbi.package_genbank_ftp_submission as weird_package_genbank { + call ncbi.package_sc2_genbank_ftp_submission as weird_package_genbank { input: sequences_fasta = weird_fasta.combined, source_modifier_table = weird_source_modifiers.genbank_source_modifier_table, From a7994780adc99ce19b837879fb3c71f844a98fbf Mon Sep 17 00:00:00 2001 From: Danny Park Date: Wed, 8 Jun 2022 12:00:34 -0400 Subject: [PATCH 5/7] fix again --- pipes/WDL/workflows/sarscov2_sra_to_genbank.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipes/WDL/workflows/sarscov2_sra_to_genbank.wdl b/pipes/WDL/workflows/sarscov2_sra_to_genbank.wdl index 3c801a207..f63bc4abd 100644 --- a/pipes/WDL/workflows/sarscov2_sra_to_genbank.wdl +++ b/pipes/WDL/workflows/sarscov2_sra_to_genbank.wdl @@ -199,7 +199,7 @@ workflow sarscov2_sra_to_genbank { assembly_stats_tsv = write_tsv(flatten([[['SeqID','Assembly Method','Coverage','Sequencing Technology']],select_all(assembly_cmt)])), filter_to_ids = write_lines(select_all(submittable_id)) } - call ncbi.package_genbank_ftp_submission { + call ncbi.package_sc2_genbank_ftp_submission as package_genbank_ftp_submission { input: sequences_fasta = submit_genomes.combined, source_modifier_table = biosample_to_genbank.genbank_source_modifier_table, From e83041d80f4bc1b5a83b70c1fdc3ac71bcc6ef8c Mon Sep 17 00:00:00 2001 From: Danny Park Date: Wed, 8 Jun 2022 21:42:35 -0400 Subject: [PATCH 6/7] make task lofreq work on empty fasta input --- pipes/WDL/tasks/tasks_intrahost.wdl | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pipes/WDL/tasks/tasks_intrahost.wdl b/pipes/WDL/tasks/tasks_intrahost.wdl index 6e70e3323..1eca344ed 100644 --- a/pipes/WDL/tasks/tasks_intrahost.wdl +++ b/pipes/WDL/tasks/tasks_intrahost.wdl @@ -131,9 +131,18 @@ task lofreq { # make local copies because CWD is writeable but localization dir isn't always cp "~{reference_fasta}" reference.fasta cp "~{aligned_bam}" aligned.bam + + # samtools faidx fails if fasta is empty + if [ $(grep -v '^>' reference.fasta | tr -d '\nNn' | wc -c) == "0" ]; then + touch "~{out_basename}.vcf" + exit 0 + fi + + # index for lofreq samtools faidx reference.fasta samtools index aligned.bam + # lofreq lofreq call \ -f reference.fasta \ -o "~{out_basename}.vcf" \ From 6171e49fa6127305db41757e9480a6b3045c4111 Mon Sep 17 00:00:00 2001 From: Danny Park Date: Wed, 8 Jun 2022 21:58:54 -0400 Subject: [PATCH 7/7] name properly --- pipes/WDL/workflows/metagenomic_denovo.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/pipes/WDL/workflows/metagenomic_denovo.wdl b/pipes/WDL/workflows/metagenomic_denovo.wdl index db2a6694e..9abdd116e 100644 --- a/pipes/WDL/workflows/metagenomic_denovo.wdl +++ b/pipes/WDL/workflows/metagenomic_denovo.wdl @@ -188,6 +188,7 @@ workflow metagenomic_denovo { input: contigs_fasta = assemble.contigs_fasta, reads_bam = dehosted_bam, + sample_name = sample_name, reference_genome_fasta = reference_genome_fasta }