From fbb5132d038a55a2c83b094d39190068a18ad15d Mon Sep 17 00:00:00 2001 From: Jitao David Zhang Date: Mon, 18 Mar 2024 17:07:33 +0100 Subject: [PATCH 1/5] simplify mutation output: Mut followed by Percentage --- workflow/AA_vc_code/sam2aaFreq.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/workflow/AA_vc_code/sam2aaFreq.py b/workflow/AA_vc_code/sam2aaFreq.py index b9bea808..1566169f 100644 --- a/workflow/AA_vc_code/sam2aaFreq.py +++ b/workflow/AA_vc_code/sam2aaFreq.py @@ -2,7 +2,7 @@ from seq_utils import * import numpy as np -mutation_cutoff_perc_list = [1,2,5,15,50] +mutation_cutoff_perc_list = [1, 2, 5, 10, 15, 50] def extractOverlapAASeq(gene_start, gene_end, read_start, read_end, read_seq): # return overlap_or_not, aa_seq, start_aa_in_gene if gene_start >= read_end or read_start >= gene_end: @@ -128,7 +128,7 @@ def pass_mut_cutoff(uniq_nt_or_aa_seq, tmp_freq_list, tmp_total, tmp_ref, tmp_cu for tmp_nt in uniq_nt_seq: out_nt.write(f',{tmp_nt}') for tmp_cutoff in mutation_cutoff_perc_list: - out_nt.write(f',{tmp_cutoff}%Mut') + out_nt.write(f',Mut{tmp_cutoff}') out_nt.write('\n') for tmp_nt_pos_index in range(ref_seq_len): tmp_nt_pos = tmp_nt_pos_index + 1 @@ -152,7 +152,7 @@ def pass_mut_cutoff(uniq_nt_or_aa_seq, tmp_freq_list, tmp_total, tmp_ref, tmp_cu for tmp_aa in uniq_aa_seq: out.write(f',{tmp_aa}') for tmp_cutoff in mutation_cutoff_perc_list: - out.write(f',{tmp_cutoff}%Mut') + out.write(f',Mut{tmp_cutoff}') out.write('\n') for tmp_gene_index in range(gene_num): From cf3e8dbba84f44e2505cf6e9faa2a0da0cbe9061 Mon Sep 17 00:00:00 2001 From: Jitao David Zhang Date: Mon, 18 Mar 2024 17:07:50 +0100 Subject: [PATCH 2/5] delete sorted.sam file after using --- workflow/rules/AA_vc.smk | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/workflow/rules/AA_vc.smk b/workflow/rules/AA_vc.smk index d04e45dc..05a761c5 100644 --- a/workflow/rules/AA_vc.smk +++ b/workflow/rules/AA_vc.smk @@ -5,17 +5,12 @@ config: "config/config.yaml" sample_annotation = config['sample_annotation'] samples, fq1dict, fq2dict = parse_sample_annotation(sample_annotation) -#rule all: -# input: -# expand("results/variant-calling-AA/{sample}.sorted.sam", sample = samples), -# expand("results/variant-calling-AA/{sample}.sam2AAFreq.done", sample = samples), -# "results/infref/inferred_strain_dup.fasta" rule bam2sam_vc: input: "results/{inpt}_bam/{inpt}_{sample}.sorted.bam" output: - "results/variant-calling-AA/{inpt}/{inpt}_{sample}.sorted.sam" + temp("results/variant-calling-AA/{inpt}/{inpt}_{sample}.sorted.sam") shell: "samtools view -h {input} -o {output}" @@ -24,7 +19,7 @@ rule bam2sam_vc_perSamp: input: "results/perSamp_bam/{sample}.sorted.bam" output: - "results/variant-calling-AA/perSamp/{sample}/{sample}.sorted.sam" + temp("results/variant-calling-AA/perSamp/{sample}/{sample}.sorted.sam") shell: "samtools view -h {input} -o {output}" From 201c805bbf25014239d67c33d7561372354739ec Mon Sep 17 00:00:00 2001 From: Jitao David Zhang Date: Mon, 18 Mar 2024 17:13:35 +0100 Subject: [PATCH 3/5] use parameter vc_params --- workflow/rules/varscan_vc.smk | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/workflow/rules/varscan_vc.smk b/workflow/rules/varscan_vc.smk index a3481ada..e1810e83 100644 --- a/workflow/rules/varscan_vc.smk +++ b/workflow/rules/varscan_vc.smk @@ -1,6 +1,7 @@ import snakemake config: "config/config.yaml" sample_annotation = config['sample_annotation'] +vc_params = config['vc_params'] samples, fq1dict, fq2dict = parse_sample_annotation(sample_annotation) @@ -60,8 +61,8 @@ rule varscan: vcfFile="results/variant-calling/{inpt}/{inpt}_{sample}_varscan.vcf" shell: """ - samtools mpileup -f {input.refDup} {input.sortBam} > {output.pileupFile} - varscan mpileup2snp {output.pileupFile} --min-var-freq 0.01 --output-vcf > {output.vcfFile} + samtools mpileup -f {input.refDup} {input.sortBam} > {output.pileupFile} + varscan mpileup2snp {output.pileupFile} {vc_params} --output-vcf > {output.vcfFile} """ @@ -75,7 +76,7 @@ rule varscan_perSamp: shell: """ samtools mpileup -f {input.refDup} {input.sortBam} > {output.pileupFile} - varscan mpileup2snp {output.pileupFile} --min-var-freq 0.01 --output-vcf > {output.vcfFile} + varscan mpileup2snp {output.pileupFile} {vc_params} --output-vcf > {output.vcfFile} """ From 128bcfa83369b0a47e1b0196c944b5228ca04c9d Mon Sep 17 00:00:00 2001 From: Jitao David Zhang Date: Mon, 18 Mar 2024 17:25:52 +0100 Subject: [PATCH 4/5] add template file --- config/config_template.yaml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 config/config_template.yaml diff --git a/config/config_template.yaml b/config/config_template.yaml new file mode 100644 index 00000000..14018e70 --- /dev/null +++ b/config/config_template.yaml @@ -0,0 +1,21 @@ +## use either way to indicate input files: either sample_annotation, or biokit_outdir +## sample_annotation should be a tab-delimited file containing at least four columns (#ID, GROUP, FASTQ1, FASTQ2) +sample_annotation: .test/sampleAnnotation +biokit_outdir: None + +## options for read trimming with Trimmomatic +trim_reads: False +illumina_clip_file: 'resources/trim_reads/primers.fasta' +illumina_clip_opts: ':2:30:10:2:keepBothReads' +# trimmomatic steps (excluding ILLUMINACLIP) +trimmomatic_steps: 'LEADING:20 TRAILING:20 SLIDINGWINDOW:4:15 MINLEN:35 CROP:140' + +## variant calling parameters +vc_params: '--min-var-freq 0.01 --min-coverage 100 --min-reads2 50 --p-value 0.01 --strand-filter 1' + +## Run modes +doInputRef: False +inputRef: 'gnl|hbvnuc|AB064313_FT00000_C-G' +doPerSamp: True + + From 9758dd78c7e75ee7376c4355412086b867f531e9 Mon Sep 17 00:00:00 2001 From: Jitao David Zhang Date: Mon, 18 Mar 2024 17:38:15 +0100 Subject: [PATCH 5/5] ignore config/config.yaml --- .gitignore | 2 ++ config/config.yaml | 18 ------------------ 2 files changed, 2 insertions(+), 18 deletions(-) delete mode 100644 config/config.yaml diff --git a/.gitignore b/.gitignore index cdd7d754..ea2f92aa 100644 --- a/.gitignore +++ b/.gitignore @@ -30,6 +30,7 @@ IGV HBV_refgenomes testdata-out testbiokit/HBVouroboros +config/config.yaml .snakemake README.pdf @@ -40,3 +41,4 @@ test-outputs resources/ref results *.pyc + diff --git a/config/config.yaml b/config/config.yaml deleted file mode 100644 index dd3c500c..00000000 --- a/config/config.yaml +++ /dev/null @@ -1,18 +0,0 @@ -## use either way to indicate input files: either sample_annotation, or biokit_outdir -## sample_annotation should be a tab-delimited file containing at least four columns (#ID, GROUP, FASTQ1, FASTQ2) -sample_annotation: .test/sampleAnnotation -biokit_outdir: None - -## 3. options for read trimming with Trimmomatic -trim_reads: False -illumina_clip_file: 'resources/trim_reads/primers.fasta' -illumina_clip_opts: ':2:30:10:2:keepBothReads' -# trimmomatic steps (excluding ILLUMINACLIP) -trimmomatic_steps: 'LEADING:20 TRAILING:20 SLIDINGWINDOW:4:15 MINLEN:35 CROP:140' - -## Run modes -doInputRef: False -inputRef: 'gnl|hbvnuc|AB064313_FT00000_C-G' -doPerSamp: False - -