-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdefaults.toml
173 lines (136 loc) · 6.28 KB
/
defaults.toml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
[workflow]
# Cohorts to use as inputs.
#input_cohorts = []
# Datasets to load inputs. If not provided, datasets will be determined
# automatically based on the input provider implementation:
#input_datasets = []
# Datasets to skip:
#skip_datasets = []
# Sequencing groups to skip:
#skip_sgs = []
# Only process the following sequencing groups:
#only_sgs = []
# Process the following sequencing groups even if outputs exist:
#force_sgs = []
# Skip these stages:
#skip_stages = []
# Skip all other stages:
#only_stages = []
# Start from this stage:
#first_stages = []
# Finish at this stage:
#last_stages = []
# Force stage rerun:
#force_stages = []
# Map of stages to lists of sequencing groups, to skip for specific stages:
#[workflow.skip_stages_for_sgs]
#CramQC = ['CPGaaa']
# Name of the workflow (to prefix output paths)
#name =
# Description of the workflow (to display in the Batch GUI)
#description =
# Suffix the workflow outputs location (`get_workflow().prefix`) with this string.
# By default, the hash of all input paths will be used.
#output_version = "0.1"
# Check input file existence (e.g. FASTQ files). When they are missing,
# the `skip_sgs_with_missing_input` option controls whether such
# sequencing groups should be ignored, or whether an error should be raised.
check_inputs = true
# For the first (not-skipped) stage, if the input for a target does
# not exist, just skip this target instead of failing. E.g. if the first
# stage is Align, and `sequencing_group.alignment_input` for a sequencing group does not
# exist, remove this sequencing group instead of failing. In other words, ignore
# sequencing groups that are missing results from skipped stages.
skip_sgs_with_missing_input = false
# Within jobs, check all in-job intermediate files for possible reuse.
# If set to false, will overwrite all intermediates. Used by `utils.can_reuse(path)`.
check_intermediates = true
# Before running a stage, check if inputs (i.e. expected outputs from required stages)
# already exist. If they exist, do not submit stage jobs.
check_expected_outputs = true
# Limit to data of this sequencing type:
#sequencing_type = 'genome'
# Realign CRAM when available, instead of using FASTQ.
# The parameter value should correspond to the CRAM version
# (e.g. v0 in gs://cpg-fewgenomes-main/cram/v0/CPGaaa.cram)
#realign_from_cram_version = 'v0'
# Calling intervals (defaults to whole-genome intervals):
#intervals_path =
# The GQ bands used for ReblockGVCF, specified as exclusive upper bounds for reference
# confidence GQ bands (must be in [1, 100] and specified in increasing order). Finer
# granularity bands result in more reference blocks and therefore larger GVCFs.
reblock_gq_bands = [20, 30, 40]
# Only print the final merged config and a list of stages to be submitted.
# Will skip any communication with Metamist, Hail Batch, and Cloud Storage, so
# the code can be run without permissions.
#dry_run = true
# By default, the BamToCram stage will create CRAM analysis types; this can be overridden:
# bam_to_cram_analysis_type = 'pacbio_cram'
# Map internally used validation sample external_ids to truth sample names.
[validation.sample_map]
HG001_NA12878 = 'na12878'
SYNDIP = 'syndip'
[hail]
# Keep the Hail Batch scratch directory after the run finishes
# (presumably useful for debugging — confirm against the Hail Batch driver).
delete_scratch_on_exit = false
[resource_overrides]
# Override default resource requirements for unusually large seq data without
# demanding higher resources for all operations as standard. Examples below.
# Picard MarkDuplicates overrides for unreasonably large sequence groups:
#picard_mem_gb = 100
#picard_storage_gb = 350
# HaplotypeCaller overrides, see production-pipelines PR#381.
# Defaults in code are 40 for genomes, none for exomes:
#haplotypecaller_storage = 80
# Use highmem machine type for the alignment step:
# align_use_highmem = true
# Use additional storage in the postproc_gvcf job for large gVCFs:
# postproc_gvcf_storage = 50
# JointGenotyping GenomicsDBImport job overrides:
# genomicsdb_import_mem_gb = 32
# genomicsdb_import_use_highmem = false
# JointGenotyping GenotypeGVCFs job overrides:
# genotype_gvcfs_mem_gb = 15
# genotype_gvcfs_use_highmem = false
[mito_snv]
# Example config for the Broad WDL found here:
# https://raw.githubusercontent.com/broadinstitute/gatk/master/scripts/mitochondria_m2_wdl/ExampleInputsMitochondriaPipeline.json
# f_score_beta is set to the tool's default value of 1.0.
f_score_beta = 1.0
# Sarah Stenton from Broad runs this pipeline for seqr ingest and indicated they use a
# threshold of 0.01 for seqr cohorts.
vaf_filter_threshold = 0.01
# Use VerifyBamID in addition to haplocheck for the contamination estimate.
use_verifybamid = true
[stripy]
# analysis_type can be "standard" (fast) or "extended" (marginally slower
# but also uses unmapped reads for genotyping).
analysis_type = "extended"
# See https://gitlab.com/andreassh/stripy-pipeline#list-of-loci
# Excluded by default: C9orf72, HTT
target_loci = """AFF2,AR,ARX_1,ARX_2,ATN1,ATXN1,ATXN10,ATXN2,ATXN3,ATXN7,ATXN8OS,\
BEAN1,CACNA1A,CBL,CNBP,COMP,CSTB,DAB1,DIP2B,DMD,DMPK,EIF4A3,FGF14,FMR1,FOXL2,FXN,GIPC1,\
GLS,HOXA13_1,HOXA13_2,HOXA13_3,HOXD13,JPH3,LRP12,MARCHF6,NIPA1,NOP56,NOTCH2NLC,\
NUTM2B-AS1,PABPN1,PHOX2B,PPP2R2B,PRDM12,PRNP,RAPGEF2,RFC1,RILPL1,RUNX2,SAMD12,SOX3,\
STARD7,TBP,TBX1,TCF4,TNRC6A,VWA1,XYLT1,YEATS2,ZFHX3,ZIC2,ZIC3"""
# Path to a bed+ file containing extra loci to include in the analysis. Tab-delimited BED
# file containing at least the following four values: chromosome, start and end position
# of the STR locus, and the motif on the plus strand. Optionally, the locus name/ID can be
# specified as a fifth value. Additionally, you can also specify disease name, inheritance,
# normal range and pathogenic cut-off values, which are then used to colourise
# results, e.g.:
# https://gitlab.com/andreassh/stripy-pipeline/-/blob/main/examples/vntr.bed
# custom_loci_path = "gs://cpg-reference/hg38/loci/seqr/seqr_stripy_custom_loci.bed"
# Set to an empty string if no custom loci are to be used.
custom_loci_path = ""
# Change the path the STRipy report is saved to; useful when testing novel loci.
output_prefix = "stripy"
# Add in specific MultiQC report config options.
# See https://multiqc.info/docs/getting_started/config for more details:
# [workflow.cram_multiqc]
# send_to_slack = true
# [workflow.cram_multiqc.extra_config]
# plots_force_interactive = true
# [workflow.gvcf_multiqc]
# send_to_slack = true
# [workflow.gvcf_multiqc.extra_config]
# plots_force_interactive = true