-
Notifications
You must be signed in to change notification settings - Fork 0
/
wes_config.local.yaml
127 lines (109 loc) · 4.72 KB
/
wes_config.local.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# NOTE: these two entries rarely need to be changed.
metasheet: "metasheet.csv"
ref: "cidc_wes/ref.yaml"
# IF YOU ARE using snakemake remote files then you will need to set the
# 'remote_path' parameter to your (google) bucket path.
# **REMEMBER to add the / to the end of the path**
#
# OTHERWISE, if running locally, LEAVE it commented out
# remote_path: "bucket_name/path/to/wes_project/"
# The transfer bucket is used by the wes_automator transfer script to
# transfer (tx) the run AFTER a wes_automator run
# NOTE(review): the commented example below uses the key 'transfer_bucket'
# while the active key is 'transfer_path' -- confirm which key the transfer
# script actually reads.
# transfer_bucket: "gs://some/bucket/path/wes_project/"
transfer_path: "gs://some/bucket/path/wes_project/"
# CIMAC center -- one of: 'mocha', 'mda', 'broad' (default)
cimac_center: "broad"
# Somatic caller to use -- one of: tnsnv, tnhaplotyper2, tnscope
# tnhaplotyper2 is the default, i.e. it is used when the somatic_caller
# param is NOT defined
somatic_caller: "tnscope"
# Path to the sentieon bin/ directory
sentieon_path: "/home/taing/sentieon/sentieon-genomics-202010.01/bin/"
# Tell us what assembly to use (!! MUST be defined in the ref.yaml file !!)
# NOTE(review): presumably the file given by the 'ref' key
# (cidc_wes/ref.yaml) rather than chips/ref.yaml -- confirm
assembly: "hg38"
vcf2maf_bufferSize: 50000  # for standard8; 50000 for highmem-64
# neoantigen module - Define whether to run class II results
# This will trigger an additional HLA caller, xHLA, to generate class II
# HLA alleles. The line below is active, i.e. class II is enabled in this
# config; comment it out to skip the class II run.
neoantigen_run_classII: true
# neoantigen module - Define which callers to use in a space-separated list
# Callers available:
# 'MHCflurry', 'MHCnuggetsI', 'MHCnuggetsII', 'NNalign', 'NetMHC',
# 'NetMHCIIpan', 'NetMHCcons', 'NetMHCpan', 'PickPocket', 'SMM',
# 'SMMPMBEC', 'SMMalign'
neoantigen_callers: "NetMHCpan NetMHCIIpan"
# neoantigen module - Define which epitope lengths to analyze in a
# comma-separated list (cls1 = class I, cls2 = class II)
neoantigen_epitope_lengths_cls1: "8,9,10,11"
neoantigen_epitope_lengths_cls2: "12,13,14,15,16,17,18"
# effective_size: number of base-pairs covered in the target regions
effective_size: 30000000
#tcga_panel - Define a list of TCGA cancer types against which to compare
#the samples (optional)
#ref: https://gdc.cancer.gov/resources-tcga-users/tcga-code-tables/tcga-study-abbreviations
#The available cancer types are:
# ACC,BLCA,BRCA,CESC,CHOL,COAD,DLBC,ESCA,GBM,HNSC,KICH,KIRC,KIRP,LGG,LIHC,
# LUAD,LUSC,MESO,OV,PAAD,PCPG,PRAD,READ,SARC,SKCM,STAD,TGCT,THCA,THYM,UCEC,
# UCS,UVM
# The signature of all cancer types, i.e. TCGA-PANCANCER is included in the
# panel by default
#NOTE: please list them as a space separated string, e.g. "LUAD LUSC"
#tcga_panel: "LUAD LUSC"
# trim_soft_clip - When performing somatic variant calling, ignore soft clipped
# reads
# ref: https://support.sentieon.com/manual/usages/general/#tnhaplotyper2-algorithm
# NOTE: only supported in TNscope and TNhaplotyper2; NOT in TNsnv
# IF TNsnv is selected as somatic_caller, then this param will be ignored
# Default: soft clipped reads are NOT ignored
# NOTE(review): the key below is already present and set to false;
# presumably setting it to true turns the feature on -- confirm how the
# pipeline reads this flag
#
trim_soft_clip: false
# tumor_only - IF the normal sample is NOT available, then set this flag to
# true on the line below.
# Also make sure that the normal samples in the metasheet are empty.
#
# Default: tumor_only: false
tumor_only: false
# Skip modules in this WES run by uncommenting the skipped modules header and
# any lines containing modules that you would like to skip.
#skipped_modules:
# - "clonality"
# - "copynumber"
# - "cnvkit"
# - "msisensor2"
# - "neoantigen"
# - "purity"
# - "tcellextrect"
# (Optional) Define any RNA-seq expression data associated with the TUMOR
# samples only. Currently, only expression results from Salmon are supported.
# The sample names in this section must match the names used in the
# 'samples' section.
# BOTH bam and expression files are required
#rna:
#  SAMPLE1:
#    bam_file: rna_data/sample1.sorted.bam
#    expression_file: rna_data/sample1.quant.sf
#  SAMPLE2:
#    bam_file: rna_data/sample2.sorted.bam
#    expression_file: rna_data/sample2.quant.sf
#The following are optional meta information that will be stored in the report
#wes_run_info:
#  wes_commit: e4b875
#  wes_ref_snapshot: wes-human-ref-ver1-3
#  wes_image: wes-ver2-1
# DEFINE the samples - each sample should have a name, e.g. SAMPLE1
# and a path to the input file, e.g. data/sample1.fastq.gz
# VALID INPUTS: fastq, fastq.gz, bam
# NOTE: for PAIRED-END fastq/fastq.gz, give both pairs to the sample:
#   SAMPLE_1_PE:
#     - data/sample1_pair1.fastq
#     - data/sample1_pair2.fastq
# WARNING: DO not mix Paired-END and Single-End samples!!!
# ALSO: having the string "rep" or '.' in your sample name will throw an ERROR
# '.' causes Rscripts to chop off names; and it's used as replicate splitter
# Each sample name must be nested under 'samples' and map to a list of input
# files; without the indentation 'samples' would parse as null.
samples:
  SAMPLE1:
    - data/sample1.fastq.gz
  SAMPLE1_input:
    - data/sample1.input.fastq.gz
  SAMPLE2:
    - data/sample2.fastq.gz