# wes_config.local.yaml

# NOTE: the metasheet and ref paths below rarely need to be changed.
metasheet: "metasheet.csv"
ref: "cidc_wes/ref.yaml"

# IF YOU ARE using snakemake remote files then you will need to set the
# 'remote_path' parameter to your (google) bucket path.
# **REMEMBER to add the / to the end of the path**
#
# OTHERWISE, if running locally, LEAVE it commented out
# remote_path: "bucket_name/path/to/wes_project/"

# transfer_path is the bucket location used by the wes_automator transfer
# script to transfer the run AFTER a wes_automator run.
# NOTE(review): the commented-out line below uses the key name
# 'transfer_bucket' -- confirm which key the transfer script actually reads.
# transfer_bucket: "gs://some/bucket/path/wes_project/"
transfer_path: "gs://some/bucket/path/wes_project/"

# CIMAC center: one of {'mocha', 'mda', 'broad'}; 'broad' is the default.
cimac_center: "broad"

# SOMATIC CALLER to use; options are {tnsnv, tnhaplotyper2, tnscope}.
# tnhaplotyper2 is the default when the somatic_caller param is NOT defined.
somatic_caller: tnscope

# Path to the directory containing the sentieon binary.
# NOTE(review): this is a user-specific home-directory path; adjust it for
# your own installation.
sentieon_path: "/home/taing/sentieon/sentieon-genomics-202010.01/bin/"

# Tell us what assembly to use
# (!! MUST be defined in the file given by 'ref', i.e. cidc_wes/ref.yaml !!)
assembly: hg38

# vcf2maf buffer size.
# NOTE(review): the original inline note read
# "for standard8 #50000 for highmem-64", i.e. the same value for both machine
# types -- confirm the intended value for standard8.
vcf2maf_bufferSize: 50000

# neoantigen module - whether to also produce class II results.
# When enabled, an additional HLA caller (xHLA) is run to generate class II
# HLA alleles.  Set to true to enable it (comment the line out to leave it
# disabled).
neoantigen_run_classII: true

# neoantigen module - Define which callers to use in a space-separated list
# Callers available:
# 'MHCflurry', 'MHCnuggetsI', 'MHCnuggetsII', 'NNalign', 'NetMHC',
# 'NetMHCIIpan', 'NetMHCcons', 'NetMHCpan', 'PickPocket', 'SMM',
# 'SMMPMBEC', 'SMMalign'
neoantigen_callers: "NetMHCpan NetMHCIIpan"

# neoantigen module - Define which epitope lengths to analyze in a
# comma-separated list
# (presumably cls1 = class I and cls2 = class II -- confirm)
neoantigen_epitope_lengths_cls1: "8,9,10,11"
neoantigen_epitope_lengths_cls2: "12,13,14,15,16,17,18"

# effective_size: number of base pairs covered by the target regions
effective_size: 30000000

#tcga_panel - Define a list of TCGA cancer types to compare the samples
#against (optional)
#ref: https://gdc.cancer.gov/resources-tcga-users/tcga-code-tables/tcga-study-abbreviations
#The available cancer types are:
#  ACC,BLCA,BRCA,CESC,CHOL,COAD,DLBC,ESCA,GBM,HNSC,KICH,KIRC,KIRP,LGG,LIHC,
#  LUAD,LUSC,MESO,OV,PAAD,PCPG,PRAD,READ,SARC,SKCM,STAD,TGCT,THCA,THYM,UCEC,
#  UCS,UVM
# The signature of all cancer types, i.e. TCGA-PANCANCER is included in the
# panel by default
#NOTE: please list them as a space separated string, e.g. "LUAD LUSC"
#tcga_panel: "LUAD LUSC"

# trim_soft_clip - When performing somatic variant calling, ignore soft clipped
# reads
# ref: https://support.sentieon.com/manual/usages/general/#tnhaplotyper2-algorithm
# NOTE: only supported in TNscope and TNhaplotyper2; NOT in TNsnv.
# IF TNsnv is selected as somatic_caller, then this param will be ignored.
# Default: soft clipped reads are NOT ignored.
# To turn on this feature, set the following line to true.
trim_soft_clip: false

# tumor_only - IF the normal sample is NOT available, set this flag to true
# on the line below.
# Also make sure that the normal samples in the metasheet are empty.
#
# Default: tumor_only: false
tumor_only: false

# Skip modules in this WES run by uncommenting the skipped_modules header and
# any lines containing modules that you would like to skip.
#skipped_modules:
#  - "clonality"
#  - "copynumber"
#  - "cnvkit"
#  - "msisensor2"
#  - "neoantigen"
#  - "purity"
#  - "tcellextrect"

# (Optional) Define any RNA-seq expression data associated with the TUMOR
# samples only.  Currently, only expression results from Salmon are supported.
# The sample names in this section must match the names used in the
# 'samples' section below.
# BOTH bam and expression files are required
#rna:
#  SAMPLE1:
#    bam_file: rna_data/sample1.sorted.bam
#    expression_file: rna_data/sample1.quant.sf
#  SAMPLE2:
#    bam_file: rna_data/sample2.sorted.bam
#    expression_file: rna_data/sample2.quant.sf

#The following are optional meta information that will be stored in the report
#wes_run_info:
#  wes_commit: e4b875
#  wes_ref_snapshot: wes-human-ref-ver1-3
#  wes_image: wes-ver2-1

# DEFINE the samples - each sample should have a name, e.g. SAMPLE1,
# and a path to the input file, e.g. data/sample1.fastq.gz
# VALID INPUTS: fastq, fastq.gz, bam
# NOTE: for PAIRED-END fastq/fastq.gz, give both mate files to the sample:
# SAMPLE_1_PE:
#   - data/sample1_pair1.fastq
#   - data/sample1_pair2.fastq
# WARNING: Do NOT mix Paired-End and Single-End samples!!!
# ALSO: having the string "rep" or '.' in your sample name will throw an ERROR
# '.' causes Rscripts to chop off names; and it's used as replicate splitter
samples:
  SAMPLE1:
    - data/sample1.fastq.gz
  SAMPLE1_input:
    - data/sample1.input.fastq.gz
  SAMPLE2:
    - data/sample2.fastq.gz