# config.yaml

# NOTE: you typically won't have to change the following two lines!
# metasheet: the CSV metasheet file
# ref: the reference definition file (the assembly chosen below must be defined in it)
metasheet: 'metasheet.csv'
ref: "CHIPS/ref.yaml"

# Directory in which your results are stored.
# NOTE: this line may be commented out; the default path is "analysis".
output_path: 'analysis'

# Genome trackview parameters
# genes_to_plot: gene names separated by single spaces, given as one string
# upstream / downstream: NOTE(review): presumably the flanking distance (bp)
#   shown on each side of a gene -- confirm against the pipeline
genes_to_plot: 'GAPDH ACTB TP53'
upstream: 50000
downstream: 50000

# Adapter trimming switch -- {true, false}
# NOTE(review): presumably controls whether adapters are trimmed from the
# reads before alignment -- confirm against the pipeline
trim_adapter: true

# Genome assembly to use.
# !! The name MUST be defined in the reference file set by `ref` (CHIPS/ref.yaml) !!
assembly: 'GDC_hg38'

# ALIGNER: if you have a Sentieon license and Sentieon is installed on your
# machine, uncomment the following line and set it to the absolute path of
# Sentieon. This software can speed up your alignment.
# sentieon: "/path/to/sentieon"

# CUTOFF: Some results suggest that long reads are not meaningful for peak calling.
# Change the number below to set the cutoff used for filtering.
# To skip filtering, set cutoff to 0 or leave the value blank.
cutoff: 150

# PEAKS CALLING: Call broad peaks for some factors.
# We do not recommend performing motif analysis on broad peaks:
# when this parameter is set to true, the motif results may not be accurate.
# To enable broad-peak calling, set the value below to true.
macs2_broadpeaks: false

# For narrowPeak calling: extra parameters passed to macs2, e.g. --nomodel.
# --nomodel is turned on by default for broad peaks; for narrow peaks,
# uncomment and modify the parameter below.
# macs_extra_param: --nomodel

# MOTIF CALLER: if you want to use MDSeqPos, set motif to 'mdseqpos';
# OTHERWISE leave it as 'homer' (default).
# To skip motif scanning, simply comment out BOTH lines.
motif: 'homer'
# motif: 'mdseqpos'

# Map reads to other genomes to identify contamination -- {true, false}
contamination_panel_qc: true

# NOTE: CNV analysis using the QDNAseq R package -- {true, false}
cnv_analysis: true

# Uncomment and set to 'SNAP', 'CAP', or 'KACYL' for your type of epicypher
# spike-ins
# default: 'SNAP'
# epicypher_analysis: 'SNAP'

# FOR CISTROME DATABASE TEAM:
# Uncomment the lines below to enable the json and cistrome modules.
# This will build an adapter for Cistrome DB at {Cistrome_path}.
# Cistrome_path is optional; the default path is {output_path}/cistrome
# CistromeApi: true
# Cistrome_path: "analysis/cistrome"

# DEFINE the samples - each sample should have a name, e.g. SAMPLE1,
# and a path to the input file, e.g. data/sample1.fastq.gz
# VALID INPUTS: fastq, fastq.gz, bam
# NOTE: for PAIRED-END fastq/fastq.gz, give both pairs to the sample:
# SAMPLE_1_PE:
#   - data/sample1_pair1.fastq
#   - data/sample1_pair2.fastq
# WARNING: Do NOT mix Paired-End and Single-End samples!!!
# ALSO: having the string "rep" or '.' in your sample name will throw an ERROR:
# '.' causes the R scripts to chop off names; and it's used as the replicate splitter
# NOTE(review): the example entries below list one file for SAMPLE1 and
# SAMPLE1_input but two files (pair1/pair2) for SAMPLE2, which appears to
# conflict with the warning above -- replace them with your own samples.
samples:
  SAMPLE1:
    - data/sample1.fastq.gz
  SAMPLE1_input:
    - data/sample1.input.fastq.gz
  SAMPLE2:
    - data/sample2_pair1.fastq.gz
    - data/sample2_pair2.fastq.gz