-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathChIP_walkthrough.sh
145 lines (106 loc) · 4.08 KB
/
ChIP_walkthrough.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#####################################################################
# Processing of Data for ChIP-Seq, MARS, anything with 'peaks' #
#####################################################################
#####################################################################
# Demultiplexing #
#####################################################################
#In terminal
cd /net/hawkins/vol1/NEXTSEQ_YYMMDD
#
# data are located in
/net/fields/vol2/fieldslab-inst/nextseq/Output/<RUNFOLDER>/
# e.g
<RUNFOLDER> = 141016_NS500272_0021_AH1441BGXX <date>_<machine>_<run>_<FlowCellID>
# create SampleSheet.csv in run folder
[Data],,
Sample_ID,Sample_Name,index
CV160,CV160,TGACCTTGA
# all samples must have same lenght barcode sequence
# create output folder "NEXTSEQ_YYMMDD" in /net/hawkins/vol1/
#$ -S /bin/bash
#$ -pe serial 1
#$ -l h_rt=100:00:00 -l mfree=5G
cd /net/hawkins/vol1/NEXTSEQ_141222
bcl2fastq -R /net/fields/vol2/fieldslab-inst/nextseq/Output/141222_NS500272_0031_AH3HKKBGXX/ -o /net/hawkins/vol1/NEXTSEQ_141222/ --barcode-mismatches 0 --no-lane-splitting
#####################################################################
# Processing #
#####################################################################
# We will make 2 scripts
# 1. script with all commands for trimming, mapping, etc.
# use ${file} in place of all sample names
# 2. script will submit 1st script with ${file}
# So we don't overwrite our file
cp A.fastq.gz A_NAME.fastq.gz
cp B.fastq.gz B_NAME.fastq.gz
# basics of vi
# i insert mode
# ESC leave insert mode
# :wq save and exit
# :q! quit without saving
# dd delete line
# create file and open
vi script.sh
# make sure you are using a
#####################################################################
# SCRIPT 1
#$ -S /bin/bash
#$ -pe serial 1
#$ -l h_rt=120:00:00 -l mfree=5G
cd /net/hawkins/vol1/ChIP_walkthrough/raw_fq
# trim galore options to be aware of:
# --fastqc : quality control, makes html report
# -q : quality cut off, worth lowering if mapping bad
# Use 30 for BS
# --paired : takes 2 fastq files, return matching read pairs
# --gzip : output is compressed
# paired end output will be _val_1.fq and _val_2.fq
trim_galore -q 30 --phred33 --gzip ${file}.fastq.gz
gunzip ${file}_trimmed.fq.gz
# need to specify -1 and -2 for paired end
# -x : location of bowtie2 index
# -t: shows time taken for .o file
# -N: number mismatches in seed alignment
# -L: length of seed
bowtie2 -N 1 -L 25 -t -x /net/hawkins/vol1/scripts_files/hg19/hg19 -S ${file}.sam ${file}_trimmed.fq
# convert sam to binary alignment bam
samtools view -bS ${file}.sam > ${file}_nonSorted.bam
# sort bam file
samtools sort ${file}_nonSorted.bam ${file}
# create index for bam file
# some tools need this to run
samtools index ${file}.bam
# make bigwig file from bam
# bamCoverage is part of deepTools
# http://deeptools.readthedocs.io/en/latest/content/tools/bamCoverage.html
# consider ignoring duplicates,
# could there be duplicates not from amplification?
# ignore chrX since there could be 1 or 2!
bamCoverage --ignoreDuplicates --normalizeUsingRPKM --bam ${file}.bam --ignoreForNormalization chrX -o ${file}.bw
# save some space by compressing fastqs
# about 1/4 the size
gzip ${file}_trimmed.fq
# remove intermediate files
rm ${file}.sam
rm ${file}_nonSorted.bam
mv ${file}.bw /net/hawkins/vol1/ChIP_walkthrough/aligned
mv ${file}.bam* /net/hawkins/vol1/ChIP_walkthrough/aligned
mv ${file}_trimmed.fq.gz /net/hawkins/vol1/ChIP_walkthrough/trimmed_fq
####################################################################
# SCRIPT 2
#Submit script for each sample
#$ -S /bin/bash
#$ -pe serial 1
#$ -l h_rt=120:00:00 -l mfree=5G
# Change NAME to your name
indexnames=(
A_NAME
B_NAME
)
cd /net/hawkins/vol1/ChIP_walkthrough/
for i in ${indexnames[@]}; do
qsub -cwd -v file=${i} -N ${i} chip_pipeline.sh
done
#####################################################################
# Visualization #
#####################################################################
# Instruction for uploading are in new_ucsc_system.txt