-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathRECAP_diffReps.sh
206 lines (171 loc) · 6.65 KB
/
RECAP_diffReps.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
#!/bin/bash
# ===============================================================
# RECAP Wrapper
# RECAP is a wrapper algorithm that resamples ChIP-seq and control
# data to estimate and control for biases built into peak calling
# algorithms.
# This wrapper script is designed to conveniently peak call
# ChIP-seq data using diffReps and recalibrate peak p-values using RECAP
# in one convenient package.
# A detailed explanation of the algorithm can be found under
# PUBLICATIONS.
#
# HISTORY:
# 29/08/2017 - v1.0.0 - First Creation
# 14/01/2019 - v1.0.1 - Proper command line parameters
# 15/01/2019 - v1.0.2 - Input validation
#
# CREDITS:
# RECAP was developed by Justin G. Chitpin, Aseel Awdeh,
# and Theodore J. Perkins.
# Development of RECAP was carried out at the Ottawa Hospital
# Research Institute in the Perkins Lab.
#
# PUBLICATIONS:
# If you use RECAP, please cite the following paper:
# <INSERT PUBLICATION HERE>
#
# QUESTIONS:
# Please contact [email protected]
# ===============================================================
# ===============================================================
# Script version number
VERSION="1.0.2"
# Provide a variable for the location of this and other scripts
SCRIPT_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
REMIX_PATH=$(find ~/ -type f -name "RECAP_Re-Mix.sh" | head -n 1)
PERL_PATH=$(find ~/ -type f -name "RECAP.pl" | head -n 1)
# Text display commands
bold=$(tput bold)
normal=$(tput sgr0)
# ===============================================================
function mainWrapper() {
####################### Begin Script Here #######################
#################################################################
## PLEASE EDIT AND ADD YOUR DESIRED diffReps PARAMETERS IN 2) AND 3)
# ===============================================================
if [[ ! -d $INPUT_DIR ]]
then
echo -e "\nERROR: Input directory does not exist"
exit 1
fi
if [[ ! -d $OUTPUT_DIR ]]
then
echo -e "\nERROR: Output directory does not exist"
exit 1
fi
cd $INPUT_DIR
if [[ ! -e $CHIP_NAME ]]
then
echo -e "\nERROR: Treatment bed file does not exist"
exit 1
fi
if [[ ! -e $CONTROL_NAME ]]
then
echo -e "\nERROR: Control bed file does not exist."
echo -e " Must include the full extension."
exit 1
fi
if [[ ! $BOOTSTRAP -gt 0 ]]
then
echo -e "\nERROR: Specify the number of re-mixes"
echo -e " Must be a natural number"
exit 1
fi
if [[ ! $HEADER -ge 0 ]]
then
echo -e "\nERROR: Specify the number of header lines in output"
echo -e " Must be an integer (zero or greater)"
exit 1
fi
# 1) Re-mix ChIP and control bed files
bash $REMIX_PATH -i $INPUT_DIR -t $CHIP_NAME -c $CONTROL_NAME -o $OUTPUT_DIR -m unequal -b $BOOTSTRAP
# 2) Call original peaks using diffReps
# Please specify your own diffReps parameters!
# NOTE: p-value threshold must be set to 1.0 for diffReps
cd $OUTPUT_DIR
mkdir -p diffReps_original
diffReps.pl --treatment "$INPUT_DIR/$CHIP_NAME" --control "$INPUT_DIR/$CONTROL_NAME" -meth gt -gname hg19 --pval 1 --mode n --report "$OUTPUT_DIR/diffReps_original/${CHIP_NAME%.bed}.txt" --nohs --noanno --nsd 20
# 3) Call re-mixed peaks using diffReps specifying desired parameters
# Please specify your own diffReps parameters!
# NOTE: p-value threshold must be set to 1.0 for diffReps
cd $OUTPUT_DIR
mkdir -p diffReps_re-mix
for (( i=1; i<=$BOOTSTRAP; i++ ))
do
diffReps.pl --treatment "$OUTPUT_DIR/re-mix/${CHIP_NAME%.bed}.bootstrap_$i.bed" --control "$OUTPUT_DIR/re-mix/${CONTROL_NAME%.bed}.bootstrap_$i.bed" -meth gt -gname hg19 --pval 1 --mode n --report "$OUTPUT_DIR/diffReps_re-mix/${CHIP_NAME%.bed}.bootstrap_$i.txt" --nohs --noanno --nsd 20
done
# 4) Recalibrate original peak p-values using RECAP
# NOTE: Check for correct header and p-value column if you obtain any errors here
cd $OUTPUT_DIR
mkdir diffReps_RECAP
perl $PERL_PATH --dirOrig "$OUTPUT_DIR/diffReps_original" --nameOrig "${CHIP_NAME%.bed}.txt" --dirRemix "$OUTPUT_DIR/diffReps_re-mix" --nameRemix "${CHIP_NAME%.*}" --dirOutput "$OUTPUT_DIR/diffReps_RECAP" --nameOutput "${CHIP_NAME%.bed}.RECAP.bootstrap_${BOOTSTRAP}.txt" --bootstrap $BOOTSTRAP --header $HEADER --pvalCol 13 --delim t --software D
#################################################################
######################## End Script Here ########################
}
#################### Begin Options and Usage ####################
# Print usage
usage() {
echo -n "
[Input directory] [Treatment file] [Control file]
[Output directory] [Bootstrap] [Header]
${bold}USAGE:${normal}
-i, --input Input file directory (absolute path)
-t, --treatment Treatment file (full name with extension)
-c, --control Control file (full name with extension)
-o, --output Output file directory (absolute path)
-b, --bootstrap Number of re-mixes
-e, --header Header number of peak calling output files
${bold}OPTIONS:${normal}
-h, --help Display this help and exit
"
}
# ===============================================================
# Iterate over options breaking --foo=bar into --foo bar
unset options
while (($#)); do
case $1 in
# If option is of type --foo=bar
--?*=*) options+=("${1%%=*}" "${1#*=}") ;;
# add --endopts for --
--) options+=(--endopts) ;;
# Otherwise, nothing special
*) options+=("$1") ;;
esac
shift
done
set -- "${options[@]}"
unset options
# ===============================================================
# ===============================================================
# Print help if no arguments or the incorrect number were passed.
[[ $# -eq 0 ]] && set -- "--help"
[[ $# -lt 6 ]] && set -- "--help"
# ===============================================================
# ===============================================================
# Read the options and set stuff
while [[ $1 = -?* ]]; do
case $1 in
-i|--input) shift; INPUT_DIR=${1} ;;
-t|--treatment) shift; CHIP_NAME=${1} ;;
-c|--control) shift; CONTROL_NAME=${1} ;;
-o|--output) shift; OUTPUT_DIR=${1} ;;
-b|--bootstrap) shift; BOOTSTRAP=${1} ;;
-e|--header) shift; HEADER=${1} ;;
-h|--help) usage >&2; exit 0 ;;
*) echo "ERROR: Bad argument ${1}" ; exit 1 ;;
esac
shift
done
# Store the remaining part as arguments.
args+=("$@")
# ===============================================================
##################### End Options and Usage #####################
# ===============================================================
# Set IFS to preferred implementation
IFS=$'\n\t'
# ===============================================================
# ===============================================================
# Run script
mainWrapper
# ===============================================================