-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathcallCNV
executable file
·142 lines (117 loc) · 4.91 KB
/
callCNV
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#!/bin/bash
#*********************************************************************#
# AUTHOR: SRIHARSHA VOGETI #
# EMAIL: [email protected] #
# SOURCE: #
# DESCRIPTION: pCNVD PIPELINE SCRIPT #
# LAST UPDATED: 14/07/16 #
#*********************************************************************#
shopt -s extglob
set -e
start_time=`date +%s`
#PARAMETERS WITH DEFAULT VALUES
upper_threshold=1.45; # upper threshold ratio
lower_threshold=0.55; # lower threshold ratio
outputPrefix=""; # OUTPUT PREFIX && MANDATORY PARAMETER
windowFile=""; # bed file containing windows
mergeFraction=0.2; #threshold on gap merging
SOURCEDIR=$(dirname $(readlink -f $0)); # SOURCE DIRECTORY
genome=""; # indicates genome assembly used. Useful for obtaining required annotation file
genomeFlag=0; # flag to check if genome is specified or not;
#***************************************#
# READ OPTIONS FROM CMD #
# AND SET PARAMETERS #
#***************************************#
#***************************************************************# #
# FUNCTION USED ARE DEFINED BELOW #
# AVALIABLE FUNCTIONS: printStderr(), for p value #
# retChrom(), runDNACopy(), runTValgorithm() #
#***************************************************************#
# PRINTS ON STDERR
function printStderr {
echo "$@" >&2;
}
# GET CHROMOSOME FUNCTION
function retChrom {
windowFile=$1
cName=$(head -1 $windowFile | awk '{print $1}')
echo $cName
}
function mainFunc {
rm -rf $outputPrefix"_pCNVD.call"
paste $outputPrefix"_pCNVD.bincor" $1"_pCNVD.output" > $outputPrefix"_pCNVD.call";
bedcnv="_pCNVD.bed";
lower_cutoff=$(echo $lower_threshold $avg1 | awk '{print $1*$2}' )
upper_cutoff=$(echo $upper_threshold $avg1 | awk '{print $1*$2}' )
echo $lower_cutoff $upper_cutoff
rm -rf $1$bedcnv
awk -v OFS='\t' -v lc="$lower_cutoff" -v hc="$upper_cutoff" -v outfile="$1$bedcnv" -v avg="$avg1" -v chrName="$chrName" -v mergeFraction="$mergeFraction" -f $SOURCEDIR"/source/awkLib.awk" $outputPrefix"_pCNVD.call"
bedtools subtract -a $1$bedcnv -b $gapFile > $output"_temp1";
mv $output"_temp1" $1$bedcnv;
rm -rf $outputPrefix"_pCNVD.call"
}
SHORTOPTS="l:o:u:z:f:"
LONGOPTS="gene:,hg18,hg19,genome:,help"
PROGNAME="callCNV"
ARGS=$(getopt -s bash --options $SHORTOPTS --longoptions $LONGOPTS --name $PROGNAME -- "$@" )
eval set -- "$ARGS"
while true; do
case "$1" in
-o) outputPrefix="$2"; shift 2;;
-l) lower_threshold="$2" ; shift 2;;
-u) upper_threshold="$2"; shift 2 ;;
-z) windowFile="$2" ; shift 2;;
-f) mergeFraction="$2" ; shift 2;;
--gene) geneFile="$2"; shift 2;;
--hg18) genome="hg18"; genomeFlag=1; shift ;;
--hg19) genome="hg19"; genomeFlag=1; shift ;;
--genome) genome="$2"; genomeFlag=1; shift 2 ;;
--help) echo "Usage: callCNV -o <output Prefix> -z <window file> <genome-flag>";
echo "For additional parameters please go through read me"; exit 1 ;;
--) shift; break;;
*) echo "Internal Error!"; exit 1;;
esac
done
#***********************************************#
# MAIN FUNCTION STARTS HERE #
# PRECPROCESS-SEGMENTATION-POSTPROCESS #
#***********************************************#
tvmOutputFile=$outputPrefix"_tvm_pCNVD.output"
cbsOutputFile=$outputPrefix"_cbs_pCNVD.output"
bincorFile=$outputPrefix"_pCNVD.bincor"
# check for genomeFlag
if [[ $genomeFlag -eq 0 ]]; then
echo "Error: Genome needs to be specified. --hg18 and --hg19 are valid flags. Alternatively genome name can be passed to --genome" >&2;
exit 1;
fi
# checking if requried files are avialable
if [[ ! -f "$bincorFile" || ! -f $outputPrefix"_pCNVD.input" ]]; then
echo "Error: File contaning coordinates $bincorFile or corresponding $outputPrefix"_pCNVD.input"not found. Check if such a file exists and correct prefix is passed to -o" >&2
exit
fi
# check if proper window file is given as input
if [[ ! -f "$windowFile" ]]; then
echo "Error: File contaning bins $windowFile not found!"
exit
fi
#check if any of the one outputfiles exits
if [[ ! -f "$tvmOutputFile" && ! -f "$cbsOutputFile" ]]; then
echo "Error: Neither $tvmOutputFile nor $cbsOutputFile were found. Please check if such file(s) exist and correct prefix is passed to -o" >&2
exit
fi
numWindows=$(wc -l $outputPrefix"_pCNVD.input" | awk '{print $1}')
avg1=$($SOURCEDIR"/calAvg" $numWindows < $outputPrefix"_pCNVD.input" | awk '{print $1}');
chrName=$(retChrom $windowFile) # chromosome name
gapFile="$SOURCEDIR/annotations/"$genome"_gap.bed"
if [[ ! -f "$gapFile" ]]; then
echo "Error: gapFile not found"
exit 1;
fi
# if tvm output exists try to postprocess that file
if [[ -f "$tvmOutputFile" ]]; then
mainFunc $outputPrefix"_tvm"
fi
# if cbs output exists try to postprocess that file
if [[ -f "$cbsOutputFile" ]]; then
mainFunc $outputPrefix"_cbs"
fi