-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathrunSegmentation
executable file
·158 lines (130 loc) · 4.1 KB
/
runSegmentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
#!/bin/bash
#*********************************************************************#
# AUTHOR: SRIHARSHA VOGETI #
# EMAIL: [email protected] #
# SOURCE: #
# DESCRIPTION: segmentation script in pCNVD pipeline #
# LAST UPDATED: 13/07/16 #
#*********************************************************************#
shopt -s extglob
#set -e
start_time=`date +%s`
#PARAMETERS WITH DEFAULT VALUES
function printStderr {
echo "$@" >&2;
}
# GET CHROMOSOME FUNCTION
function retChrom {
windowFile=$1
cName=$(head -1 $windowFile | awk '{print $1}')
echo $cName
}
#***********************************#
# CODE FOR DNA COPY #
#***********************************#
function runDNACopy {
# Run R script and generate segments
Rscript $SOURCEDIR"/source/R/RunDNACopy.R" $output"_pCNVD.input" $numWindows $noOfProcesses > $output".Dtemp"
rm -rf $output"_cbs_pCNVD.output"
# convert output into .input value
awk -v outputFile=$output"_cbs_pCNVD.output" ' NR>5 {
for(i=1; i<=$6; ++i){
print $7 >> outputFile
}
}' $output".Dtemp"
rm -rf $output".Dtemp"
}
#*********************************#
# CODE FOR RUNNING TV ALGORITHM #
# DIVIDE INPUT FILE INTO CHUNKS #
# RUN SEGMENTATION FOR EACH CHUNK #
# MERGE BACK CHUNKS #
#*********************************#
function runTVAlgorithm {
# calculate number of entries in each chunk
rm -rf $output"_inp"*
y=$(wc -l $output"_pCNVD.input" | awk '{print $1}')
N=$y
y=`expr $y / $noOfProcesses`;
split -l $y $output"_pCNVD.input" $output"_inp";
p=`expr $noOfProcesses - 1`
q=`expr $p + 1`
k=0
FILE=$output"_inp"$q;
# change splits name
for i in $( ls $output"_inp"* )
do
mv -f $i $output"_inp"$k
((k++))
done
# check for the last remaining input values
if [ -f $FILE ];
then
cat $FILE >> $output"_inp"$p
rm $output"_inp"$q
fi
# comes into play when creating overlapping input files, otherwise has no affect. But needded
# stop1=`expr $noOfProcesses - 1`
# for((i=0; i<stop1; i++ )); do
# j=`expr $i + 1`
#tail -20 $output"_inp"$i > $output$tempString
# cat $output"_inp"$j > $output$tempString
# mv $output$tempString $output"_inp"$j
# done
# run segmentation algorith using segment
mpirun -np $noOfProcesses $SOURCEDIR/segment $output;
rm -rf $output"_inp"*;
rm -rf $output"_tvm_pCNVD.output"
# merge segmented chunk files
for ((n=0;n<$noOfProcesses;n++))
do
cat $output"_out"$n >> $output"_tvm_pCNVD.output"
rm -rf $output"_out"$n
done
}
#***********************************************#
# MAIN FUNCTION STARTS HERE #
# PRECPROCESS-SEGMENTATION-POSTPROCESS #
#***********************************************#
##############################################################################################
# varaible declarations
outputPrefix="";
noOfProcesses=32;
tvmFlag=0;
dnaCopyFlag=0;
SOURCEDIR=$(dirname $(readlink -f $0));
SHORTOPTS="p:o:td"
LONGOPTS="help"
PROGNAME="runSegmentation"
ARGS=$(getopt -s bash --options $SHORTOPTS --longoptions $LONGOPTS --name $PROGNAME -- "$@" )
eval set -- "$ARGS"
while true; do
case "$1" in
-o) outputPrefix="$2"; shift 2; ;;
-p) noOfProcesses="$2"; shift 2; ;;
-t) tvmFlag=1; shift; ;;
-d) dnaCopyFlag=1; shift ; ;;
--help) echo "Usage: runSegmentation -o <output prefix> <one or both segmentation flags (-t, -d)>"; exit 1; shift; ;;
--) shift; break ;;
\?) echo "Invalid option: -$1" >& 2 exit ;;
esac
done
output=$outputPrefix
inputFile=$output"_pCNVD.input"
if [[ ! -f "$inputFile" ]]; then
echo "ERROR: input file" $inputFile "not found. Check if such a file exists and correct prefix is passed to -o" >&2
exit
fi
if [[ tvmFlag -eq 0 && dnaCopyFlag -eq 0 ]]; then
echo "Requires flag anyone of the flags -t and -d"
exit
fi
numWindows=$(wc -l $output"_pCNVD.input" | awk '{print $1}')
### CALL SEGMENTATION ALGORITHMS #######
if [[ tvmFlag -eq 1 ]]; then
runTVAlgorithm ;
fi
if [[ dnaCopyFlag -eq 1 ]]; then
runDNACopy ;
fi
#################################################################################################