-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpreprocess_input.sh
executable file
·132 lines (122 loc) · 4.47 KB
/
preprocess_input.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#!/usr/bin/env bash
# This is a bash program that filters the input data: it prints the lines of
# the input file that match the selected search pattern (lines containing a
# negative number), keeping the negative numbers intact.
#
# It must be run with bash (not a strictly POSIX /bin/sh), hence the shebang.

# script vars
# var to store the updated working path (the directory the script runs from)
path=$(pwd)
# vars that store the search patterns (awk regular expressions) to be applied
# matches a minus sign followed by 1, 2 or 3 digits anywhere in the line
search_pattern_float='-[0-9]|-[0-9][0-9]|-[0-9][0-9][0-9]'
# matches a minus sign followed by 1, 2 or 3 digits at the end of the line only
# NOTE: this does NOT work with floats
search_pattern_int='-[0-9]$|-[0-9][0-9]$|-[0-9][0-9][0-9]$'
# selects the desired pattern
# TODO: make this configurable through ARGS
search_pattern=$search_pattern_float
# Help function, displays the usage of the script and terminates it.
# Globals:   $0 (read) - printed as the program name in the usage text
# Arguments: none
# Outputs:   usage, options and examples on stdout
# Returns:   never returns; exits the shell with status 0
my_help()
{
  # printf '%s\n' is used instead of echo so the '\n' and '\t' shown in the
  # examples are always printed literally (some echo implementations expand
  # backslash escapes), and "$0" is quoted so it is printed unmodified.
  printf '%s\n' \
    "" \
    "Usage:" \
    "$0 input_file_name [options]" \
    "" \
    "Options:" \
    "-t --test Enables the test mode (only displays the result on stdout)" \
    "-o --output Specifies the output file where results will be saved (WARNING: it will overwrite the file specified by this param)" \
    "-c --column Specifies which column to get data from (default is 0, which prints all columns)" \
    "-s --separator Specifies which separator to use when reading data (defaults to SPACE)" \
    "" \
    "Examples:" \
    "$0 dataset-files/input.csv -t" \
    "$0 dataset-files/input.csv -t -c 2" \
    "$0 dataset-files/input.csv -t -s '\n'" \
    "$0 dataset-files/input.csv -t -c 2 -s '\n'" \
    "$0 dataset-files/input.csv -o dataset-files/output.csv" \
    "$0 dataset-files/input.csv -o dataset-files/output.csv -c 1" \
    "$0 dataset-files/input.csv -o dataset-files/output.csv -s '\t'" \
    "$0 dataset-files/input.csv -o dataset-files/output.csv -c 1 -s '\t'"
  exit 0
}
# Prints an error message to stderr, prefixed with "E: ".
# Arguments: $* - the message; several arguments are joined into one string
#                 (separated by the first character of IFS, a space by default)
# Outputs:   "E: <message>" followed by a newline on stderr
err()
{
  # >&2 writes to the already-open stderr descriptor instead of re-opening
  # the /dev/stderr path; printf prints the message without interpreting
  # any backslashes it may contain.
  printf 'E: %s\n' "$*" >&2
}
# Reports where the preprocessed results were saved.
# Globals:   path (read) - directory prepended to relative output file names
# Arguments: $1 - output file name as given on the command line
# Outputs:   a one-line confirmation message on stdout
print_Sucessful_Save()
{
  case "$1" in
    # an absolute output path is already complete: do not prefix it with $path
    /*) printf '%s\n' "The preprocessed results were saved at $1" ;;
    *) printf '%s\n' "The preprocessed results were saved at $path/$1" ;;
  esac
}
# Reports an option-processing error, shows the usage text and aborts.
# Arguments: none
# Outputs:   the error message on stderr (through err), the usage text on
#            stdout (through my_help)
# Returns:   never returns; exits the shell with status 1
print_Error_Args()
{
  err "Error processing options... Please check them and try again"
  # my_help ends with its own `exit`; it runs in a subshell so that this
  # function, not my_help, decides the final (failure) exit status.
  ( my_help )
  exit 1
}
# Handles the column/separator options when the results are saved to a file.
# Accepted argument layouts (anything else is reported via print_Error_Args):
#   input -o output -c|--column N                     (5 args)
#   input -o output -s|--separator SEP                (5 args)
#   input -o output -c|--column N -s|--separator SEP  (7 args)
# Globals:   search_pattern (read) - regular expression a line must match
# Arguments: $1 - input file
#            $2 - the output option itself (not inspected here)
#            $3 - output file (overwritten)
#            $4.. - column/separator options as listed above
# Outputs:   matching lines (or the selected column of them) into $3, then a
#            confirmation through print_Sucessful_Save
# Returns:   status of awk / print_Sucessful_Save on the accepted layouts
option_Column_and_Separator_with_Save()
{
  case "$4" in
    -c | --column)
      # The column must be a non-negative integer. It is handed to awk as
      # data (-v), never spliced into the awk program text.
      case "$5" in
        '' | *[!0-9]*)
          print_Error_Args
          return 1 # only reached if print_Error_Args did not exit the shell
          ;;
      esac
      if [ "$#" -eq 7 ]; then
        case "$6" in
          -s | --separator)
            # success is only reported when awk itself succeeded
            awk -F "$7" -v pat="$search_pattern" -v col="$5" \
              '$0 ~ pat { print $col }' "$1" > "$3" &&
              print_Sucessful_Save "$3"
            return
            ;;
        esac
      elif [ "$#" -eq 5 ]; then
        awk -v pat="$search_pattern" -v col="$5" \
          '$0 ~ pat { print $col }' "$1" > "$3" &&
          print_Sucessful_Save "$3"
        return
      fi
      ;;
    -s | --separator)
      if [ "$#" -eq 5 ]; then
        # whole lines are printed, so the separator does not alter the output
        awk -F "$5" -v pat="$search_pattern" '$0 ~ pat { print }' "$1" > "$3" &&
          print_Sucessful_Save "$3"
        return
      fi
      ;;
  esac
  # any other option or argument count is an error
  print_Error_Args
}
# Handles the column/separator options when the results are only displayed.
# Accepted argument layouts (anything else is reported via print_Error_Args):
#   input -t -c|--column N                     (4 args)
#   input -t -s|--separator SEP                (4 args)
#   input -t -c|--column N -s|--separator SEP  (6 args)
# Globals:   search_pattern (read) - regular expression a line must match
# Arguments: $1 - input file
#            $2 - the mode option itself (not inspected here)
#            $3.. - column/separator options as listed above
# Outputs:   matching lines (or the selected column of them) on stdout
# Returns:   status of awk on the accepted layouts
option_Column_and_Separator()
{
  case "$3" in
    -c | --column)
      # The column must be a non-negative integer. It is handed to awk as
      # data (-v), never spliced into the awk program text.
      case "$4" in
        '' | *[!0-9]*)
          print_Error_Args
          return 1 # only reached if print_Error_Args did not exit the shell
          ;;
      esac
      if [ "$#" -eq 6 ]; then
        case "$5" in
          -s | --separator)
            awk -F "$6" -v pat="$search_pattern" -v col="$4" \
              '$0 ~ pat { print $col }' "$1"
            return
            ;;
        esac
      elif [ "$#" -eq 4 ]; then
        awk -v pat="$search_pattern" -v col="$4" '$0 ~ pat { print $col }' "$1"
        return
      fi
      ;;
    -s | --separator)
      if [ "$#" -eq 4 ]; then
        # whole lines are printed, so the separator does not alter the output
        awk -F "$4" -v pat="$search_pattern" '$0 ~ pat { print }' "$1"
        return
      fi
      ;;
  esac
  # any other option or argument count is an error
  print_Error_Args
}
# Main functionality: validates the arguments and runs the requested mode.
# Globals:   search_pattern (read) - regular expression a line must match
# Arguments: $1 - input file (must exist and be non-empty)
#            $2 - mode: -o|--output (save to file $3) or -t|--test (preview)
#            $3.. - output file and/or column/separator options
# Outputs:   test mode: the filtered data on stdout between two notices;
#            output mode: the filtered data in $3 plus a confirmation message;
#            errors on stderr
# Returns:   exits with status 1 on a wrong argument count or an unusable
#            input file
main()
{
  # checks if the number of ARGS is ok; every failure stops the script
  if [ "$#" -lt 2 ]; then
    err "Not enough ARGS!"
    ( my_help ) # subshell: my_help ends with its own exit
    exit 1
  elif [ "$#" -gt 7 ]; then
    err "Too many ARGS!"
    ( my_help )
    exit 1
  elif [ ! -s "$1" ]; then
    # -s is false for a missing file and for an empty one
    err "File $1 not found or empty!"
    exit 1
  fi
  # tests which execution flow to execute
  case "$2" in
    -o | --output)
      if [ "$#" -eq 3 ]; then
        # keeps the lines matching the search pattern; success is only
        # reported when awk itself succeeded
        awk -v pat="$search_pattern" '$0 ~ pat { print }' "$1" > "$3" &&
          print_Sucessful_Save "$3"
      else
        # "$@" forwards all the ARGS of main, each one kept as a single word
        option_Column_and_Separator_with_Save "$@"
      fi
      ;;
    -t | --test)
      echo "WARNING: Test mode is enabled"
      if [ "$#" -eq 2 ]; then
        # keeps the lines matching the search pattern
        awk -v pat="$search_pattern" '$0 ~ pat { print }' "$1"
      else
        # "$@" forwards all the ARGS of main, each one kept as a single word
        option_Column_and_Separator "$@"
      fi
      echo "CAUTION: This is only a preview, the results were NOT saved yet"
      ;;
    *)
      print_Error_Args
      ;;
  esac
}
# calls the program's main function; "$@" passes every command-line argument
# on as a single, unmodified word (no word splitting or glob expansion)
main "$@"