-
Notifications
You must be signed in to change notification settings - Fork 8
/
overview.sh
executable file
·43 lines (32 loc) · 2.17 KB
/
overview.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#!/bin/bash
# Generate a table with an overview (number of lines, number of characters, recognition error)
# of the books used in the training process.
#
# After running this script you can use
# gnuplot overview.gnuplot
# to generate a few nice diagrams showing the amount of ground-truth data and the character accuracy
# for each of the books used in the model training process.
# Verify that OCRopus is usable before doing any work: 'ocropus-rpred -h' must
# run successfully. Its own output (stdout and stderr) is discarded on purpose;
# only the exit status matters here.
if ! ocropus-rpred -h > /dev/null 2>&1; then
  # Diagnostics go to stderr so they are not mistaken for regular output.
  echo "OCRopus not installed correctly. 'ocropus-rpred' could not be executed." >&2
  exit 1
fi
#######################################
# Run the recognizer over both image variants (*.bin.png and *.nrm.png) of one
# data set directory, using the model 'fraktur.pyrnn.gz' from the current
# working directory.
# Arguments: $1 - data set directory (e.g. "training" or "testing")
# Outputs:   whatever ocropus-rpred prints
#######################################
recognize_set() {
  local dataset=$1
  # Quoted so that a working directory containing blanks stays one argument.
  local model="$PWD/fraktur.pyrnn.gz"
  local variant
  for variant in bin nrm; do
    # The file pattern is kept quoted, exactly as in the original invocation,
    # so it reaches ocropus-rpred unexpanded (presumably the tool expands it
    # itself - NOTE(review): confirm before changing the quoting).
    ocropus-rpred -q -n -m "$model" -Q6 "${dataset}/*.${variant}.png"
  done
}

echo "Recognizing training data..."
recognize_set training
echo "Recognizing testing data..."
recognize_set testing
#######################################
# Print the identifiers of all books that have ground truth in training/,
# one per line. A book identifier is the file name up to (not including) the
# first '_'. The list is sorted and de-duplicated because the tables built
# from it are later combined with join, which expects sorted input.
#######################################
list_books() {
  local path name
  for path in training/*.gt.txt; do
    [[ -e "$path" ]] || continue   # an unmatched glob stays literal; skip it
    name=${path##*/}
    printf '%s\n' "${name%%_*}"
  done | sort -u
}

#######################################
# Print "<lines> <characters>" summed over all ground-truth files of a book.
# Arguments: $1 - directory ("training" or "testing"), $2 - book identifier
# Outputs:   one line; "0 0" when the book has no files in that directory
#######################################
count_lines_chars() {
  local dir=$1 book=$2
  local -a files
  files=("$dir/$book"_*.gt.txt)
  if [[ ! -e "${files[0]}" ]]; then
    printf '0 0\n'
    return 0
  fi
  # Counting the concatenated files avoids parsing wc's "total" line, which is
  # not printed for a single file and whose label depends on the locale.
  # 'LANG=' is kept from the original invocation.
  # NOTE(review): it was presumably there to get an English "total" label;
  # confirm whether it is still wanted, since it may also influence how -m
  # counts multi-byte characters.
  cat -- "${files[@]}" | LANG= wc -l -m | awk '{ print $1, $2 }'
}

#######################################
# Print the error rate reported by 'ocropus-errs -e' for a book.
# Arguments: $1 - directory ("training" or "testing"), $2 - book identifier
# Outputs:   the output line(s) starting with "0", or "NaN" if there are none
#######################################
book_error() {
  local dir=$1 book=$2 rate
  rate=$(ocropus-errs -e "$dir/$book"_*.gt.txt | grep '^0')
  # The placeholder keeps the row at a fixed number of columns.
  # NOTE(review): confirm overview.gnuplot copes with NaN values.
  printf '%s\n' "${rate:-NaN}"
}

#######################################
# Write one "<book><TAB><value>" row per book to stdout.
# Arguments: a command (plus leading arguments) that is called with the book
#            identifier appended and prints the value for that book
#######################################
per_book() {
  local book
  list_books | while IFS= read -r book; do
    # Every row is terminated explicitly, so a book without a value can never
    # run into the row of the following book.
    printf '%s\t%s\n' "$book" "$("$@" "$book")"
  done
}

# All four tables use the book list from training/ so that their rows line up
# when they are joined.
per_book count_lines_chars training > r1.txt
per_book count_lines_chars testing > r2.txt
per_book book_error training > r3.txt
per_book book_error testing > r4.txt
#######################################
# Print the number of lines of a file as a bare integer.
# Arguments: $1 - file name
#######################################
count_rows() {
  # Reading from stdin keeps the file name out of wc's output; the arithmetic
  # expansion strips the padding that some wc implementations put in front of
  # the number (the former "sed 's/ .*//'" turned such output into "").
  printf '%s\n' "$(( $(wc -l < "$1") ))"
}

#######################################
# Combine the per-book tables r1.txt, r3.txt, r2.txt and r4.txt (in that
# column order) into tab-separated rows on stdout.
#######################################
merge_rows() {
  # 'tr -s' turns blanks into tabs and collapses runs of them, which covers
  # the empty field that join can emit for a row ending in a blank. It is used
  # instead of sed 's/\t\t/\t/' because '\t' inside a sed expression is not
  # portable.
  join r1.txt r3.txt | join - r2.txt | join - r4.txt | tr -s ' ' '\t'
}

NUMBER_OF_BOOKS=$(count_rows r1.txt)
# Error rates over the complete training and testing sets.
ERR_TRAIN=$(ocropus-errs -e training/*.gt.txt | grep '^0')
ERR_TEST=$(ocropus-errs -e testing/*.gt.txt | grep '^0')

printf '# book\ttraining lines\ttraining characters\ttraining error\ttest lines\ttest characters\ttest error\n' > overview.csv
merge_rows >> overview.csv

# Two rows with identical error values, at x = 0 and x = number of books.
printf '%s\t%s\t%s\n' 0 "$ERR_TRAIN" "$ERR_TEST" > overview-error.txt
printf '%s\t%s\t%s\n' "$NUMBER_OF_BOOKS" "$ERR_TRAIN" "$ERR_TEST" >> overview-error.txt

echo "Result can be found in 'overview.csv'."
# Remove the intermediate per-book tables.
rm -f r1.txt r2.txt r3.txt r4.txt