Skip to content

Commit

Permalink
Allow saving of box/tiff pairs during base tesseract training
Browse files: browse the repository at this point in the history
  • Loading branch information
Shreeshrii committed Jun 14, 2019
1 parent 17c8ac2 commit 832c6ed
Showing 1 changed file with 24 additions and 12 deletions.
36 changes: 24 additions & 12 deletions src/training/tesstrain_utils.sh
Original file line number | Diff line number | Diff line change
Expand Up @@ -259,7 +259,7 @@ generate_font_image() {
common_args+=" --char_spacing=${CHAR_SPACING} --exposure=${EXPOSURE}"
common_args+=" --outputbase=${outbase} --max_pages=${MAX_PAGES}"
if $DISTORT_IMAGE; then
common_args+=" --distort_image "
common_args+=" --distort_image --invert=false"
fi
# add --writing_mode=vertical-upright to common_args if the font is
Expand Down Expand Up @@ -326,6 +326,17 @@ phase_I_generate_image() {
check_file_readable ${outbase}.box ${outbase}.tif
done
done
if $SAVE_BOX_TIFF && ( ! $LINEDATA ) ; then
tlog "\n=== Saving box/tiff pairs for training data ==="
for f in "${TRAINING_DIR}/${LANG_CODE}".*.box; do
tlog "Moving ${f} to ${OUTPUT_DIR}"
cp "${f}" "${OUTPUT_DIR}"
done
for f in "${TRAINING_DIR}/${LANG_CODE}".*.tif; do
tlog "Moving ${f} to ${OUTPUT_DIR}"
cp "${f}" "${OUTPUT_DIR}"
done
fi
}
# Phase UP : Generate (U)nicharset and (P)roperties file.
Expand Down Expand Up @@ -386,7 +397,7 @@ phase_D_generate_dawg() {
# Punctuation DAWG
# -r arguments to wordlist2dawg denote RTL reverse policy
# (see Trie::RTLReversePolicy enum in tesseract/src/dict/trie.h).
# (see Trie::RTLReversePolicy enum in third_party/tesseract/dict/trie.h).
# We specify 0/RRP_DO_NO_REVERSE when generating number DAWG,
# 1/RRP_REVERSE_IF_HAS_RTL for freq and word DAWGS,
# 2/RRP_FORCE_REVERSE for the punctuation DAWG.
Expand Down Expand Up @@ -562,17 +573,18 @@ make__lstmdata() {
--output_dir "${OUTPUT_DIR}" --lang "${LANG_CODE}" \
"${pass_through}" "${lang_is_rtl}"
if $SAVE_BOX_TIFF; then
if $SAVE_BOX_TIFF ; then
tlog "\n=== Saving box/tiff pairs for training data ==="
for f in "${TRAINING_DIR}/${LANG_CODE}".*.box; do
tlog "Moving ${f} to ${OUTPUT_DIR}"
mv "${f}" "${OUTPUT_DIR}"
done
for f in "${TRAINING_DIR}/${LANG_CODE}".*.tif; do
tlog "Moving ${f} to ${OUTPUT_DIR}"
mv "${f}" "${OUTPUT_DIR}"
done
fi
for f in "${TRAINING_DIR}/${LANG_CODE}".*.box; do
tlog "Moving ${f} to ${OUTPUT_DIR}"
mv "${f}" "${OUTPUT_DIR}"
done
for f in "${TRAINING_DIR}/${LANG_CODE}".*.tif; do
tlog "Moving ${f} to ${OUTPUT_DIR}"
mv "${f}" "${OUTPUT_DIR}"
done
fi
tlog "\n=== Moving lstmf files for training data ==="
for f in "${TRAINING_DIR}/${LANG_CODE}".*.lstmf; do
tlog "Moving ${f} to ${OUTPUT_DIR}"
Expand Down

0 comments on commit 832c6ed

Please sign in to comment.