diff --git a/src/training/language-specific.sh b/src/training/language-specific.sh index e81532839d..6e52dd9d6d 100755 --- a/src/training/language-specific.sh +++ b/src/training/language-specific.sh @@ -603,7 +603,7 @@ BURMESE_FONTS=( \ "Padauk" \ "TharLon" \ ) - + JAVANESE_FONTS=( \ "Prada" \ ) @@ -909,7 +909,8 @@ set_lang_specific_parameters() { # Language to mix with the language for maximum accuracy. Defaults to eng. # If no language is good, set to the base language. MIX_LANG="eng" - FONTS= + EXPOSURES=${EXPOSURES:-} + FONTS=${FONTS:-} case ${lang} in # Latin languages. @@ -1176,7 +1177,7 @@ set_lang_specific_parameters() { test -z "$FONTS" && FONTS=( "${LATIN_FONTS[@]}" ) # Default to 0 exposure if it hasn't been set - test -z "${EXPOSURES:-}" && EXPOSURES=0 + test -z "$EXPOSURES" && EXPOSURES=0 # Set right-to-left and normalization mode. case "${LANG_CODE}" in ara | div| fas | pus | snd | syr | uig | urd | kur_ara | heb | yid ) diff --git a/src/training/tesstrain_utils.sh b/src/training/tesstrain_utils.sh index 6ca07a11ce..e18f6ea8ce 100644 --- a/src/training/tesstrain_utils.sh +++ b/src/training/tesstrain_utils.sh @@ -47,11 +47,19 @@ TESSDATA_PREFIX=${TESSDATA_PREFIX:-} # Logging helper functions. tlog() { - echo -e $* 2>&1 1>&2 | tee -a ${LOG_FILE} + if test -z "${LOG_FILE:-}"; then + echo -e $* + else + echo -e $* | tee -a ${LOG_FILE} + fi } err_exit() { - echo -e "ERROR: "$* 2>&1 1>&2 | tee -a ${LOG_FILE} + if test -z "${LOG_FILE:-}"; then + echo -e "ERROR: "$* + else + echo -e "ERROR: "$* | tee -a ${LOG_FILE} + fi exit 1 } @@ -95,8 +103,8 @@ check_file_readable() { # if it looks like a flag. # Usage: parse_value VAR_NAME VALUE parse_value() { - local val="$2" - if [[ -z $val ]]; then + local val="${2:-}" + if [[ -z "$val" ]]; then err_exit "Missing value for variable $1" exit fi @@ -137,19 +145,19 @@ parse_flags() { parse_value "EXPOSURES" "$exp" i=$((j-1)) ;; --fonts_dir) - parse_value "FONTS_DIR" ${ARGV[$j]} + parse_value "FONTS_DIR" ${ARGV[$j]:-} i=$j ;; --lang) - parse_value "LANG_CODE" ${ARGV[$j]} + parse_value "LANG_CODE" ${ARGV[$j]:-} i=$j ;; --langdata_dir) - parse_value "LANGDATA_ROOT" ${ARGV[$j]} + parse_value "LANGDATA_ROOT" ${ARGV[$j]:-} i=$j ;; --maxpages) - parse_value "MAX_PAGES" ${ARGV[$j]} + parse_value "MAX_PAGES" ${ARGV[$j]:-} i=$j ;; --output_dir) - parse_value "OUTPUT_DIR" ${ARGV[$j]} + parse_value "OUTPUT_DIR" ${ARGV[$j]:-} i=$j ;; --overwrite) OVERWRITE=1 ;; @@ -162,18 +170,18 @@ parse_flags() { --noextract_font_properties) EXTRACT_FONT_PROPERTIES=0 ;; --tessdata_dir) - parse_value "TESSDATA_DIR" ${ARGV[$j]} + parse_value "TESSDATA_DIR" ${ARGV[$j]:-} i=$j ;; --training_text) - parse_value "TRAINING_TEXT" "${ARGV[$j]}" + parse_value "TRAINING_TEXT" "${ARGV[$j]:-}" i=$j ;; --wordlist) - parse_value "WORDLIST_FILE" ${ARGV[$j]} + parse_value "WORDLIST_FILE" ${ARGV[$j]:-} i=$j ;; --workspace_dir) rmdir "$FONT_CONFIG_CACHE" rmdir "$WORKSPACE_DIR" - parse_value "WORKSPACE_DIR" ${ARGV[$j]} + parse_value "WORKSPACE_DIR" ${ARGV[$j]:-} FONT_CONFIG_CACHE=$WORKSPACE_DIR/fc-cache mkdir -p $FONT_CONFIG_CACHE i=$j ;; @@ -182,13 +190,13 @@ parse_flags() { esac i=$((i+1)) done - if [[ -z ${LANG_CODE} ]]; then + if [[ -z ${LANG_CODE:-} ]]; then err_exit "Need to specify a language --lang" fi - if [[ -z ${LANGDATA_ROOT} ]]; then + if [[ -z ${LANGDATA_ROOT:-} ]]; then err_exit "Need to specify path to language files --langdata_dir" fi - if [[ -z ${TESSDATA_DIR} ]]; then + if [[ -z ${TESSDATA_DIR:-} ]]; then if [[ -z ${TESSDATA_PREFIX} ]]; then err_exit "Need to specify a --tessdata_dir or have a "\ "TESSDATA_PREFIX variable defined in your environment" @@ -267,13 +275,13 @@ generate_font_image() { # Phase I : Generate (I)mages from training text for each font. phase_I_generate_image() { - local par_factor=$1 + local par_factor=${1:-} if [[ -z ${par_factor} || ${par_factor} -le 0 ]]; then par_factor=1 fi tlog "\n=== Phase I: Generating training images ===" - if [[ -z ${TRAINING_TEXT} ]] || [[ ! -r ${TRAINING_TEXT} ]]; then - err_exit "Could not find training text file ${TRAINING_TEXT}" + if [[ -z ${TRAINING_TEXT:-} ]] || test ! -r "${TRAINING_TEXT}"; then + err_exit "Could not find training text file ${TRAINING_TEXT:-}" fi CHAR_SPACING="0.0" @@ -545,7 +553,7 @@ make__lstmdata() { --puncs "${lang_prefix}.punc" \ --output_dir "${OUTPUT_DIR}" --lang "${LANG_CODE}" \ "${pass_through}" "${lang_is_rtl}" - + if ((SAVE_BOX_TIFF)); then tlog "\n=== Saving box/tiff pairs for training data ===" for f in "${TRAINING_DIR}/${LANG_CODE}".*.box; do