diff --git a/ccmain/control.cpp b/ccmain/control.cpp index d40c26329b..66a2a8bb3e 100644 --- a/ccmain/control.cpp +++ b/ccmain/control.cpp @@ -1556,7 +1556,7 @@ void Tesseract::match_word_pass_n(int pass_n, WERD_RES *word, word->fix_quotes(); if (tessedit_fix_hyphens) word->fix_hyphens(); - /* Dont trust fix_quotes! - though I think I've fixed the bug */ + /* Don't trust fix_quotes! - though I think I've fixed the bug */ if (word->best_choice->length() != word->box_word->length()) { tprintf("POST FIX_QUOTES FAIL String:\"%s\"; Strlen=%d;" " #Blobs=%d\n", @@ -1694,7 +1694,7 @@ ACCEPTABLE_WERD_TYPE Tesseract::acceptable_word_string( goto not_a_word; /* Allow a single hyphen in a lower case word - - dont trust upper case - I've seen several cases of "H" -> "I-I" + - don't trust upper case - I've seen several cases of "H" -> "I-I" */ if (lengths[i] == 1 && s[offset] == '-') { hyphen_pos = i; diff --git a/ccmain/docqual.cpp b/ccmain/docqual.cpp index 6a7e6e67ef..327d7cbc55 100644 --- a/ccmain/docqual.cpp +++ b/ccmain/docqual.cpp @@ -129,7 +129,7 @@ inT16 Tesseract::count_outline_errs(char c, inT16 outline_count) { int expected_outline_count; if (STRING (outlines_odd).contains (c)) - return 0; //Dont use this char + return 0; //Don't use this char else if (STRING (outlines_2).contains (c)) expected_outline_count = 2; else @@ -157,7 +157,7 @@ void Tesseract::quality_based_rejection(PAGE_RES_IT &page_res_it, * - Word segmentation is the same as the original image * - All characters have the expected number of outlines * NOTE - the rejection counts are recalculated after unrejection - * - CANT do it in a single pass without a bit of fiddling + * - CAN'T do it in a single pass without a bit of fiddling * - keep it simple but inefficient *************************************************************************/ void Tesseract::unrej_good_quality_words( //unreject potential @@ -403,7 +403,7 @@ void Tesseract::doc_and_block_rejection( //reject big chunks 
/************************************************************************* * reject_whole_page() - * Dont believe any of it - set the reject map to 00..00 in all words + * Don't believe any of it - set the reject map to 00..00 in all words * *************************************************************************/ diff --git a/ccmain/fixspace.cpp b/ccmain/fixspace.cpp index 0a561ac9a0..e42617c053 100644 --- a/ccmain/fixspace.cpp +++ b/ccmain/fixspace.cpp @@ -55,7 +55,7 @@ void Tesseract::fix_fuzzy_spaces(ETEXT_DESC *monitor, WERD_RES *word_res; WERD_RES_LIST fuzzy_space_words; inT16 new_length; - BOOL8 prevent_null_wd_fixsp; // DONT process blobless wds + BOOL8 prevent_null_wd_fixsp; // DON'T process blobless wds inT32 word_index; // current word block_res_it.set_to_list(&page_res->block_res_list); @@ -222,7 +222,7 @@ void Tesseract::match_current_words(WERD_RES_LIST &words, ROW *row, * fuzzy spaces. The problem with the basic measure is that "561 63" would score * the same as "56163", though given our knowledge that the space is fuzzy, and * that there is a "1" next to the fuzzy space, we need to ensure that "56163" - * is prefered. + * is preferred. * * The solution is to NOT COUNT the score of any word which has a digit at one * end and a "1Il" as the character the other side of the space. @@ -272,8 +272,8 @@ inT16 Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) { } else { /* Can we add the prev word score and potentially count this word? 
- Yes IF it didnt end in a 1 when the first char of this word is a digit - AND it didnt end in a digit when the first char of this word is a 1 + Yes IF it didn't end in a 1 when the first char of this word is a digit + AND it didn't end in a digit when the first char of this word is a 1 */ word_len = word->reject_map.length(); current_word_ok_so_far = FALSE; @@ -507,7 +507,7 @@ BOOL8 Tesseract::fixspace_thinks_word_done(WERD_RES *word) { /* Use all the standard pass 2 conditions for mode 5 in set_done() in - reject.c BUT DONT REJECT IF THE WERD IS AMBIGUOUS - FOR SPACING WE DONT + reject.c BUT DON'T REJECT IF THE WERD IS AMBIGUOUS - FOR SPACING WE DON'T CARE WHETHER WE HAVE of/at on/an etc. */ if (fixsp_done_mode > 0 && diff --git a/ccmain/output.cpp b/ccmain/output.cpp index 42623b9ec8..ddfcfc54b6 100644 --- a/ccmain/output.cpp +++ b/ccmain/output.cpp @@ -297,7 +297,7 @@ UNICHAR_ID Tesseract::get_rep_char(WERD_RES *word) { // what char is repeated? /************************************************************************* * SUSPECT LEVELS * - * 0 - dont reject ANYTHING + * 0 - don't reject ANYTHING * 1,2 - partial rejection * 3 - BEST * @@ -337,7 +337,7 @@ void Tesseract::set_unlv_suspects(WERD_RES *word_res) { rating_per_ch = word.rating() / word_res->reject_map.length(); if (rating_per_ch >= suspect_rating_per_ch) - return; //Dont touch bad ratings + return; //Don't touch bad ratings if ((word_res->tess_accepted) || (rating_per_ch < suspect_accept_rating)) { /* Unreject any Tess Acceptable word - but NOT tess reject chs*/ diff --git a/ccmain/paramsd.cpp b/ccmain/paramsd.cpp index b141bede62..7784f85361 100644 --- a/ccmain/paramsd.cpp +++ b/ccmain/paramsd.cpp @@ -329,13 +329,13 @@ void ParamsEditor::WriteParams(char *filename, fclose(fp); sprintf (msg_str, "Overwrite file " "%s" "? 
(Y/N)", filename); int a = sv_window_->ShowYesNoDialog(msg_str); - if (a == 'n') { return; } // dont write + if (a == 'n') { return; } // don't write } fp = fopen (filename, "wb"); // can we write to it? if (fp == NULL) { - sv_window_->AddMessage("Cant write to file " "%s" "", filename); + sv_window_->AddMessage("Can't write to file " "%s" "", filename); return; } diff --git a/ccmain/reject.cpp b/ccmain/reject.cpp index 607b84179c..aacc80dd6e 100644 --- a/ccmain/reject.cpp +++ b/ccmain/reject.cpp @@ -521,7 +521,7 @@ BOOL8 Tesseract::word_contains_non_1_digit(const char *word, /************************************************************************* * dont_allow_1Il() - * Dont unreject LONE accepted 1Il conflict set chars + * Don't unreject LONE accepted 1Il conflict set chars *************************************************************************/ void Tesseract::dont_allow_1Il(WERD_RES *word) { int i = 0; @@ -633,7 +633,7 @@ void Tesseract::flip_hyphens(WERD_RES *word_res) { next_left = 9999; else next_left = word_res->rebuild_word->blobs[i + 1]->bounding_box().left(); - // Dont touch small or touching blobs - it is too dangerous. + // Don't touch small or touching blobs - it is too dangerous. 
if ((out_box.width() > 8 * word_res->denorm.x_scale()) && (out_box.left() > prev_right) && (out_box.right() < next_left)) { aspect_ratio = out_box.width() / (float) out_box.height(); diff --git a/ccmain/tesseractclass.cpp b/ccmain/tesseractclass.cpp index 0c52f0efd9..e348c93f98 100644 --- a/ccmain/tesseractclass.cpp +++ b/ccmain/tesseractclass.cpp @@ -136,7 +136,7 @@ Tesseract::Tesseract() BOOL_MEMBER(tessedit_fix_fuzzy_spaces, true, "Try to improve fuzzy spaces", this->params()), BOOL_MEMBER(tessedit_unrej_any_wd, false, - "Dont bother with word plausibility", this->params()), + "Don't bother with word plausibility", this->params()), BOOL_MEMBER(tessedit_fix_hyphens, true, "Crunch double hyphens?", this->params()), BOOL_MEMBER(tessedit_redo_xheight, true, "Check/Correct x-height", @@ -310,19 +310,19 @@ Tesseract::Tesseract() this->params()), INT_MEMBER(crunch_pot_indicators, 1, "How many potential indicators needed", this->params()), - BOOL_MEMBER(crunch_leave_ok_strings, true, "Dont touch sensible strings", + BOOL_MEMBER(crunch_leave_ok_strings, true, "Don't touch sensible strings", this->params()), BOOL_MEMBER(crunch_accept_ok, true, "Use acceptability in okstring", this->params()), BOOL_MEMBER(crunch_leave_accept_strings, false, - "Dont pot crunch sensible strings", this->params()), + "Don't pot crunch sensible strings", this->params()), BOOL_MEMBER(crunch_include_numerals, false, "Fiddle alpha figures", this->params()), INT_MEMBER(crunch_leave_lc_strings, 4, - "Dont crunch words with long lower case strings", + "Don't crunch words with long lower case strings", this->params()), INT_MEMBER(crunch_leave_uc_strings, 4, - "Dont crunch words with long lower case strings", + "Don't crunch words with long lower case strings", this->params()), INT_MEMBER(crunch_long_repetitions, 3, "Crunch words with long repetitions", this->params()), @@ -393,21 +393,21 @@ Tesseract::Tesseract() INT_MEMBER(suspect_space_level, 100, "Min suspect level for rejecting spaces", 
this->params()), INT_MEMBER(suspect_short_words, 2, - "Dont Suspect dict wds longer than this", this->params()), + "Don't suspect dict wds longer than this", this->params()), BOOL_MEMBER(suspect_constrain_1Il, false, "UNLV keep 1Il chars rejected", this->params()), - double_MEMBER(suspect_rating_per_ch, 999.9, "Dont touch bad rating limit", + double_MEMBER(suspect_rating_per_ch, 999.9, "Don't touch bad rating limit", this->params()), double_MEMBER(suspect_accept_rating, -999.9, "Accept good rating limit", this->params()), BOOL_MEMBER(tessedit_minimal_rejection, false, "Only reject tess failures", this->params()), - BOOL_MEMBER(tessedit_zero_rejection, false, "Dont reject ANYTHING", + BOOL_MEMBER(tessedit_zero_rejection, false, "Don't reject ANYTHING", this->params()), BOOL_MEMBER(tessedit_word_for_word, false, "Make output have exactly one word per WERD", this->params()), BOOL_MEMBER(tessedit_zero_kelvin_rejection, false, - "Dont reject ANYTHING AT ALL", this->params()), + "Don't reject ANYTHING AT ALL", this->params()), BOOL_MEMBER(tessedit_consistent_reps, true, "Force all rep chars the same", this->params()), INT_MEMBER(tessedit_reject_mode, 0, "Rejection algorithm", @@ -424,7 +424,7 @@ Tesseract::Tesseract() "Use DOC dawg in 11l conf. detector", this->params()), BOOL_MEMBER(rej_1Il_use_dict_word, false, "Use dictword test", this->params()), - BOOL_MEMBER(rej_1Il_trust_permuter_type, true, "Dont double check", + BOOL_MEMBER(rej_1Il_trust_permuter_type, true, "Don't double check", this->params()), BOOL_MEMBER(rej_use_tess_accepted, true, "Individual rejection control", this->params()), diff --git a/ccmain/tesseractclass.h b/ccmain/tesseractclass.h index 50141bf942..6666dec36b 100644 --- a/ccmain/tesseractclass.h +++ b/ccmain/tesseractclass.h @@ -733,7 +733,7 @@ class Tesseract : public Wordrec { GenericVector* class_ids); // Resegments the word to achieve the target_text from the classifier. // Returns false if the re-segmentation fails. 
- // Uses brute-force combination of upto kMaxGroupSize adjacent blobs, and + // Uses brute-force combination of up to kMaxGroupSize adjacent blobs, and // applies a full search on the classifier results to find the best classified // segmentation. As a compromise to obtain better recall, 1-1 ambiguity // substitutions ARE used. @@ -833,7 +833,7 @@ class Tesseract : public Wordrec { BOOL_VAR_H(tessedit_fix_fuzzy_spaces, true, "Try to improve fuzzy spaces"); BOOL_VAR_H(tessedit_unrej_any_wd, false, - "Dont bother with word plausibility"); + "Don't bother with word plausibility"); BOOL_VAR_H(tessedit_fix_hyphens, true, "Crunch double hyphens?"); BOOL_VAR_H(tessedit_redo_xheight, true, "Check/Correct x-height"); BOOL_VAR_H(tessedit_enable_doc_dict, true, @@ -954,15 +954,15 @@ class Tesseract : public Wordrec { double_VAR_H(crunch_small_outlines_size, 0.6, "Small if lt xht x this"); INT_VAR_H(crunch_rating_max, 10, "For adj length in rating per ch"); INT_VAR_H(crunch_pot_indicators, 1, "How many potential indicators needed"); - BOOL_VAR_H(crunch_leave_ok_strings, true, "Dont touch sensible strings"); + BOOL_VAR_H(crunch_leave_ok_strings, true, "Don't touch sensible strings"); BOOL_VAR_H(crunch_accept_ok, true, "Use acceptability in okstring"); BOOL_VAR_H(crunch_leave_accept_strings, false, - "Dont pot crunch sensible strings"); + "Don't pot crunch sensible strings"); BOOL_VAR_H(crunch_include_numerals, false, "Fiddle alpha figures"); INT_VAR_H(crunch_leave_lc_strings, 4, - "Dont crunch words with long lower case strings"); + "Don't crunch words with long lower case strings"); INT_VAR_H(crunch_leave_uc_strings, 4, - "Dont crunch words with long lower case strings"); + "Don't crunch words with long lower case strings"); INT_VAR_H(crunch_long_repetitions, 3, "Crunch words with long repetitions"); INT_VAR_H(crunch_debug, 0, "As it says"); INT_VAR_H(fixsp_non_noise_limit, 1, @@ -1010,16 +1010,16 @@ class Tesseract : public Wordrec { INT_VAR_H(suspect_space_level, 100, 
"Min suspect level for rejecting spaces"); INT_VAR_H(suspect_short_words, 2, - "Dont Suspect dict wds longer than this"); + "Don't Suspect dict wds longer than this"); BOOL_VAR_H(suspect_constrain_1Il, false, "UNLV keep 1Il chars rejected"); - double_VAR_H(suspect_rating_per_ch, 999.9, "Dont touch bad rating limit"); + double_VAR_H(suspect_rating_per_ch, 999.9, "Don't touch bad rating limit"); double_VAR_H(suspect_accept_rating, -999.9, "Accept good rating limit"); BOOL_VAR_H(tessedit_minimal_rejection, false, "Only reject tess failures"); - BOOL_VAR_H(tessedit_zero_rejection, false, "Dont reject ANYTHING"); + BOOL_VAR_H(tessedit_zero_rejection, false, "Don't reject ANYTHING"); BOOL_VAR_H(tessedit_word_for_word, false, "Make output have exactly one word per WERD"); BOOL_VAR_H(tessedit_zero_kelvin_rejection, false, - "Dont reject ANYTHING AT ALL"); + "Don't reject ANYTHING AT ALL"); BOOL_VAR_H(tessedit_consistent_reps, true, "Force all rep chars the same"); INT_VAR_H(tessedit_reject_mode, 0, "Rejection algorithm"); BOOL_VAR_H(tessedit_rejection_debug, false, "Adaption debug"); @@ -1030,7 +1030,7 @@ class Tesseract : public Wordrec { "Aspect ratio dot/hyphen test"); BOOL_VAR_H(rej_trust_doc_dawg, false, "Use DOC dawg in 11l conf. detector"); BOOL_VAR_H(rej_1Il_use_dict_word, false, "Use dictword test"); - BOOL_VAR_H(rej_1Il_trust_permuter_type, true, "Dont double check"); + BOOL_VAR_H(rej_1Il_trust_permuter_type, true, "Don't double check"); BOOL_VAR_H(rej_use_tess_accepted, true, "Individual rejection control"); BOOL_VAR_H(rej_use_tess_blanks, true, "Individual rejection control"); BOOL_VAR_H(rej_use_good_perm, true, "Individual rejection control");