diff --git a/include/tesseract/baseapi.h b/include/tesseract/baseapi.h index c08b477aed..1bc8345dca 100644 --- a/include/tesseract/baseapi.h +++ b/include/tesseract/baseapi.h @@ -498,7 +498,7 @@ class TESS_API TessBaseAPI { * metadata used by side-effect processes, such as reading a box * file or formatting as hOCR. * - * See ProcessPages for desciptions of other parameters. + * See ProcessPages for descriptions of other parameters. */ bool ProcessPage(Pix *pix, int page_index, const char *filename, const char *retry_config, int timeout_millisec, diff --git a/java/com/google/scrollview/ScrollView.java b/java/com/google/scrollview/ScrollView.java index c3494af32f..e98af3dd58 100644 --- a/java/com/google/scrollview/ScrollView.java +++ b/java/com/google/scrollview/ScrollView.java @@ -85,7 +85,7 @@ public static String receiveMessage() throws IOException { } /** - * The main program loop. Basically loops trough receiving messages and + * The main program loop. Basically loops through receiving messages and * processing them and then sending messages (if there are any). */ private static void IOLoop() { diff --git a/src/ccmain/control.cpp b/src/ccmain/control.cpp index b1a4db3716..667e588c22 100644 --- a/src/ccmain/control.cpp +++ b/src/ccmain/control.cpp @@ -235,7 +235,7 @@ bool Tesseract::RecogAllWordsPassN(int pass_n, ETEXT_DESC *monitor, PAGE_RES_IT continue; } } - // Sync pr_it with the wth WordData. + // Sync pr_it with the WordData. while (pr_it->word() != nullptr && pr_it->word() != word->word) { pr_it->forward(); } diff --git a/src/ccmain/fixspace.cpp b/src/ccmain/fixspace.cpp index f5687d2495..71fbade3b6 100644 --- a/src/ccmain/fixspace.cpp +++ b/src/ccmain/fixspace.cpp @@ -25,7 +25,7 @@ #include "errcode.h" // for ASSERT_HOST #include "normalis.h" // for kBlnXHeight, kBlnBaselineOffset #include "pageres.h" // for WERD_RES_IT, WERD_RES, WERD_RES_LIST -#include "params.h" // for IntParam, StringParam, BoolParam, Doub... 
+#include "params.h" // for IntParam, StringParam, BoolParam, DoubleParam, ... #include "ratngs.h" // for WERD_CHOICE, FREQ_DAWG_PERM, NUMBER_PERM #include "rect.h" // for TBOX #include "stepblob.h" // for C_BLOB_IT, C_BLOB_LIST, C_BLOB diff --git a/src/ccmain/tesseractclass.cpp b/src/ccmain/tesseractclass.cpp index 9507993839..cb34422ae9 100644 --- a/src/ccmain/tesseractclass.cpp +++ b/src/ccmain/tesseractclass.cpp @@ -256,7 +256,7 @@ Tesseract::Tesseract() , INT_MEMBER(fixsp_non_noise_limit, 1, "How many non-noise blbs either side?", this->params()) , double_MEMBER(fixsp_small_outlines_size, 0.28, "Small if lt xht x this", this->params()) , BOOL_MEMBER(tessedit_prefer_joined_punct, false, "Reward punctuation joins", this->params()) - , INT_MEMBER(fixsp_done_mode, 1, "What constitues done for spacing", this->params()) + , INT_MEMBER(fixsp_done_mode, 1, "What constitutes done for spacing", this->params()) , INT_MEMBER(debug_fix_space_level, 0, "Contextual fixspace debug", this->params()) , STRING_MEMBER(numeric_punctuation, ".,", "Punct. chs expected WITHIN numbers", this->params()) , INT_MEMBER(x_ht_acceptance_tolerance, 8, diff --git a/src/ccmain/tesseractclass.h b/src/ccmain/tesseractclass.h index 1f676906e8..b2a41c4f50 100644 --- a/src/ccmain/tesseractclass.h +++ b/src/ccmain/tesseractclass.h @@ -902,7 +902,7 @@ class TESS_API Tesseract : public Wordrec { INT_VAR_H(fixsp_non_noise_limit, 1, "How many non-noise blbs either side?"); double_VAR_H(fixsp_small_outlines_size, 0.28, "Small if lt xht x this"); BOOL_VAR_H(tessedit_prefer_joined_punct, false, "Reward punctuation joins"); - INT_VAR_H(fixsp_done_mode, 1, "What constitues done for spacing"); + INT_VAR_H(fixsp_done_mode, 1, "What constitutes done for spacing"); INT_VAR_H(debug_fix_space_level, 0, "Contextual fixspace debug"); STRING_VAR_H(numeric_punctuation, ".,", "Punct. 
chs expected WITHIN numbers"); INT_VAR_H(x_ht_acceptance_tolerance, 8, "Max allowed deviation of blob top outside of font data"); diff --git a/src/ccstruct/blobbox.h b/src/ccstruct/blobbox.h index 60feea0980..f2b935e2e1 100644 --- a/src/ccstruct/blobbox.h +++ b/src/ccstruct/blobbox.h @@ -93,7 +93,7 @@ enum BlobSpecialTextType { BSTT_NONE, // No special. BSTT_ITALIC, // Italic style. BSTT_DIGIT, // Digit symbols. - BSTT_MATH, // Mathmatical symobls (not including digit). + BSTT_MATH, // Mathematical symbols (not including digit). BSTT_UNCLEAR, // Characters with low recognition rate. BSTT_SKIP, // Characters that we skip labeling (usually too small). BSTT_COUNT diff --git a/src/ccstruct/pageres.h b/src/ccstruct/pageres.h index 0ce033f82e..ce39330700 100644 --- a/src/ccstruct/pageres.h +++ b/src/ccstruct/pageres.h @@ -217,7 +217,7 @@ class TESS_API WERD_RES : public ELIST_LINK { std::vector>> timesteps; // Stores the lstm choices of every timestep segmented by character std::vector>>> segmented_timesteps; - // Symbolchoices aquired during CTC + // Symbolchoices acquired during CTC std::vector>> CTC_symbol_choices; // Stores if the timestep vector starts with a space bool leading_space = false; diff --git a/src/ccstruct/polyaprx.cpp b/src/ccstruct/polyaprx.cpp index 65cc66bbf4..2d7405d3af 100644 --- a/src/ccstruct/polyaprx.cpp +++ b/src/ccstruct/polyaprx.cpp @@ -296,7 +296,7 @@ void fix2( // polygonal approx /*single fixed step */ if (edgept->flags[FLAGS] & FIXED && edgept->flags[RUNLENGTH] == 1 - /*and neighours free */ + /*and neighbours free */ && edgept->next->flags[FLAGS] & FIXED && (edgept->prev->flags[FLAGS] & FIXED) == 0 /*same pair of dirs */ diff --git a/src/ccutil/ambigs.cpp b/src/ccutil/ambigs.cpp index a4dcd242a1..a5c50f152f 100644 --- a/src/ccutil/ambigs.cpp +++ b/src/ccutil/ambigs.cpp @@ -333,7 +333,7 @@ bool UnicharAmbigs::ParseAmbiguityLine(int line_num, int version, int debug_leve return false; } if (version > 0) { - // The next field being true 
indicates that the abiguity should + // The next field being true indicates that the ambiguity should // always be substituted (e.g. '' should always be changed to "). // For such "certain" n -> m ambigs tesseract will insert character // fragments for the n pieces in the unicharset. AmbigsFound() diff --git a/src/classify/adaptmatch.cpp b/src/classify/adaptmatch.cpp index e9490f01eb..e803a29355 100644 --- a/src/classify/adaptmatch.cpp +++ b/src/classify/adaptmatch.cpp @@ -190,7 +190,7 @@ void SetAdaptiveThreshold(float Threshold); * @param Blob blob to be classified * @param[out] Choices List of choices found by adaptive matcher. * filled on return with the choices found by the - * class pruner and the ratings therefrom. Also + * class pruner and the ratings therefrom. Also * contains the detailed results of the integer matcher. * */ diff --git a/src/dict/dict.h b/src/dict/dict.h index 42b672bd72..ff221b0c92 100644 --- a/src/dict/dict.h +++ b/src/dict/dict.h @@ -322,7 +322,7 @@ class TESS_API Dict { * initialized to NO_EDGE. Since the punctuation dawg includes the empty * pattern " " (meaning anything without surrounding punctuation), having a * single entry for the punctuation dawg will cover all dawgs reachable - * therefrom -- that includes all number and word dawgs. The only dawg + * therefrom -- that includes all number and word dawgs. The only dawg * non-reachable from the punctuation_dawg is the pattern dawg. * If hyphen state needs to be applied, initial dawg_args->active_dawgs can * be copied from the saved hyphen state (maintained by Dict). diff --git a/src/lstm/recodebeam.h b/src/lstm/recodebeam.h index c8d99e4c82..45df426157 100644 --- a/src/lstm/recodebeam.h +++ b/src/lstm/recodebeam.h @@ -335,7 +335,7 @@ class TESS_API RecodeBeamSearch { int xCoord); // Calculates more accurate character boundaries which can be used to - // provide more acurate alternative symbol choices. + // provide more accurate alternative symbol choices. 
static void calculateCharBoundaries(std::vector *starts, std::vector *ends, std::vector *character_boundaries_, int maxWidth); diff --git a/src/textord/tablerecog.cpp b/src/textord/tablerecog.cpp index 0a7c8bb24a..c73d239986 100644 --- a/src/textord/tablerecog.cpp +++ b/src/textord/tablerecog.cpp @@ -733,7 +733,7 @@ int StructuredTable::CountHorizontalIntersections(int y) { } // Counts how many text partitions are in this box. -// This is used to count partitons in cells, as that can indicate +// This is used to count partitions in cells, as that can indicate // how "strong" a potential table row/column (or even full table) actually is. int StructuredTable::CountPartitions(const TBOX &box) { ColPartitionGridSearch gsearch(text_grid_); diff --git a/src/textord/tabvector.cpp b/src/textord/tabvector.cpp index 771a5f0dbe..1d82397dd5 100644 --- a/src/textord/tabvector.cpp +++ b/src/textord/tabvector.cpp @@ -776,7 +776,7 @@ void TabVector::Evaluate(const ICOORD &vertical, TabFind *finder) { } // (Re)Fit a line to the stored points. Returns false if the line -// is degenerate. Althougth the TabVector code mostly doesn't care about the +// is degenerate. Although the TabVector code mostly doesn't care about the // direction of lines, XAtY would give silly results for a horizontal line. // The class is mostly aimed at use for vertical lines representing // horizontal tab stops. diff --git a/src/textord/tabvector.h b/src/textord/tabvector.h index fc78e66cc2..99c1b0200e 100644 --- a/src/textord/tabvector.h +++ b/src/textord/tabvector.h @@ -363,7 +363,7 @@ class TabVector : public ELIST2_LINK { void Evaluate(const ICOORD &vertical, TabFind *finder); // (Re)Fit a line to the stored points. Returns false if the line - // is degenerate. Althougth the TabVector code mostly doesn't care about the + // is degenerate. Although the TabVector code mostly doesn't care about the // direction of lines, XAtY would give silly results for a horizontal line. 
// The class is mostly aimed at use for vertical lines representing // horizontal tab stops. diff --git a/src/textord/textlineprojection.cpp b/src/textord/textlineprojection.cpp index 6c5d3fb59e..19eb86df0b 100644 --- a/src/textord/textlineprojection.cpp +++ b/src/textord/textlineprojection.cpp @@ -35,7 +35,7 @@ const int kWrongWayPenalty = 4; // Ratio between parallel gap and perpendicular gap used to measure total // distance of a box from a target box in curved textline space. // parallel-gap is treated more favorably by this factor to allow catching -// quotes and elipsis at the end of textlines. +// quotes and ellipsis at the end of textlines. const int kParaPerpDistRatio = 4; // Multiple of scale_factor_ that the inter-line gap must be before we start // padding the increment box perpendicular to the text line. diff --git a/src/textord/tordmain.cpp b/src/textord/tordmain.cpp index 6a0218e6f7..13d4ac71c7 100644 --- a/src/textord/tordmain.cpp +++ b/src/textord/tordmain.cpp @@ -502,14 +502,14 @@ bool Textord::clean_noise_from_row( // remove empties blob_box = outline->bounding_box(); blob_size = blob_box.width() > blob_box.height() ? blob_box.width() : blob_box.height(); if (blob_size < textord_noise_sizelimit * row->x_height()) { - dot_count++; // count smal outlines + dot_count++; // count small outlines } if (!outline->child()->empty() && blob_box.height() < (1 + textord_noise_syfract) * row->x_height() && blob_box.height() > (1 - textord_noise_syfract) * row->x_height() && blob_box.width() < (1 + textord_noise_sxfract) * row->x_height() && blob_box.width() > (1 - textord_noise_sxfract) * row->x_height()) { - super_norm_count++; // count smal outlines + super_norm_count++; // count small outlines } } } else { @@ -598,14 +598,14 @@ void Textord::clean_noise_from_words( // remove empties blob_box = outline->bounding_box(); blob_size = blob_box.width() > blob_box.height() ? 
blob_box.width() : blob_box.height(); if (blob_size < textord_noise_sizelimit * row->x_height()) { - dot_count++; // count smal outlines + dot_count++; // count small outlines } if (!outline->child()->empty() && blob_box.height() < (1 + textord_noise_syfract) * row->x_height() && blob_box.height() > (1 - textord_noise_syfract) * row->x_height() && blob_box.width() < (1 + textord_noise_sxfract) * row->x_height() && blob_box.width() > (1 - textord_noise_sxfract) * row->x_height()) { - norm_count++; // count smal outlines + norm_count++; // count small outlines } } } else { diff --git a/src/textord/tospace.cpp b/src/textord/tospace.cpp index b4964838ca..e6ee85502e 100644 --- a/src/textord/tospace.cpp +++ b/src/textord/tospace.cpp @@ -64,7 +64,7 @@ void Textord::to_spacing(ICOORD page_tr, // topright of page block_non_space_gap_width); // Make sure relative values of block-level space and non-space gap // widths are reasonable. The ratio of 1:3 is also used in - // block_spacing_stats, to corrrect the block_space_gap_width + // block_spacing_stats, to correct the block_space_gap_width // Useful for arabic and hindi, when the non-space gap width is // often over-estimated and should not be trusted. A similar ratio // is found in block_spacing_stats. @@ -1695,7 +1695,7 @@ TBOX Textord::reduced_box_next(TO_ROW *row, // current row * the xheight. * * - * !!!!!!! WONT WORK WITH LARGE UPPER CASE CHARS - T F V W - look at examples on + * !!!!!!! WON'T WORK WITH LARGE UPPER CASE CHARS - T F V W - look at examples on * "home". 
Perhaps we need something which say if the width ABOVE the * xht alone includes the whole of the reduced width, then use the full * blob box - Might still fail on italic F diff --git a/src/training/degradeimage.cpp b/src/training/degradeimage.cpp index 5d658102f9..3ab57f9bf5 100644 --- a/src/training/degradeimage.cpp +++ b/src/training/degradeimage.cpp @@ -78,7 +78,7 @@ const int kMinRampSize = 1000; // With no dilation, after covolution, the images are so light that a heavy // constant offset is required to make the 0 image look reasonable. A simple // constant offset multiple of exposure to undo this value is enough to achieve -// all the required lightening. This gives the advantage that exposure level 1 +// all the required lighting. This gives the advantage that exposure level 1 // with a single dilation gives a good impression of the broken-yet-too-dark // problem that is often seen in scans. // A small random rotation gives some varying greyscale values on the edges, diff --git a/unittest/README.md b/unittest/README.md index bf4f83fefb..f047f028cc 100644 --- a/unittest/README.md +++ b/unittest/README.md @@ -69,7 +69,7 @@ ### Fonts -* Microsoft fonts: arialbi.ttf, times.ttf, verdana.ttf - [instalation guide](https://www.makeuseof.com/tag/how-to-install-microsoft-core-fonts-in-ubuntu-linux/) +* Microsoft fonts: arialbi.ttf, times.ttf, verdana.ttf - [installation guide](https://www.makeuseof.com/tag/how-to-install-microsoft-core-fonts-in-ubuntu-linux/) * [ae_Arab.ttf](https://www.wfonts.com/download/data/2014/12/03/ae-arab/ae-arab.zip) * dejavu-fonts: [DejaVuSans-ExtraLight.ttf](https://dejavu-fonts.github.io/Download.html) * [Lohit-Hindi.ttf](https://raw.githubusercontent.com/pratul/packageofpractices/master/assets/fonts/Lohit-Hindi.ttf) diff --git a/unittest/paragraphs_test.cc b/unittest/paragraphs_test.cc index db053910b8..c30f859a3d 100644 --- a/unittest/paragraphs_test.cc +++ b/unittest/paragraphs_test.cc @@ -182,7 +182,7 @@ void 
EvaluateParagraphDetection(const TextAndModel *correct, int n, } dbg_lines.push_back(absl::StrCat(correct[i].ascii, annotation)); } - LOG(INFO) << "Discrepency!\n" << absl::StrJoin(dbg_lines, "\n"); + LOG(INFO) << "Discrepancy!\n" << absl::StrJoin(dbg_lines, "\n"); } }