diff --git a/ocrs-cli/src/main.rs b/ocrs-cli/src/main.rs index 5f0d44c..96439ef 100644 --- a/ocrs-cli/src/main.rs +++ b/ocrs-cli/src/main.rs @@ -283,6 +283,7 @@ fn main() -> Result<(), Box> { } else { DecodeMethod::Greedy }, + ..Default::default() })?; // Read image into HWC tensor. diff --git a/ocrs/src/lib.rs b/ocrs/src/lib.rs index c4131ff..0b81ba2 100644 --- a/ocrs/src/lib.rs +++ b/ocrs/src/lib.rs @@ -29,6 +29,9 @@ pub use preprocess::{DimOrder, ImagePixels, ImageSource, ImageSourceError}; pub use recognition::DecodeMethod; pub use text_items::{TextChar, TextItem, TextLine, TextWord}; +// nb. The "E" before "ABCDE" should be the EUR symbol. +const DEFAULT_ALPHABET: &str = " 0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~EABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; + /// Configuration for an [OcrEngine] instance. #[derive(Default)] pub struct OcrEngineParams { @@ -43,6 +46,9 @@ pub struct OcrEngineParams { /// Method used to decode outputs of text recognition model. pub decode_method: DecodeMethod, + + /// Alphabet used for text recognition. + pub alphabet: Option, } /// Detects and recognizes text in images. @@ -54,6 +60,7 @@ pub struct OcrEngine { recognizer: Option, debug: bool, decode_method: DecodeMethod, + alphabet: String, } /// Input image for OCR analysis. Instances are created using @@ -79,6 +86,9 @@ impl OcrEngine { recognizer, debug: params.debug, decode_method: params.decode_method, + alphabet: params + .alphabet + .unwrap_or_else(|| DEFAULT_ALPHABET.to_string()), }) } @@ -149,6 +159,7 @@ impl OcrEngine { RecognitionOpt { debug: self.debug, decode_method: self.decode_method, + alphabet: self.alphabet.clone(), }, ) } else { diff --git a/ocrs/src/recognition.rs b/ocrs/src/recognition.rs index d7f8984..b52a008 100644 --- a/ocrs/src/recognition.rs +++ b/ocrs/src/recognition.rs @@ -15,9 +15,6 @@ use crate::geom_util::{downwards_line, leftmost_edge, rightmost_edge}; use crate::preprocess::BLACK_VALUE; use crate::text_items::{TextChar, TextLine}; -// nb. The "E" before "ABCDE" should be the EUR symbol. -const DEFAULT_ALPHABET: &str = " 0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~EABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; - /// Return the smallest multiple of `factor` that is >= `val`. fn round_up< T: Copy @@ -226,6 +223,8 @@ pub struct RecognitionOpt { /// Method used to decode character sequence outputs to character values. pub decode_method: DecodeMethod, + + pub alphabet: String, } /// Input and output from recognition for a single text line. @@ -250,13 +249,15 @@ struct LineRecResult { /// for each line. /// /// Entries in the result may be `None` if no text was recognized for a line. -fn text_lines_from_recognition_results(results: &[LineRecResult]) -> Vec> { +fn text_lines_from_recognition_results( + results: &[LineRecResult], + alphabet: &str, +) -> Vec> { results .iter() .map(|result| { let line_rect = result.line.region.bounding_rect(); let x_scale_factor = (line_rect.width() as f32) / (result.line.resized_width as f32); - // Calculate how much the recognition model downscales the image // width. We assume this will be an integer factor, or close to it // if the input width is not an exact multiple of the downscaling @@ -289,7 +290,7 @@ fn text_lines_from_recognition_results(results: &[LineRecResult]) -> Vec