robertknight · robertknight · Aug 30, 2024 · Aug 22, 2024 · Aug 22, 2024 · Aug 26, 2024
diff --git a/ocrs-cli/src/main.rs b/ocrs-cli/src/main.rs
@@ -283,6 +283,7 @@ fn main() -> Result<(), Box<dyn Error>> {
         } else {
             DecodeMethod::Greedy
         },
+        ..Default::default()
     })?;
 
     // Read image into HWC tensor.

diff --git a/ocrs/src/lib.rs b/ocrs/src/lib.rs
@@ -29,6 +29,10 @@ pub use preprocess::{DimOrder, ImagePixels, ImageSource, ImageSourceError};
 pub use recognition::DecodeMethod;
 pub use text_items::{TextChar, TextItem, TextLine, TextWord};
 
+// Default alphabet. nb. The "E" before "ABCDE" should be the EUR symbol.
+const DEFAULT_ALPHABET: &str = " 0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~EABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
+
+
 /// Configuration for an [OcrEngine] instance.
 #[derive(Default)]
 pub struct OcrEngineParams {
@@ -43,6 +47,9 @@ pub struct OcrEngineParams {
 
     /// Method used to decode outputs of text recognition model.
     pub decode_method: DecodeMethod,
+
+    /// Alphabet used for text recognition. If `None`, a built-in default alphabet is used.
+    pub alphabet: Option<String>,
 }
 
 /// Detects and recognizes text in images.
@@ -54,6 +61,7 @@ pub struct OcrEngine {
     recognizer: Option<TextRecognizer>,
     debug: bool,
     decode_method: DecodeMethod,
+    alphabet: String,
 }
 
 /// Input image for OCR analysis. Instances are created using
@@ -79,6 +87,7 @@ impl OcrEngine {
             recognizer,
             debug: params.debug,
             decode_method: params.decode_method,
+            alphabet: params.alphabet.unwrap_or_else(|| DEFAULT_ALPHABET.to_string()),
         })
     }
 
@@ -149,13 +158,14 @@ impl OcrEngine {
                 RecognitionOpt {
                     debug: self.debug,
                     decode_method: self.decode_method,
+                    alphabet: self.alphabet.clone(),
                 },
             )
         } else {
             Err(anyhow!("Recognition model not loaded"))
         }
     }
 
     /// Prepare an image for input into the text line recognition model.
     ///
     /// This method exists to help with debugging recognition issues by exposing

diff --git a/ocrs/src/recognition.rs b/ocrs/src/recognition.rs
@@ -15,9 +15,6 @@ use crate::geom_util::{downwards_line, leftmost_edge, rightmost_edge};
 use crate::preprocess::BLACK_VALUE;
 use crate::text_items::{TextChar, TextLine};
 
-// nb. The "E" before "ABCDE" should be the EUR symbol.
-const DEFAULT_ALPHABET: &str = " 0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~EABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
-
 /// Return the smallest multiple of `factor` that is >= `val`.
 fn round_up<
     T: Copy
@@ -226,8 +223,11 @@ pub struct RecognitionOpt {
 
     /// Method used to decode character sequence outputs to character values.
     pub decode_method: DecodeMethod,
+
+    /// Alphabet used to convert predicted labels into characters.
+    pub alphabet: String,
 }
 
 /// Input and output from recognition for a single text line.
 struct LineRecResult {
     /// Input to the recognition model.
@@ -250,13 +250,12 @@ struct LineRecResult {
 /// for each line.
 ///
 /// Entries in the result may be `None` if no text was recognized for a line.
-fn text_lines_from_recognition_results(results: &[LineRecResult]) -> Vec<Option<TextLine>> {
+fn text_lines_from_recognition_results(results: &[LineRecResult], alphabet: &str) -> Vec<Option<TextLine>> {
     results
         .iter()
         .map(|result| {
             let line_rect = result.line.region.bounding_rect();
             let x_scale_factor = (line_rect.width() as f32) / (result.line.resized_width as f32);
-
             // Calculate how much the recognition model downscales the image
             // width. We assume this will be an integer factor, or close to it
             // if the input width is not an exact multiple of the downscaling
@@ -276,11 +275,9 @@ fn text_lines_from_recognition_results(results: &[LineRecResult]) -> Vec<Option<
                     } else {
                         result.line.resized_width
                     };
-
                     // Map X coords to those of the input image.
                     let [start_x, end_x] = [start_x, end_x]
                         .map(|x| line_rect.left() + (x as f32 * x_scale_factor) as i32);
-
                     // Since the recognition input is padded, it is possible to
                     // get predicted characters in the output with positions
                     // that correspond to the padding region, and thus are
@@ -289,7 +286,7 @@ fn text_lines_from_recognition_results(results: &[LineRecResult]) -> Vec<Option<
                         return None;
                     }
 
-                    let char = DEFAULT_ALPHABET
+                    let char = alphabet
                         .chars()
                         .nth((step.label - 1) as usize)
                         .unwrap_or('?');
@@ -301,7 +298,7 @@ fn text_lines_from_recognition_results(results: &[LineRecResult]) -> Vec<Option<
                             start_x,
                             end_x,
                         )
-                        .expect("invalid X coords"),
+                        .expect("invalid X coords"),
                     })
                 })
                 .collect();
@@ -315,6 +312,7 @@ fn text_lines_from_recognition_results(results: &[LineRecResult]) -> Vec<Option<
         .collect()
 }
 
+
 /// Extracts character sequences and coordinates from text lines detected in
 /// an image.
 pub struct TextRecognizer {
@@ -430,6 +428,7 @@ impl TextRecognizer {
         let RecognitionOpt {
             debug,
             decode_method,
+            alphabet,
         } = opts;
 
         let [_, img_height, img_width] = image.shape();
@@ -535,7 +534,7 @@ impl TextRecognizer {
         // batching and parallel processing. Re-sort them into input order.
         line_rec_results.sort_by_key(|result| result.line.index);
 
-        let text_lines = text_lines_from_recognition_results(&line_rec_results);
+        let text_lines = text_lines_from_recognition_results(&line_rec_results, &alphabet);
 
         Ok(text_lines)
     }