Skip to content

Commit

Permalink
Merge pull request #100 from Phaired/customizable-alphabet
Browse files Browse the repository at this point in the history
feat: customizable alphabet using OcrEngineParams
  • Loading branch information
robertknight authored Aug 30, 2024
2 parents 71ba35e + b8b8322 commit faca741
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 7 deletions.
1 change: 1 addition & 0 deletions ocrs-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,7 @@ fn main() -> Result<(), Box<dyn Error>> {
} else {
DecodeMethod::Greedy
},
..Default::default()
})?;

// Read image into HWC tensor.
Expand Down
11 changes: 11 additions & 0 deletions ocrs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ pub use preprocess::{DimOrder, ImagePixels, ImageSource, ImageSourceError};
pub use recognition::DecodeMethod;
pub use text_items::{TextChar, TextItem, TextLine, TextWord};

// Alphabet used for text recognition when `OcrEngineParams::alphabet` is
// `None`.
//
// nb. The "E" before "ABCDE" should be the EUR symbol.
const DEFAULT_ALPHABET: &str = " 0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~EABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";

/// Configuration for an [OcrEngine] instance.
#[derive(Default)]
pub struct OcrEngineParams {
Expand All @@ -43,6 +46,9 @@ pub struct OcrEngineParams {

/// Method used to decode outputs of text recognition model.
pub decode_method: DecodeMethod,

/// Alphabet used for text recognition.
///
/// If `None`, the engine falls back to its built-in default alphabet.
pub alphabet: Option<String>,
}

/// Detects and recognizes text in images.
Expand All @@ -54,6 +60,7 @@ pub struct OcrEngine {
recognizer: Option<TextRecognizer>,
debug: bool,
decode_method: DecodeMethod,
alphabet: String,
}

/// Input image for OCR analysis. Instances are created using
Expand All @@ -79,6 +86,9 @@ impl OcrEngine {
recognizer,
debug: params.debug,
decode_method: params.decode_method,
alphabet: params
.alphabet
.unwrap_or_else(|| DEFAULT_ALPHABET.to_string()),
})
}

Expand Down Expand Up @@ -149,6 +159,7 @@ impl OcrEngine {
RecognitionOpt {
debug: self.debug,
decode_method: self.decode_method,
alphabet: self.alphabet.clone(),
},
)
} else {
Expand Down
16 changes: 9 additions & 7 deletions ocrs/src/recognition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,6 @@ use crate::geom_util::{downwards_line, leftmost_edge, rightmost_edge};
use crate::preprocess::BLACK_VALUE;
use crate::text_items::{TextChar, TextLine};

// nb. The "E" before "ABCDE" should be the EUR symbol.
const DEFAULT_ALPHABET: &str = " 0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~EABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";

/// Return the smallest multiple of `factor` that is >= `val`.
fn round_up<
T: Copy
Expand Down Expand Up @@ -226,6 +223,8 @@ pub struct RecognitionOpt {

/// Method used to decode character sequence outputs to character values.
pub decode_method: DecodeMethod,

/// Alphabet used to convert recognized labels into characters.
///
/// A label `n` selects the character at position `n - 1` of this string;
/// labels with no corresponding character are decoded as `'?'`.
pub alphabet: String,
}

/// Input and output from recognition for a single text line.
Expand All @@ -250,13 +249,15 @@ struct LineRecResult {
/// for each line.
///
/// Entries in the result may be `None` if no text was recognized for a line.
fn text_lines_from_recognition_results(results: &[LineRecResult]) -> Vec<Option<TextLine>> {
fn text_lines_from_recognition_results(
results: &[LineRecResult],
alphabet: &str,
) -> Vec<Option<TextLine>> {
results
.iter()
.map(|result| {
let line_rect = result.line.region.bounding_rect();
let x_scale_factor = (line_rect.width() as f32) / (result.line.resized_width as f32);

// Calculate how much the recognition model downscales the image
// width. We assume this will be an integer factor, or close to it
// if the input width is not an exact multiple of the downscaling
Expand Down Expand Up @@ -289,7 +290,7 @@ fn text_lines_from_recognition_results(results: &[LineRecResult]) -> Vec<Option<
return None;
}

let char = DEFAULT_ALPHABET
let char = alphabet
.chars()
.nth((step.label - 1) as usize)
.unwrap_or('?');
Expand Down Expand Up @@ -430,6 +431,7 @@ impl TextRecognizer {
let RecognitionOpt {
debug,
decode_method,
alphabet,
} = opts;

let [_, img_height, img_width] = image.shape();
Expand Down Expand Up @@ -535,7 +537,7 @@ impl TextRecognizer {
// batching and parallel processing. Re-sort them into input order.
line_rec_results.sort_by_key(|result| result.line.index);

let text_lines = text_lines_from_recognition_results(&line_rec_results);
let text_lines = text_lines_from_recognition_results(&line_rec_results, &alphabet);

Ok(text_lines)
}
Expand Down

0 comments on commit faca741

Please sign in to comment.