diff --git a/ocrs-cli/src/main.rs b/ocrs-cli/src/main.rs
index 4f56e4c..9bee3d8 100644
--- a/ocrs-cli/src/main.rs
+++ b/ocrs-cli/src/main.rs
@@ -219,16 +219,18 @@ fn main() -> Result<(), Box<dyn Error>> {
     let detection_model_src = args
         .detection_model
         .as_ref()
-        .map(|path| ModelSource::Path(path))
-        .unwrap_or(ModelSource::Url(DETECTION_MODEL));
+        .map_or(ModelSource::Url(DETECTION_MODEL), |path| {
+            ModelSource::Path(path)
+        });
     let detection_model = load_model(detection_model_src)
         .file_error_context("Failed to load text detection model", detection_model_src)?;
 
     let recognition_model_src = args
         .recognition_model
         .as_ref()
-        .map(|path| ModelSource::Path(path))
-        .unwrap_or(ModelSource::Url(RECOGNITION_MODEL));
+        .map_or(ModelSource::Url(RECOGNITION_MODEL), |path| {
+            ModelSource::Path(path)
+        });
     let recognition_model = load_model(recognition_model_src).file_error_context(
         "Failed to load text recognition model",
         recognition_model_src,
diff --git a/ocrs-cli/src/output.rs b/ocrs-cli/src/output.rs
index c702c92..d44a842 100644
--- a/ocrs-cli/src/output.rs
+++ b/ocrs-cli/src/output.rs
@@ -139,7 +139,7 @@ pub fn generate_annotated_png(args: GeneratePngArgs) -> NdTensor<f32, 3> {
     let floor_point = |p: PointF| Point::from_yx(p.y as i32, p.x as i32);
 
     // Draw line bounding rects from layout analysis step.
-    for line in line_rects.iter() {
+    for line in line_rects {
         let line_points: Vec<_> = line
             .iter()
             .flat_map(|word_rect| word_rect.corners().into_iter())
diff --git a/ocrs/src/detection.rs b/ocrs/src/detection.rs
index 608ccfb..39ba94a 100644
--- a/ocrs/src/detection.rs
+++ b/ocrs/src/detection.rs
@@ -83,11 +83,9 @@ impl TextDetector {
         // Add batch dim
         let image = image.reshaped([1, img_chans, img_height, img_width]);
 
-        let (in_height, in_width) = match self.input_shape[..] {
-            [_, _, Dimension::Fixed(h), Dimension::Fixed(w)] => (h, w),
-            _ => {
-                return Err("failed to get model dims".into());
-            }
+        let [_, _, Dimension::Fixed(in_height), Dimension::Fixed(in_width)] = self.input_shape[..]
+        else {
+            return Err("failed to get model dims".into());
         };
 
         // Pad small images to the input size of the text detection model. This is
diff --git a/ocrs/src/layout_analysis.rs b/ocrs/src/layout_analysis.rs
index b7484c7..bdbdd14 100644
--- a/ocrs/src/layout_analysis.rs
+++ b/ocrs/src/layout_analysis.rs
@@ -92,18 +92,18 @@ pub fn find_block_separators(words: &[RotatedRect]) -> Vec<Rect> {
     lines.sort_by_key(|l| l.first().unwrap().bounding_rect().top().round() as i32);
 
     let mut all_word_spacings = Vec::new();
-    for line in lines.iter() {
+    for line in lines {
         if line.len() > 1 {
             let mut spacings: Vec<_> = zip(line.iter(), line.iter().skip(1))
                 .map(|(cur, next)| {
                     (next.bounding_rect().left() - cur.bounding_rect().right()).round() as i32
                 })
                 .collect();
-            spacings.sort();
+            spacings.sort_unstable();
             all_word_spacings.extend_from_slice(&spacings);
         }
     }
-    all_word_spacings.sort();
+    all_word_spacings.sort_unstable();
 
     let median_word_spacing = all_word_spacings
         .get(all_word_spacings.len() / 2)
@@ -111,8 +111,7 @@
         .unwrap_or(10);
     let median_height = words
         .get(words.len() / 2)
-        .map(|r| r.height())
-        .unwrap_or(10.)
+        .map_or(10.0, |r| r.height())
         .round() as i32;
 
     // Scoring function for empty rectangles. Taken from Section 3.D in [1].
diff --git a/ocrs/src/layout_analysis/empty_rects.rs b/ocrs/src/layout_analysis/empty_rects.rs
index 3614484..3e1207b 100644
--- a/ocrs/src/layout_analysis/empty_rects.rs
+++ b/ocrs/src/layout_analysis/empty_rects.rs
@@ -64,7 +64,7 @@ where
     queue.push(Partition {
         score: score(boundary),
         boundary,
-        obstacles: obstacles.to_vec(),
+        obstacles: obstacles.clone(),
     });
 }
 
diff --git a/ocrs/src/lib.rs b/ocrs/src/lib.rs
index a29f2d2..2b396eb 100644
--- a/ocrs/src/lib.rs
+++ b/ocrs/src/lib.rs
@@ -95,10 +95,11 @@ impl OcrEngine {
     /// Returns an unordered list of the oriented bounding rectangles of each
     /// word found.
     pub fn detect_words(&self, input: &OcrInput) -> Result<Vec<RotatedRect>, Box<dyn Error>> {
-        let Some(detector) = self.detector.as_ref() else {
-            return Err("Detection model not loaded".into());
-        };
-        detector.detect_words(input.image.view(), self.debug)
+        if let Some(detector) = self.detector.as_ref() {
+            detector.detect_words(input.image.view(), self.debug)
+        } else {
+            Err("Detection model not loaded".into())
+        }
     }
 
     /// Perform layout analysis to group words into lines and sort them in
@@ -128,17 +129,18 @@
         input: &OcrInput,
         lines: &[Vec<RotatedRect>],
     ) -> Result<Vec<Option<TextLine>>, Box<dyn Error>> {
-        let Some(recognizer) = self.recognizer.as_ref() else {
-            return Err("Recognition model not loaded".into());
-        };
-        recognizer.recognize_text_lines(
-            input.image.view(),
-            lines,
-            RecognitionOpt {
-                debug: self.debug,
-                decode_method: self.decode_method,
-            },
-        )
+        if let Some(recognizer) = self.recognizer.as_ref() {
+            recognizer.recognize_text_lines(
+                input.image.view(),
+                lines,
+                RecognitionOpt {
+                    debug: self.debug,
+                    decode_method: self.decode_method,
+                },
+            )
+        } else {
+            Err("Recognition model not loaded".into())
+        }
     }
 
     /// Convenience API that extracts all text from an image as a single string.
diff --git a/ocrs/src/preprocess.rs b/ocrs/src/preprocess.rs
index 265e976..946a074 100644
--- a/ocrs/src/preprocess.rs
+++ b/ocrs/src/preprocess.rs
@@ -20,7 +20,7 @@ fn greyscale_image<F: Fn(f32) -> f32>(
 ) -> NdTensor<f32, 3> {
     let [chans, height, width] = img.shape();
     assert!(
-        chans == 1 || chans == 3 || chans == 4,
+        matches!(chans, 1 | 3 | 4),
         "expected greyscale, RGB or RGBA input image"
     );
 
diff --git a/ocrs/src/recognition.rs b/ocrs/src/recognition.rs
index 22b2aae..5c8879d 100644
--- a/ocrs/src/recognition.rs
+++ b/ocrs/src/recognition.rs
@@ -157,14 +157,12 @@ fn polygon_slice_bounding_rect(
         let trunc_edge_start = e
             .to_f32()
             .y_for_x(min_x as f32)
-            .map(|y| Point::from_yx(y.round() as i32, min_x))
-            .unwrap_or(e.start);
+            .map_or(e.start, |y| Point::from_yx(y.round() as i32, min_x));
 
         let trunc_edge_end = e
             .to_f32()
             .y_for_x(max_x as f32)
-            .map(|y| Point::from_yx(y.round() as i32, max_x))
-            .unwrap_or(e.end);
+            .map_or(e.end, |y| Point::from_yx(y.round() as i32, max_x));
 
         Some(Line::from_endpoints(trunc_edge_start, trunc_edge_end))
     })
@@ -370,7 +368,7 @@ impl TextRecognizer {
         let min_width = 10.;
         let max_width = 800.;
         let aspect_ratio = orig_width as f32 / orig_height as f32;
-        (height as f32 * aspect_ratio).max(min_width).min(max_width) as u32
+        (height as f32 * aspect_ratio).clamp(min_width, max_width) as u32
     }
 
     // Group lines into batches which will have similar widths after resizing
@@ -489,11 +487,7 @@ mod tests {
             // Vary the orientation of words. The output of `line_polygon`
            // should be invariant to different orientations of a RotatedRect
            // that cover the same pixels.
-            let up = if i % 2 == 0 {
-                Vec2::from_yx(-1., 0.)
-            } else {
-                Vec2::from_yx(1., 0.)
-            };
+            let up = Vec2::from_yx(if i % 2 == 0 { -1. } else { 1. }, 0.);
             RotatedRect::new(center, up, width, height)
         })
         .collect();
diff --git a/ocrs/src/test_util.rs b/ocrs/src/test_util.rs
index 43216b6..896fe6e 100644
--- a/ocrs/src/test_util.rs
+++ b/ocrs/src/test_util.rs
@@ -10,12 +10,12 @@ pub fn gen_rect_grid(
     rect_size: (i32, i32),
     gap_size: (i32, i32),
 ) -> Vec<Rect> {
-    let mut rects = Vec::new();
-
     let (rows, cols) = grid_shape;
     let (rect_h, rect_w) = rect_size;
     let (gap_h, gap_w) = gap_size;
+    let mut rects = Vec::with_capacity((rows * cols) as usize);
+
     for r in 0..rows {
         for c in 0..cols {
             let top = top_left.y + r * (rect_h + gap_h);