From eb2c2ddc55cd83cf48e3b7e52632607fdfd59391 Mon Sep 17 00:00:00 2001 From: Benjamin Kiessling Date: Wed, 4 Dec 2024 11:17:57 +0100 Subject: [PATCH] Add fixed offset bounding polygon option Hacky 'solution' for highly diacritized texts --- kraken/blla.py | 10 +++++--- kraken/lib/segmentation.py | 48 ++++++++++++++++++++++++++++---------- 2 files changed, 43 insertions(+), 15 deletions(-) diff --git a/kraken/blla.py b/kraken/blla.py index d78ee55af..968d8e4a8 100644 --- a/kraken/blla.py +++ b/kraken/blla.py @@ -168,6 +168,7 @@ def vec_lines(heatmap: torch.Tensor, suppl_obj: List[np.ndarray] = None, topline: Optional[bool] = False, raise_on_error: bool = False, + fixed_offset_polygons: bool = False, **kwargs) -> List[Dict[str, Any]]: r""" Computes lines from a stack of heatmaps, a class mapping, and scaling @@ -230,7 +231,8 @@ def vec_lines(heatmap: torch.Tensor, im_feats=im_feats, suppl_obj=suppl_obj, topline=topline, - raise_on_error=raise_on_error) + raise_on_error=raise_on_error, + fixed_offset_polygons=fixed_offset_polygons) if pol[0] is not None: lines.append((bl[0], bl[1], pol[0])) @@ -248,7 +250,8 @@ def segment(im: PIL.Image.Image, model: Union[List[vgsl.TorchVGSLModel], vgsl.TorchVGSLModel] = None, device: str = 'cpu', raise_on_error: bool = False, - autocast: bool = False) -> Segmentation: + autocast: bool = False, + fixed_offset_polygons: bool = False) -> Segmentation: r""" Segments a page into text lines using the baseline segmenter. 
@@ -344,7 +347,8 @@ def segment(im: PIL.Image.Image, text_direction=text_direction, suppl_obj=suppl_obj, topline=net.user_metadata['topline'] if 'topline' in net.user_metadata else False, - raise_on_error=raise_on_error) + raise_on_error=raise_on_error, + fixed_offset_polygons=fixed_offset_polygons) if 'ro_model' in net.aux_layers: logger.info(f'Using reading order model found in segmentation model {net}.') diff --git a/kraken/lib/segmentation.py b/kraken/lib/segmentation.py index 2c14d7bc9..96ecb05ca 100644 --- a/kraken/lib/segmentation.py +++ b/kraken/lib/segmentation.py @@ -577,7 +577,17 @@ def _calc_seam(baseline, polygon, angle, im_feats, bias=150): return seam -def _extract_patch(env_up, env_bottom, baseline, offset_baseline, end_points, dir_vec, topline, offset, im_feats, bounds): +def _extract_patch(env_up, + env_bottom, + baseline, + offset_baseline, + end_points, + dir_vec, + topline, + offset, + im_feats, + bounds, + fixed_offset_polygons=False): """ Calculate a line image patch from a ROI and the original baseline. 
""" @@ -599,16 +609,26 @@ def _extract_patch(env_up, env_bottom, baseline, offset_baseline, end_points, di upper_seam = geom.LineString(upper_seam).simplify(5) bottom_seam = geom.LineString(bottom_seam).simplify(5) - # ugly workaround against GEOM parallel_offset bug creating a - # MultiLineString out of offset LineString - if upper_seam.parallel_offset(offset//2, side='right').geom_type == 'MultiLineString' or offset == 0: - upper_seam = np.array(upper_seam.coords, dtype=int) - else: - upper_seam = np.array(upper_seam.parallel_offset(offset//2, side='right').coords, dtype=int)[::-1] - if bottom_seam.parallel_offset(offset//2, side='left').geom_type == 'MultiLineString' or offset == 0: - bottom_seam = np.array(bottom_seam.coords, dtype=int) + if not fixed_offset_polygons: + # ugly workaround against GEOM parallel_offset bug creating a + # MultiLineString out of offset LineString + if upper_seam.parallel_offset(offset//2, side='right').geom_type == 'MultiLineString' or offset == 0: + upper_seam = np.array(upper_seam.coords, dtype=int) + else: + upper_seam = np.array(upper_seam.parallel_offset(offset//2, side='right').coords, dtype=int)[::-1] + if bottom_seam.parallel_offset(offset//2, side='left').geom_type == 'MultiLineString' or offset == 0: + bottom_seam = np.array(bottom_seam.coords, dtype=int) + else: + bottom_seam = np.array(bottom_seam.parallel_offset(offset//2, side='left').coords, dtype=int) else: - bottom_seam = np.array(bottom_seam.parallel_offset(offset//2, side='left').coords, dtype=int) + # XXX: hacky trick to make sure dotting is included in bounding polygon by + # expanding it to its maximum distance from baseline. 
+ baseline = geom.LineString(baseline) + us_dist = upper_seam.hausdorff_distance(baseline) + bs_dist = bottom_seam.hausdorff_distance(baseline) + + upper_seam = np.array(baseline.parallel_offset(us_dist, side='right').coords, dtype=int)[::-1] + bottom_seam = np.array(baseline.parallel_offset(bs_dist, side='left').coords, dtype=int) # offsetting might produce bounds outside the image. Clip it to the image bounds. polygon = np.concatenate(([end_points[0]], upper_seam, [end_points[-1]], bottom_seam[::-1])) @@ -692,7 +712,8 @@ def calculate_polygonal_environment(im: Image.Image = None, im_feats: np.ndarray = None, scale: Tuple[int, int] = None, topline: bool = False, - raise_on_error: bool = False): + raise_on_error: bool = False, + fixed_offset_polygons: bool = False): """ Given a list of baselines and an input image, calculates a polygonal environment around each baseline. @@ -717,6 +738,8 @@ def calculate_polygonal_environment(im: Image.Image = None, offset downwards. If set to None, no offset will be applied. raise_on_error: Raises error instead of logging them when they are not-blocking + fixed_offset_polygons: If set, the polygon boundaries are parallel offsets of the + baseline at each seam's Hausdorff distance from it, instead of the seams themselves. Returns: List of lists of coordinates. If no polygonization could be compute for a baseline `None` is returned instead. @@ -772,7 +795,8 @@ def calculate_polygonal_environment(im: Image.Image = None, topline, offset, im_feats, - bounds)) + bounds, + fixed_offset_polygons=fixed_offset_polygons)) except Exception as e: if raise_on_error: raise