From eb2c2ddc55cd83cf48e3b7e52632607fdfd59391 Mon Sep 17 00:00:00 2001
From: Benjamin Kiessling <mittagessen@l.unchti.me>
Date: Wed, 4 Dec 2024 11:17:57 +0100
Subject: [PATCH] Add fixed offset bounding polygon option

Hacky 'solution' for highly diacritized texts
---
 kraken/blla.py             | 10 +++++---
 kraken/lib/segmentation.py | 48 ++++++++++++++++++++++++++++----------
 2 files changed, 43 insertions(+), 15 deletions(-)

diff --git a/kraken/blla.py b/kraken/blla.py
index d78ee55af..968d8e4a8 100644
--- a/kraken/blla.py
+++ b/kraken/blla.py
@@ -168,6 +168,7 @@ def vec_lines(heatmap: torch.Tensor,
               suppl_obj: List[np.ndarray] = None,
               topline: Optional[bool] = False,
               raise_on_error: bool = False,
+              fixed_offset_polygons: bool = False,
               **kwargs) -> List[Dict[str, Any]]:
     r"""
     Computes lines from a stack of heatmaps, a class mapping, and scaling
@@ -230,7 +231,8 @@ def vec_lines(heatmap: torch.Tensor,
                                               im_feats=im_feats,
                                               suppl_obj=suppl_obj,
                                               topline=topline,
-                                              raise_on_error=raise_on_error)
+                                              raise_on_error=raise_on_error,
+                                              fixed_offset_polygons=fixed_offset_polygons)
         if pol[0] is not None:
             lines.append((bl[0], bl[1], pol[0]))
 
@@ -248,7 +250,8 @@ def segment(im: PIL.Image.Image,
             model: Union[List[vgsl.TorchVGSLModel], vgsl.TorchVGSLModel] = None,
             device: str = 'cpu',
             raise_on_error: bool = False,
-            autocast: bool = False) -> Segmentation:
+            autocast: bool = False,
+            fixed_offset_polygons: bool = False) -> Segmentation:
     r"""
     Segments a page into text lines using the baseline segmenter.
 
@@ -344,7 +347,8 @@ def segment(im: PIL.Image.Image,
                            text_direction=text_direction,
                            suppl_obj=suppl_obj,
                            topline=net.user_metadata['topline'] if 'topline' in net.user_metadata else False,
-                           raise_on_error=raise_on_error)
+                           raise_on_error=raise_on_error,
+                           fixed_offset_polygons=fixed_offset_polygons)
 
         if 'ro_model' in net.aux_layers:
             logger.info(f'Using reading order model found in segmentation model {net}.')
diff --git a/kraken/lib/segmentation.py b/kraken/lib/segmentation.py
index 2c14d7bc9..96ecb05ca 100644
--- a/kraken/lib/segmentation.py
+++ b/kraken/lib/segmentation.py
@@ -577,7 +577,17 @@ def _calc_seam(baseline, polygon, angle, im_feats, bias=150):
     return seam
 
 
-def _extract_patch(env_up, env_bottom, baseline, offset_baseline, end_points, dir_vec, topline, offset, im_feats, bounds):
+def _extract_patch(env_up,
+                   env_bottom,
+                   baseline,
+                   offset_baseline,
+                   end_points,
+                   dir_vec,
+                   topline,
+                   offset,
+                   im_feats,
+                   bounds,
+                   fixed_offset_polygons=False):
     """
     Calculate a line image patch from a ROI and the original baseline.
     """
@@ -599,16 +609,26 @@ def _extract_patch(env_up, env_bottom, baseline, offset_baseline, end_points, di
     upper_seam = geom.LineString(upper_seam).simplify(5)
     bottom_seam = geom.LineString(bottom_seam).simplify(5)
 
-    # ugly workaround against GEOM parallel_offset bug creating a
-    # MultiLineString out of offset LineString
-    if upper_seam.parallel_offset(offset//2, side='right').geom_type == 'MultiLineString' or offset == 0:
-        upper_seam = np.array(upper_seam.coords, dtype=int)
-    else:
-        upper_seam = np.array(upper_seam.parallel_offset(offset//2, side='right').coords, dtype=int)[::-1]
-    if bottom_seam.parallel_offset(offset//2, side='left').geom_type == 'MultiLineString' or offset == 0:
-        bottom_seam = np.array(bottom_seam.coords, dtype=int)
+    if not fixed_offset_polygons:
+        # ugly workaround against GEOM parallel_offset bug creating a
+        # MultiLineString out of offset LineString
+        if upper_seam.parallel_offset(offset//2, side='right').geom_type == 'MultiLineString' or offset == 0:
+            upper_seam = np.array(upper_seam.coords, dtype=int)
+        else:
+            upper_seam = np.array(upper_seam.parallel_offset(offset//2, side='right').coords, dtype=int)[::-1]
+        if bottom_seam.parallel_offset(offset//2, side='left').geom_type == 'MultiLineString' or offset == 0:
+            bottom_seam = np.array(bottom_seam.coords, dtype=int)
+        else:
+            bottom_seam = np.array(bottom_seam.parallel_offset(offset//2, side='left').coords, dtype=int)
     else:
-        bottom_seam = np.array(bottom_seam.parallel_offset(offset//2, side='left').coords, dtype=int)
+        # XXX: hacky trick to make sure dotting is included in bounding polygon by
+        #      offsetting the baseline by each seam's Hausdorff distance from it.
+        baseline = geom.LineString(baseline)
+        us_dist = upper_seam.hausdorff_distance(baseline)
+        bs_dist = bottom_seam.hausdorff_distance(baseline)
+
+        upper_seam = np.array(baseline.parallel_offset(us_dist, side='right').coords, dtype=int)[::-1]
+        bottom_seam = np.array(baseline.parallel_offset(bs_dist, side='left').coords, dtype=int)
 
     # offsetting might produce bounds outside the image. Clip it to the image bounds.
     polygon = np.concatenate(([end_points[0]], upper_seam, [end_points[-1]], bottom_seam[::-1]))
@@ -692,7 +712,8 @@ def calculate_polygonal_environment(im: Image.Image = None,
                                     im_feats: np.ndarray = None,
                                     scale: Tuple[int, int] = None,
                                     topline: bool = False,
-                                    raise_on_error: bool = False):
+                                    raise_on_error: bool = False,
+                                    fixed_offset_polygons: bool = False):
     """
     Given a list of baselines and an input image, calculates a polygonal
     environment around each baseline.
@@ -717,6 +738,8 @@ def calculate_polygonal_environment(im: Image.Image = None,
                  offset downwards. If set to None, no offset will be applied.
         raise_on_error: Raises error instead of logging them when they are
                         not-blocking
+        fixed_offset_polygons: Switch enabling bounding polygons that are a
+                               fixed distance (Hausdorff) from the baseline.
     Returns:
         List of lists of coordinates. If no polygonization could be compute for
         a baseline `None` is returned instead.
@@ -772,7 +795,8 @@ def calculate_polygonal_environment(im: Image.Image = None,
                                            topline,
                                            offset,
                                            im_feats,
-                                           bounds))
+                                           bounds,
+                                           fixed_offset_polygons=fixed_offset_polygons))
         except Exception as e:
             if raise_on_error:
                 raise