Skip to content

Commit

Permalink
Merge pull request #120 from bertsky/fix-segment-line-hull
Browse files Browse the repository at this point in the history
segment-line: validate intersection with parent
  • Loading branch information
bertsky authored Apr 8, 2020
2 parents 7026429 + a6659f0 commit f11c111
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 2 deletions.
14 changes: 13 additions & 1 deletion ocrd_tesserocr/segment_line.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,19 @@ def process(self):
line_poly = Polygon(line_polygon)
if not line_poly.within(region_poly):
# this could happen due to rotation
line_poly = line_poly.intersection(region_poly).convex_hull
interline = line_poly.intersection(region_poly)
if interline.is_empty:
continue # ignore this line
if hasattr(interline, 'geoms'):
# intersection has several parts (e.g. a heterogeneous GeometryCollection): keep only the part with the largest area
area = 0
for geom in interline.geoms:
if geom.area > area:
area = geom.area
interline = geom
if not area:
continue
line_poly = interline.convex_hull
line_polygon = line_poly.exterior.coords
line_polygon = coordinates_for_segment(line_polygon, region_image, region_coords)
line_points = points_from_polygon(line_polygon)
Expand Down
3 changes: 2 additions & 1 deletion ocrd_tesserocr/segment_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,8 @@ def process(self):
LOG.info("Detecting table cells in region '%s'", region.id)
#
# detect the region segments:
tessapi.SetPageSegMode(PSM.AUTO) # treat table like page
tessapi.SetPageSegMode(PSM.SPARSE_TEXT) # retrieve "cells"
# TODO: we should XY-cut the sparse cells and regroup them into consistent cells
layout = tessapi.AnalyseLayout()
roelem = reading_order.get(region.id)
if not roelem:
Expand Down

0 comments on commit f11c111

Please sign in to comment.