rebase faster line extractor to main
mittagessen committed Dec 21, 2023
1 parent 992fb0b commit 4cee7f2
Showing 1 changed file with 193 additions and 72 deletions.
265 changes: 193 additions & 72 deletions kraken/lib/segmentation.py
@@ -23,10 +23,10 @@

from collections import defaultdict

from PIL import Image
from PIL import Image, ImageDraw

from scipy.signal import convolve2d
from scipy.ndimage import maximum_filter, binary_erosion, gaussian_filter, distance_transform_cdt
from scipy.ndimage import maximum_filter, binary_erosion, gaussian_filter, distance_transform_cdt, affine_transform
from scipy.spatial.distance import pdist, squareform

from shapely.ops import nearest_points, unary_union
@@ -39,7 +39,11 @@
from skimage.morphology import skeletonize
from skimage.transform import PiecewiseAffineTransform, SimilarityTransform, AffineTransform, warp

from typing import List, Tuple, Union, Dict, Any, Sequence, Optional, Literal, TypeVar, Iterator, TYPE_CHECKING

from kraken.lib import default_specs
from kraken.lib.exceptions import KrakenInputException
@@ -369,20 +373,32 @@ def vectorize_regions(im: np.ndarray, threshold: float = 0.5):
boundaries = [x.boundary.simplify(10) for x in boundaries.geoms]
return [np.array(x.coords, dtype=np.uint)[:, [1, 0]].tolist() for x in boundaries]

_T_pil_or_np = TypeVar('_T_pil_or_np', Image.Image, np.ndarray)

def _rotate(image, angle, center, scale, cval=0):
def _rotate(image: _T_pil_or_np, angle: float, center: Any, scale: float, cval=0, order: int = 0) -> Tuple[AffineTransform, _T_pil_or_np]:
"""
Rotate function taken mostly from scikit image. Main difference is that
this one allows dimensional scaling and records the final translation
to ensure no image content is lost. This is needed to rotate the seam
back into the original image.
Rotate an image by a given angle, with optional scaling along the x-axis.
Args:
image (PIL.Image.Image or (H, W, C) np.ndarray): Input image
angle (float): Angle in radians
center (tuple): unused
scale (float): x-Axis scaling factor
cval (int): Padding value
order (int): Interpolation order
Returns:
A tuple containing the transformation matrix and the rotated image.
Note: this function is much faster when applied to PIL images than to numpy ndarrays.
"""
rows, cols = image.shape[0], image.shape[1]
tform1 = SimilarityTransform(translation=center)
tform2 = SimilarityTransform(rotation=angle)
tform3 = SimilarityTransform(translation=-center)
tform4 = AffineTransform(scale=(1/scale, 1))
tform = tform4 + tform3 + tform2 + tform1
if isinstance(image, Image.Image):
rows, cols = image.height, image.width
else:
rows, cols = image.shape[:2]
assert len(image.shape) == 3

tform = AffineTransform(rotation=angle, scale=(1/scale, 1))
corners = np.array([
[0, 0],
[0, rows - 1],
@@ -396,13 +412,25 @@ def _rotate(image, angle, center, scale, cval=0):
maxr = corners[:, 1].max()
out_rows = maxr - minr + 1
out_cols = maxc - minc + 1
output_shape = np.around((out_rows, out_cols))
output_shape = tuple(int(o) for o in np.around((out_rows, out_cols)))
# fit output image in new shape
translation = (minc, minr)
tform5 = SimilarityTransform(translation=translation)
tform = tform5 + tform
tform.params[2] = (0, 0, 1)
return tform, warp(image, tform, output_shape=output_shape, order=0, cval=cval, clip=False, preserve_range=True)
translation = tform([[minc, minr]])
tform = AffineTransform(rotation=angle, scale=(1/scale, 1), translation=[f for f in translation.flatten()])

if isinstance(image, Image.Image):
# PIL is much faster than scipy
pdata = tform.params.flatten().tolist()[:6]
resample = {0: Image.NEAREST, 1: Image.BILINEAR, 2: Image.BICUBIC, 3: Image.BICUBIC}.get(order, Image.NEAREST)
return tform, image.transform(output_shape[::-1], Image.AFFINE, data=pdata, resample=resample, fillcolor=cval)

# params for scipy: affine_transform works in (row, col) order, so swap the
# x and y axes of the transform matrix
pdata = tform.params.copy()[[1, 0, 2], :][:, [1, 0, 2]]
# pull out the translation vector; it is passed to scipy separately as `offset`
offset = pdata[:2, 2].copy()
# scipy applies the 3x3 matrix as a linear map over all three axes (rows,
# cols, channels); zero the translation column so the channel axis is left
# untouched and the shift is applied via `offset` instead
pdata[:2, 2] = 0
return tform, affine_transform(image, pdata, offset=(*offset, 0), output_shape=(*output_shape, image.shape[2]), cval=cval, order=order)
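
A minimal usage sketch (not part of the diff) of how the returned transform pairs with the rotated image; the input image, angle, and points below are made up for illustration:

import numpy as np
from PIL import Image

img = Image.new('L', (400, 120), color=0)  # hypothetical line patch
tform, rotated = _rotate(img, angle=np.pi / 18, center=None, scale=1.0, cval=0, order=1)
# both PIL's Image.AFFINE data and skimage's warp() expect a map from output
# to input coordinates, so `tform` can be used to project points (e.g. a seam
# computed on `rotated`) back toward the source patch:
orig_pts = tform(np.array([[10.0, 60.0], [390.0, 60.0]]))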


def line_regions(line, regions):
@@ -457,7 +485,6 @@ def _calc_seam(baseline, polygon, angle, im_feats, bias=150):
level.
"""
MASK_VAL = 99999
r, c = draw.polygon(polygon[:, 1], polygon[:, 0])
c_min, c_max = int(polygon[:, 0].min()), int(polygon[:, 0].max())
r_min, r_max = int(polygon[:, 1].min()), int(polygon[:, 1].max())
patch = im_feats[r_min:r_max+2, c_min:c_max+2].copy()
@@ -471,8 +498,7 @@ def _calc_seam(baseline, polygon, angle, im_feats, bias=150):
mask[line_locs] = 0
dist_bias = distance_transform_cdt(mask)
# absolute mask
mask = np.ones_like(patch, dtype=bool)
mask[r-r_min, c-c_min] = False
mask = np.array(make_polygonal_mask(polygon - (c_min, r_min), patch.shape[1::-1])) <= 128
# dilate mask to compensate for aliasing during rotation
mask = binary_erosion(mask, border_value=True, iterations=2)
# combine weights with features
@@ -916,7 +942,8 @@ def scale_regions(regions: Sequence[Tuple[List[int], List[int]]],
return scaled_regions


def scale_polygonal_lines(lines: Sequence[Tuple[List, List]], scale: Union[float, Tuple[float, float]]) -> Sequence[Tuple[List, List]]:
def scale_polygonal_lines(lines: Sequence[Tuple[List, List]],
scale: Union[float, Tuple[float, float]]) -> Sequence[Tuple[List, List]]:
"""
Scales baselines/polygon coordinates by a certain factor.
@@ -1024,7 +1051,98 @@ def compute_polygon_section(baseline: Sequence[Tuple[int, int]],
return tuple(o)


def extract_polygons(im: Image.Image, bounds: 'Segmentation') -> Image.Image:
def _bevelled_warping_envelope(baseline: np.ndarray,
output_bl_start: Tuple[float, float],
output_shape: Tuple[int, int]) -> Tuple[List[Tuple[int, int]], List[Tuple[int, int]]]:
"""
Calculates the source and target envelope for a piecewise affine transform
"""
def _as_int_tuple(x):
return tuple(int(i) for i in x)

envelope_dy = [-output_bl_start[1], output_shape[0] - output_bl_start[1]]
diff_bl = np.diff(baseline, axis=0)
diff_bl_normed = diff_bl / np.linalg.norm(diff_bl, axis=1)[:, None]
l_bl = len(baseline)
cum_lens = np.cumsum([0] + np.linalg.norm(diff_bl, axis=1).tolist())

bl_seg_normals = np.array([-diff_bl_normed[:, 1], diff_bl_normed[:, 0]]).T
ini_point = baseline[0] - diff_bl_normed[0] * output_bl_start[0]
source_envelope = [
_as_int_tuple(ini_point + envelope_dy[0]*bl_seg_normals[0]),
_as_int_tuple(ini_point + envelope_dy[1]*bl_seg_normals[0]),
]
target_envelope = [
(0, 0),
(0, output_shape[0])
]
MAX_BEVEL_WIDTH = output_shape[0] / 3
BEVEL_STEP_WIDTH = MAX_BEVEL_WIDTH / 2

for k in range(l_bl-2):
pt = baseline[k+1]
seg_prev = baseline[k] - pt
seg_next = baseline[k+2] - pt
bevel_prev = seg_prev / max(2., np.linalg.norm(seg_prev) / MAX_BEVEL_WIDTH)
bevel_next = seg_next / max(2., np.linalg.norm(seg_next) / MAX_BEVEL_WIDTH)
bevel_nsteps = max(1, np.round((np.linalg.norm(bevel_prev) + np.linalg.norm(bevel_next)) / BEVEL_STEP_WIDTH))
l_prev = np.linalg.norm(bevel_prev)
l_next = np.linalg.norm(bevel_next)
for i in range(int(bevel_nsteps)+1):
# quadratic Bezier interpolation between the bevel endpoints, with the corner point as control point
t = i / bevel_nsteps
tpt = pt + (1-t)**2 * bevel_prev + t**2 * bevel_next
tx = output_bl_start[0] + cum_lens[k+1] - (1-t)**2 * l_prev + t**2 * l_next
tnormal = (1-t) * bl_seg_normals[k] + t * bl_seg_normals[k+1]
tnormal /= np.linalg.norm(tnormal)
source_points = [_as_int_tuple(tpt + envelope_dy[0]*tnormal), _as_int_tuple(tpt + envelope_dy[1]*tnormal)]
target_points = [(int(tx), 0), (int(tx), output_shape[0])]
# avoid duplicate points leading to singularities
if source_points[0] == source_envelope[-2] or source_points[1] == source_envelope[-1] or target_points[0] == target_envelope[-2]:
continue
source_envelope += source_points
target_envelope += target_points

end_point = baseline[-1] + diff_bl_normed[-1]*(output_shape[1]-cum_lens[-1]-output_bl_start[0])
source_envelope += [
end_point + envelope_dy[0]*bl_seg_normals[-1],
end_point + envelope_dy[1]*bl_seg_normals[-1],
]
target_envelope += [
(output_shape[1], 0),
(output_shape[1], output_shape[0])
]
return source_envelope, target_envelope
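
For intuition, a hedged example with made-up values: a flat two-point baseline yields envelopes that are simply the four patch corners, while bent baselines add bevelled intermediate point pairs along the top and bottom edges.

bl = np.array([[0, 20], [200, 20]])  # straight baseline, (x, y) order
src, dst = _bevelled_warping_envelope(bl, output_bl_start=(0, 20), output_shape=(40, 200))
# both envelopes contain the four patch corners (0, 0), (0, 40), (200, 0),
# (200, 40); extract_polygons() below pairs such envelopes into (box, quad)
# tuples for PIL's Image.transform(..., Image.MESH, ...).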

def make_polygonal_mask(polygon: np.ndarray, shape: Tuple[int, int]) -> Image.Image:
"""
Creates a mask from a polygon.
Args:
polygon: A polygon as an (N, 2) array of (x, y) points.
shape: The (width, height) of the mask to create.
Returns:
A PIL.Image.Image instance containing the mask.
"""
mask = Image.new('L', shape, 0)
ImageDraw.Draw(mask).polygon([tuple(p) for p in polygon.astype(int).tolist()], fill=255, width=2)
return mask


def apply_polygonal_mask(img: Image.Image, polygon: np.ndarray, cval=0) -> Image.Image:
"""
Applies a polygonal mask to an image, filling everything outside the polygon with `cval`.
"""
mask = make_polygonal_mask(polygon, img.size)
out = Image.new(img.mode, (img.width, img.height), cval)
out.paste(img, mask=mask)
return out
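
A small illustrative example (values are made up) of the two helpers together:

patch = Image.new('L', (100, 80), 200)          # hypothetical grey patch
tri = np.array([[10, 70], [50, 5], [90, 70]])   # (x, y) vertices
mask = make_polygonal_mask(tri, patch.size)     # 'L' mask, 255 inside the triangle
masked = apply_polygonal_mask(patch, tri)       # everything outside the triangle becomes 0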

def extract_polygons(im: Image.Image,
bounds: 'kraken.containers.Segmentation') -> Iterator[Tuple[Image.Image,
Union['kraken.containers.BBoxLine',
'kraken.containers.BaselineLine']]]:
"""
Yields the subimages of image im defined in the list of bounding polygons
with baselines preserving order.
@@ -1067,72 +1185,75 @@ def extract_polygons(im: Image.Image, bounds: 'Segmentation') -> Image.Image:
p_dir = np.mean(np.diff(baseline.T) * lengths/lengths.sum(), axis=1)
p_dir = (p_dir.T / np.sqrt(np.sum(p_dir**2, axis=-1)))
angle = np.arctan2(p_dir[1], p_dir[0])
patch = im[r_min:r_max+1, c_min:c_max+1].copy()
# crop out bounding box
patch = im.crop((c_min, r_min, c_max+1, r_max+1))
offset_polygon = pl - (c_min, r_min)
r, c = draw.polygon(offset_polygon[:, 1], offset_polygon[:, 0])
mask = np.zeros(patch.shape[:2], dtype=bool)
mask[r, c] = True
patch[mask != True] = 0
patch = apply_polygonal_mask(patch, offset_polygon, cval=0)
extrema = offset_polygon[(0, -1), :]
# scale line image to max 600 pixel width
tform, rotated_patch = _rotate(patch, angle, center=extrema[0], scale=1.0, cval=0)
i = Image.fromarray(rotated_patch.astype('uint8'))
tform, i = _rotate(patch, angle, center=extrema[0], scale=1.0, cval=0, order=order)
# normal slow path with piecewise affine transformation
else:
if len(pl) > 50:
pl = approximate_polygon(pl, 2)
full_polygon = subdivide_polygon(pl, preserve_ends=True)
pl = geom.MultiPoint(full_polygon)

bl = zip(baseline[:-1:], baseline[1::])
bl = [geom.LineString(x) for x in bl]
cum_lens = np.cumsum([0] + [line.length for line in bl])
# distance of intercept from start point and number of line segment
control_pts = []
for point in pl.geoms:
npoint = np.array(point.coords)[0]
line_idx, dist, intercept = min(((idx, line.project(point),
np.array(line.interpolate(line.project(point)).coords)) for idx, line in enumerate(bl)),
key=lambda x: np.linalg.norm(npoint-x[2]))
# absolute distance from start of line
line_dist = cum_lens[line_idx] + dist
intercept = np.array(intercept)
# side of line the point is at
side = np.linalg.det(np.array([[baseline[line_idx+1][0]-baseline[line_idx][0],
npoint[0]-baseline[line_idx][0]],
[baseline[line_idx+1][1]-baseline[line_idx][1],
npoint[1]-baseline[line_idx][1]]]))
side = np.sign(side)
# signed perpendicular distance from the rectified distance
per_dist = side * np.linalg.norm(npoint-intercept)
control_pts.append((line_dist, per_dist))
# calculate baseline destination points

# baseline segment vectors
diff_bl = np.diff(baseline, axis=0)
diff_bl_norms = np.linalg.norm(diff_bl, axis=1)
diff_bl_normed = diff_bl / diff_bl_norms[:, None]

l_poly = len(full_polygon)
cum_lens = np.cumsum([0] + np.linalg.norm(diff_bl, axis=1).tolist())

# calculate baseline destination points:
bl_dst_pts = baseline[0] + np.dstack((cum_lens, np.zeros_like(cum_lens)))[0]
# calculate bounding polygon destination points
pol_dst_pts = np.array([baseline[0] + (line_dist, per_dist) for line_dist, per_dist in control_pts])

# calculate bounding polygon destination points:
# difference between baselines and polygon
# diff[k, p] = baseline[k] - polygon[p]
poly_bl_diff = full_polygon[None, :] - baseline[:-1, None]
# local x coordinates of polygon points on baseline segments
# x[k, p] = (baseline[k] - polygon[p]) . (baseline[k+1] - baseline[k]) / |baseline[k+1] - baseline[k]|
poly_bl_x = np.einsum('kpm,km->kp', poly_bl_diff, diff_bl_normed)
# distance to baseline segments
poly_bl_segdist = np.maximum(-poly_bl_x, poly_bl_x - diff_bl_norms[:, None])
# closest baseline segment index
poly_closest_bl = np.argmin((poly_bl_segdist), axis=0)
poly_bl_x = poly_bl_x[poly_closest_bl, np.arange(l_poly)]
poly_bl_diff = poly_bl_diff[poly_closest_bl, np.arange(l_poly)]
# signed distance between polygon points and baseline segments (to get y coordinates)
poly_bl_y = np.cross(diff_bl_normed[poly_closest_bl], poly_bl_diff)
# final destination points
pol_dst_pts = np.array(
[cum_lens[poly_closest_bl] + poly_bl_x, poly_bl_y]
).T + baseline[:1]

# extract bounding box patch
c_dst_min, c_dst_max = int(pol_dst_pts[:, 0].min()), int(pol_dst_pts[:, 0].max())
r_dst_min, r_dst_max = int(pol_dst_pts[:, 1].min()), int(pol_dst_pts[:, 1].max())
output_shape = np.around((r_dst_max - r_dst_min + 1, c_dst_max - c_dst_min + 1))
patch = im[r_min:r_max+1, c_min:c_max+1].copy()
patch = im.crop((c_min, r_min, c_max+1, r_max+1))
# offset src points by patch shape
offset_polygon = full_polygon - (c_min, r_min)
offset_baseline = baseline - (c_min, r_min)
# offset dst point by dst polygon shape
offset_bl_dst_pts = bl_dst_pts - (c_dst_min, r_dst_min)
offset_pol_dst_pts = pol_dst_pts - (c_dst_min, r_dst_min)
# mask out points outside bounding polygon
mask = np.zeros(patch.shape[:2], dtype=bool)
r, c = draw.polygon(offset_polygon[:, 1], offset_polygon[:, 0])
mask[r, c] = True
patch[mask != True] = 0
# estimate piecewise transform
src_points = np.concatenate((offset_baseline, offset_polygon))
dst_points = np.concatenate((offset_bl_dst_pts, offset_pol_dst_pts))
tform = PiecewiseAffineTransform()
tform.estimate(src_points, dst_points)
o = warp(patch, tform.inverse, output_shape=output_shape, preserve_range=True, order=order)
i = Image.fromarray(o.astype('uint8'))
patch = apply_polygonal_mask(patch, offset_polygon, cval=0)

# estimate piecewise transform by beveling angles
source_envelope, target_envelope = _bevelled_warping_envelope(offset_baseline, offset_bl_dst_pts[0], output_shape)
# mesh for PIL, as (box, quad) tuples: box is (NW, SE) and quad is (NW, SW, SE, NE)
deform_mesh = [
(
(*target_envelope[i], *target_envelope[i+3]),
(*source_envelope[i], *source_envelope[i+1], *source_envelope[i+3], *source_envelope[i+2])
)
for i in range(0, len(source_envelope)-3, 2)
]
# warp
resample = {0: Image.NEAREST, 1: Image.BILINEAR, 2: Image.BICUBIC, 3: Image.BICUBIC}.get(order, Image.NEAREST)
i = patch.transform((output_shape[1], output_shape[0]), Image.MESH, data=deform_mesh, resample=resample)
yield i.crop(i.getbbox()), line
else:
if bounds.text_direction.startswith('vertical'):
