diff --git a/kraken/lib/segmentation.py b/kraken/lib/segmentation.py
index efc35be8b..0ad11cb33 100644
--- a/kraken/lib/segmentation.py
+++ b/kraken/lib/segmentation.py
@@ -23,10 +23,10 @@ from collections import defaultdict
 
-from PIL import Image
+from PIL import Image, ImageDraw
 
 from scipy.signal import convolve2d
-from scipy.ndimage import maximum_filter, binary_erosion, gaussian_filter, distance_transform_cdt
+from scipy.ndimage import maximum_filter, binary_erosion, gaussian_filter, distance_transform_cdt, affine_transform
 from scipy.spatial.distance import pdist, squareform
 
 from shapely.ops import nearest_points, unary_union
@@ -39,7 +39,7 @@ from skimage.morphology import skeletonize
 from skimage.transform import PiecewiseAffineTransform, SimilarityTransform, AffineTransform, warp
 
-from typing import List, Tuple, Union, Dict, Sequence, Optional, Literal, TYPE_CHECKING
+from typing import List, Tuple, Union, Dict, Any, Sequence, Optional, Literal, TypeVar, Iterator, TYPE_CHECKING
 
 from kraken.lib import default_specs
 from kraken.lib.exceptions import KrakenInputException
@@ -369,20 +373,32 @@ def vectorize_regions(im: np.ndarray, threshold: float = 0.5):
     boundaries = [x.boundary.simplify(10) for x in boundaries.geoms]
     return [np.array(x.coords, dtype=np.uint)[:, [1, 0]].tolist() for x in boundaries]
 
+_T_pil_or_np = TypeVar('_T_pil_or_np', Image.Image, np.ndarray)
 
-def _rotate(image, angle, center, scale, cval=0):
+def _rotate(image: _T_pil_or_np, angle: float, center: Any, scale: float, cval=0, order: int = 0) -> Tuple[AffineTransform, _T_pil_or_np]:
     """
-    Rotate function taken mostly from scikit image. Main difference is that
-    this one allows dimensional scaling and records the final translation
-    to ensure no image content is lost. This is needed to rotate the seam
-    back into the original image.
+    Rotate an image at an angle with optional scaling
+
+    Args:
+        image (PIL.Image.Image or (H, W, C) np.ndarray): Input image
+        angle (float): Angle in radians
+        center (tuple): unused
+        scale (float): x-Axis scaling factor
+        cval (int): Padding value
+        order (int): Interpolation order
+
+    Returns:
+        A tuple containing the transformation matrix and the rotated image.
+
+    Note: this function is much faster applied on PIL images than on numpy ndarrays.
""" - rows, cols = image.shape[0], image.shape[1] - tform1 = SimilarityTransform(translation=center) - tform2 = SimilarityTransform(rotation=angle) - tform3 = SimilarityTransform(translation=-center) - tform4 = AffineTransform(scale=(1/scale, 1)) - tform = tform4 + tform3 + tform2 + tform1 + if isinstance(image, Image.Image): + rows, cols = image.height, image.width + else: + rows, cols = image.shape[:2] + assert len(image.shape) == 3 + + tform = AffineTransform(rotation=angle, scale=(1/scale, 1)) corners = np.array([ [0, 0], [0, rows - 1], @@ -396,13 +412,25 @@ def _rotate(image, angle, center, scale, cval=0): maxr = corners[:, 1].max() out_rows = maxr - minr + 1 out_cols = maxc - minc + 1 - output_shape = np.around((out_rows, out_cols)) + output_shape = tuple(int(o) for o in np.around((out_rows, out_cols))) # fit output image in new shape - translation = (minc, minr) - tform5 = SimilarityTransform(translation=translation) - tform = tform5 + tform - tform.params[2] = (0, 0, 1) - return tform, warp(image, tform, output_shape=output_shape, order=0, cval=cval, clip=False, preserve_range=True) + translation = tform([[minc, minr]]) + tform = AffineTransform(rotation=angle, scale=(1/scale, 1), translation=[f for f in translation.flatten()]) + + if isinstance(image, Image.Image): + # PIL is much faster than scipy + pdata = tform.params.flatten().tolist()[:6] + resample = {0: Image.NEAREST, 1: Image.BILINEAR, 2: Image.BICUBIC, 3: Image.BICUBIC}.get(order, Image.NEAREST) + return tform, image.transform(output_shape[::-1], Image.AFFINE, data=pdata, resample=resample, fillcolor=cval) + + # params for scipy + # swap X and Y axis for scipy + pdata = tform.params.copy()[[1, 0, 2], :][:, [1, 0, 2]] + # we copy the translation vector + offset = pdata[:2, 2].copy() + # scipy expects a 3x3 *linear* matrix (to include channel axis), we don't want the channel axis to be modified + pdata[:2, 2] = 0 + return tform, affine_transform(image, pdata, offset=(*offset, 0), output_shape=(*output_shape, image.shape[2]), cval=cval, order=order) def line_regions(line, regions): @@ -457,7 +485,6 @@ def _calc_seam(baseline, polygon, angle, im_feats, bias=150): level. """ MASK_VAL = 99999 - r, c = draw.polygon(polygon[:, 1], polygon[:, 0]) c_min, c_max = int(polygon[:, 0].min()), int(polygon[:, 0].max()) r_min, r_max = int(polygon[:, 1].min()), int(polygon[:, 1].max()) patch = im_feats[r_min:r_max+2, c_min:c_max+2].copy() @@ -471,8 +498,7 @@ def _calc_seam(baseline, polygon, angle, im_feats, bias=150): mask[line_locs] = 0 dist_bias = distance_transform_cdt(mask) # absolute mask - mask = np.ones_like(patch, dtype=bool) - mask[r-r_min, c-c_min] = False + mask = np.array(make_polygonal_mask(polygon-(r_min, c_min)), patch.shape[1::-1]) > 128 # dilate mask to compensate for aliasing during rotation mask = binary_erosion(mask, border_value=True, iterations=2) # combine weights with features @@ -916,7 +942,8 @@ def scale_regions(regions: Sequence[Tuple[List[int], List[int]]], return scaled_regions -def scale_polygonal_lines(lines: Sequence[Tuple[List, List]], scale: Union[float, Tuple[float, float]]) -> Sequence[Tuple[List, List]]: +def scale_polygonal_lines(lines: Sequence[Tuple[List, List]], + scale: Union[float, Tuple[float, float]]) -> Sequence[Tuple[List, List]]: """ Scales baselines/polygon coordinates by a certain factor. 
@@ -1024,7 +1051,98 @@ def compute_polygon_section(baseline: Sequence[Tuple[int, int]],
     return tuple(o)
 
 
-def extract_polygons(im: Image.Image, bounds: 'Segmentation') -> Image.Image:
+def _bevelled_warping_envelope(baseline: np.ndarray,
+                               output_bl_start: Tuple[float, float],
+                               output_shape: Tuple[int, int]) -> Tuple[List[Tuple[int, int]], List[Tuple[int, int]]]:
+    """
+    Calculates the source and target envelope for a piecewise affine transform
+    """
+    def _as_int_tuple(x):
+        return tuple(int(i) for i in x)
+
+    envelope_dy = [-output_bl_start[1], output_shape[0] - output_bl_start[1]]
+    diff_bl = np.diff(baseline, axis=0)
+    diff_bl_normed = diff_bl / np.linalg.norm(diff_bl, axis=1)[:, None]
+    l_bl = len(baseline)
+    cum_lens = np.cumsum([0] + np.linalg.norm(diff_bl, axis=1).tolist())
+
+    bl_seg_normals = np.array([-diff_bl_normed[:, 1], diff_bl_normed[:, 0]]).T
+    ini_point = baseline[0] - diff_bl_normed[0] * output_bl_start[0]
+    source_envelope = [
+        _as_int_tuple(ini_point + envelope_dy[0]*bl_seg_normals[0]),
+        _as_int_tuple(ini_point + envelope_dy[1]*bl_seg_normals[0]),
+    ]
+    target_envelope = [
+        (0, 0),
+        (0, output_shape[0])
+    ]
+    MAX_BEVEL_WIDTH = output_shape[0] / 3
+    BEVEL_STEP_WIDTH = MAX_BEVEL_WIDTH / 2
+
+    for k in range(l_bl-2):
+        pt = baseline[k+1]
+        seg_prev = baseline[k] - pt
+        seg_next = baseline[k+2] - pt
+        bevel_prev = seg_prev / max(2., np.linalg.norm(seg_prev) / MAX_BEVEL_WIDTH)
+        bevel_next = seg_next / max(2., np.linalg.norm(seg_next) / MAX_BEVEL_WIDTH)
+        bevel_nsteps = max(1, np.round((np.linalg.norm(bevel_prev) + np.linalg.norm(bevel_next)) / BEVEL_STEP_WIDTH))
+        l_prev = np.linalg.norm(bevel_prev)
+        l_next = np.linalg.norm(bevel_next)
+        for i in range(int(bevel_nsteps)+1):
+            # bezier interp
+            t = i / bevel_nsteps
+            tpt = pt + (1-t)**2 * bevel_prev + t**2 * bevel_next
+            tx = output_bl_start[0] + cum_lens[k+1] - (1-t)**2 * l_prev + t**2 * l_next
+            tnormal = (1-t) * bl_seg_normals[k] + t * bl_seg_normals[k+1]
+            tnormal /= np.linalg.norm(tnormal)
+            source_points = [_as_int_tuple(tpt + envelope_dy[0]*tnormal), _as_int_tuple(tpt + envelope_dy[1]*tnormal)]
+            target_points = [(int(tx), 0), (int(tx), output_shape[0])]
+            # avoid duplicate points leading to singularities
+            if source_points[0] == source_envelope[-2] or source_points[1] == source_envelope[-1] or target_points[0] == target_envelope[-2]:
+                continue
+            source_envelope += source_points
+            target_envelope += target_points
+
+    end_point = baseline[-1] + diff_bl_normed[-1]*(output_shape[1]-cum_lens[-1]-output_bl_start[0])
+    source_envelope += [
+        end_point + envelope_dy[0]*bl_seg_normals[-1],
+        end_point + envelope_dy[1]*bl_seg_normals[-1],
+    ]
+    target_envelope += [
+        (output_shape[1], 0),
+        (output_shape[1], output_shape[0])
+    ]
+    return source_envelope, target_envelope
+
+def make_polygonal_mask(polygon: np.ndarray, shape: Tuple[int, int]) -> Image.Image:
+    """
+    Creates a mask from a polygon.
+
+    Args:
+        polygon: A polygon as a list of points.
+        shape: The (width, height) of the mask to create.
+
+    Returns:
+        A PIL.Image.Image instance containing the mask.
+    """
+    mask = Image.new('L', shape, 0)
+    ImageDraw.Draw(mask).polygon([tuple(p) for p in polygon.astype(int).tolist()], fill=255, width=2)
+    return mask
+
+
+def apply_polygonal_mask(img: Image.Image, polygon: np.ndarray, cval=0) -> Image.Image:
+    """
+    Applies a polygonal mask to an image, setting all pixels outside the polygon to `cval`.
+    """
+    mask = make_polygonal_mask(polygon, img.size)
+    out = Image.new(img.mode, (img.width, img.height), cval)
+    out.paste(img, mask=mask)
+    return out
+
+def extract_polygons(im: Image.Image,
+                     bounds: 'kraken.containers.Segmentation') -> Iterator[Tuple[Image.Image,
+                                                                                 Union['kraken.containers.BBoxLine',
+                                                                                       'kraken.containers.BaselineLine']]]:
     """
     Yields the subimages of image im defined in the list of bounding polygons
     with baselines preserving order.
@@ -1067,72 +1185,75 @@ def extract_polygons(im: Image.Image, bounds: 'Segmentation') -> Image.Image:
                 p_dir = np.mean(np.diff(baseline.T) * lengths/lengths.sum(), axis=1)
                 p_dir = (p_dir.T / np.sqrt(np.sum(p_dir**2, axis=-1)))
                 angle = np.arctan2(p_dir[1], p_dir[0])
-                patch = im[r_min:r_max+1, c_min:c_max+1].copy()
+                # crop out bounding box
+                patch = im.crop((c_min, r_min, c_max+1, r_max+1))
                 offset_polygon = pl - (c_min, r_min)
-                r, c = draw.polygon(offset_polygon[:, 1], offset_polygon[:, 0])
-                mask = np.zeros(patch.shape[:2], dtype=bool)
-                mask[r, c] = True
-                patch[mask != True] = 0
+                patch = apply_polygonal_mask(patch, offset_polygon, cval=0)
                 extrema = offset_polygon[(0, -1), :]
-                # scale line image to max 600 pixel width
-                tform, rotated_patch = _rotate(patch, angle, center=extrema[0], scale=1.0, cval=0)
-                i = Image.fromarray(rotated_patch.astype('uint8'))
            # normal slow path with piecewise affine transformation
+                tform, i = _rotate(patch, angle, center=extrema[0], scale=1.0, cval=0, order=order)
            # normal slow path with piecewise affine transformation
            else:
                if len(pl) > 50:
                    pl = approximate_polygon(pl, 2)
                full_polygon = subdivide_polygon(pl, preserve_ends=True)
-                pl = geom.MultiPoint(full_polygon)
-
-                bl = zip(baseline[:-1:], baseline[1::])
-                bl = [geom.LineString(x) for x in bl]
-                cum_lens = np.cumsum([0] + [line.length for line in bl])
-                # distance of intercept from start point and number of line segment
-                control_pts = []
-                for point in pl.geoms:
-                    npoint = np.array(point.coords)[0]
-                    line_idx, dist, intercept = min(((idx, line.project(point),
-                                                      np.array(line.interpolate(line.project(point)).coords)) for idx, line in enumerate(bl)),
-                                                    key=lambda x: np.linalg.norm(npoint-x[2]))
-                    # absolute distance from start of line
-                    line_dist = cum_lens[line_idx] + dist
-                    intercept = np.array(intercept)
-                    # side of line the point is at
-                    side = np.linalg.det(np.array([[baseline[line_idx+1][0]-baseline[line_idx][0],
-                                                    npoint[0]-baseline[line_idx][0]],
-                                                   [baseline[line_idx+1][1]-baseline[line_idx][1],
-                                                    npoint[1]-baseline[line_idx][1]]]))
-                    side = np.sign(side)
-                    # signed perpendicular distance from the rectified distance
-                    per_dist = side * np.linalg.norm(npoint-intercept)
-                    control_pts.append((line_dist, per_dist))
-                # calculate baseline destination points
+
+                # baseline segment vectors
+                diff_bl = np.diff(baseline, axis=0)
+                diff_bl_norms = np.linalg.norm(diff_bl, axis=1)
+                diff_bl_normed = diff_bl / diff_bl_norms[:, None]
+
+                l_poly = len(full_polygon)
+                cum_lens = np.cumsum([0] + np.linalg.norm(diff_bl, axis=1).tolist())
+
+                # calculate baseline destination points :
                 bl_dst_pts = baseline[0] + np.dstack((cum_lens, np.zeros_like(cum_lens)))[0]
-                # calculate bounding polygon destination points
-                pol_dst_pts = np.array([baseline[0] + (line_dist, per_dist) for line_dist, per_dist in control_pts])
+
+                # calculate bounding polygon destination points :
+                # difference between polygon points and baseline vertices
+                # diff[k, p] = polygon[p] - baseline[k]
+                poly_bl_diff = full_polygon[None, :] - baseline[:-1, None]
+                # local x coordinates of polygon points on baseline segments
+                # x[k, p] = (polygon[p] - baseline[k]) . (baseline[k+1] - baseline[k]) / |baseline[k+1] - baseline[k]|
+                poly_bl_x = np.einsum('kpm,km->kp', poly_bl_diff, diff_bl_normed)
+                # distance to baseline segments
+                poly_bl_segdist = np.maximum(-poly_bl_x, poly_bl_x - diff_bl_norms[:, None])
+                # closest baseline segment index
+                poly_closest_bl = np.argmin(poly_bl_segdist, axis=0)
+                poly_bl_x = poly_bl_x[poly_closest_bl, np.arange(l_poly)]
+                poly_bl_diff = poly_bl_diff[poly_closest_bl, np.arange(l_poly)]
+                # signed distance between polygon points and baseline segments (to get y coordinates)
+                poly_bl_y = np.cross(diff_bl_normed[poly_closest_bl], poly_bl_diff)
+                # final destination points
+                pol_dst_pts = np.array(
+                    [cum_lens[poly_closest_bl] + poly_bl_x, poly_bl_y]
+                ).T + baseline[:1]
+
+                # extract bounding box patch
                c_dst_min, c_dst_max = int(pol_dst_pts[:, 0].min()), int(pol_dst_pts[:, 0].max())
                r_dst_min, r_dst_max = int(pol_dst_pts[:, 1].min()), int(pol_dst_pts[:, 1].max())
                output_shape = np.around((r_dst_max - r_dst_min + 1, c_dst_max - c_dst_min + 1))
-                patch = im[r_min:r_max+1, c_min:c_max+1].copy()
+                patch = im.crop((c_min, r_min, c_max+1, r_max+1))
                # offset src points by patch shape
                offset_polygon = full_polygon - (c_min, r_min)
                offset_baseline = baseline - (c_min, r_min)
                # offset dst point by dst polygon shape
                offset_bl_dst_pts = bl_dst_pts - (c_dst_min, r_dst_min)
-                offset_pol_dst_pts = pol_dst_pts - (c_dst_min, r_dst_min)
                # mask out points outside bounding polygon
-                mask = np.zeros(patch.shape[:2], dtype=bool)
-                r, c = draw.polygon(offset_polygon[:, 1], offset_polygon[:, 0])
-                mask[r, c] = True
-                patch[mask != True] = 0
-                # estimate piecewise transform
-                src_points = np.concatenate((offset_baseline, offset_polygon))
-                dst_points = np.concatenate((offset_bl_dst_pts, offset_pol_dst_pts))
-                tform = PiecewiseAffineTransform()
-                tform.estimate(src_points, dst_points)
-                o = warp(patch, tform.inverse, output_shape=output_shape, preserve_range=True, order=order)
-                i = Image.fromarray(o.astype('uint8'))
+                patch = apply_polygonal_mask(patch, offset_polygon, cval=0)
+
+                # estimate piecewise transform by beveling angles
+                source_envelope, target_envelope = _bevelled_warping_envelope(offset_baseline, offset_bl_dst_pts[0], output_shape)
+                # mesh for PIL, as (box, quad) tuples : box is (NW, SE) and quad is (NW, SW, SE, NE)
+                deform_mesh = [
+                    (
+                        (*target_envelope[i], *target_envelope[i+3]),
+                        (*source_envelope[i], *source_envelope[i+1], *source_envelope[i+3], *source_envelope[i+2])
+                    )
+                    for i in range(0, len(source_envelope)-3, 2)
+                ]
+                # warp
+                resample = {0: Image.NEAREST, 1: Image.BILINEAR, 2: Image.BICUBIC, 3: Image.BICUBIC}.get(order, Image.NEAREST)
+                i = patch.transform((output_shape[1], output_shape[0]), Image.MESH, data=deform_mesh, resample=resample)
            yield i.crop(i.getbbox()), line
    else:
        if bounds.text_direction.startswith('vertical'):
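
The two PIL-based helpers added by this patch, make_polygonal_mask and apply_polygonal_mask, replace the earlier skimage.draw.polygon rasterisation. A minimal usage sketch, assuming the patch has been applied; the image and polygon values below are made up for illustration:

import numpy as np
from PIL import Image
from kraken.lib.segmentation import make_polygonal_mask, apply_polygonal_mask

# stand-in line image and (x, y) boundary points
img = Image.new('L', (200, 60), color=255)
polygon = np.array([[10, 5], [190, 5], [190, 55], [10, 55]])

mask = make_polygonal_mask(polygon, img.size)       # 'L' mode image, 255 inside the polygon
line = apply_polygonal_mask(img, polygon, cval=0)   # pixels outside the polygon set to 0
line.save('masked_line.png')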
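The slow path now hands the bevelled warping envelope to Pillow's Image.MESH transform instead of estimating a scikit-image PiecewiseAffineTransform. For reference, each mesh entry is a (box, quad) pair: box is the destination rectangle (x0, y0, x1, y1) in the output image and quad lists the four source corners in NW, SW, SE, NE order, which is how deform_mesh is assembled above. A standalone sketch of that convention with illustrative values, not taken from the patch:

from PIL import Image

src = Image.new('L', (200, 100), color=128)
# one mesh entry: fill the whole 200x100 output from a slightly sheared
# quadrilateral in the source image
mesh = [(
    (0, 0, 200, 100),                    # destination box (x0, y0, x1, y1)
    (10, 0, 0, 100, 190, 100, 200, 0),   # source quad: NW, SW, SE, NE corners
)]
out = src.transform((200, 100), Image.MESH, data=mesh, resample=Image.BILINEAR)
print(out.size)  # (200, 100)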
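The reworked extract_polygons keeps its generator interface and yields (line image, line record) pairs. A hedged usage sketch; it assumes an existing kraken.containers.Segmentation (for example from kraken's baseline segmenter), and the file name and the id attribute on the yielded records are illustrative placeholders, not guaranteed by this patch:

from PIL import Image
from kraken.lib.segmentation import extract_polygons

im = Image.open('page.png')   # page image matching the segmentation
seg = ...                     # placeholder: a kraken.containers.Segmentation for that page

for line_img, line in extract_polygons(im, seg):
    # line is the corresponding BaselineLine/BBoxLine record
    line_img.save(f'line_{line.id}.png')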