From fe925a02a90120e0d0b8ccdccdbffa6294d7a928 Mon Sep 17 00:00:00 2001 From: bertsky Date: Wed, 11 Sep 2019 18:23:41 +0200 Subject: [PATCH 01/13] Workspace.image_from_*: when rotation is necessary, do not always fill with white; instead, determine the background color by median, and only use white for binary images; moreover, add a transparency channel if the input mode allows it --- ocrd/ocrd/workspace.py | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/ocrd/ocrd/workspace.py b/ocrd/ocrd/workspace.py index 24a07b5ff..db490c028 100644 --- a/ocrd/ocrd/workspace.py +++ b/ocrd/ocrd/workspace.py @@ -3,7 +3,7 @@ from pathlib import Path import cv2 -from PIL import Image +from PIL import Image, ImageStat import numpy as np from atomicwrites import atomic_write from deprecated.sphinx import deprecated @@ -359,12 +359,24 @@ def image_from_page(self, page, page_id, feature_selector='', feature_filter='') if (page_xywh['angle'] and not 'deskewed' in page_xywh['features'] and not 'deskewed' in feature_filter.split(',')): - log.info("Rotating AlternativeImage for page '%s' by %.2f°", - page_id, page_xywh['angle']) + log.info("Rotating %s for page '%s' by %.2f°", + "AlternativeImage" if alternative_image else + "image", page_id, page_xywh['angle']) + if page_image.mode in ['RGB', 'L']: + # ensure no information is lost by adding transparency + # (which rotation will respect): + page_image.putalpha(255) + background = ImageStat.Stat(page_image).median[0] page_image = page_image.rotate(page_xywh['angle'], expand=True, #resample=Image.BILINEAR, - fillcolor='white') + fillcolor=( + # background detection by median can fail + # if segments are very small or have lots + # of image foreground; if we already know + # this is binarized, fill with white: + 'white' if page_image.mode == '1' else + background)) page_xywh['features'] += ',deskewed' # verify constraints again: if not all(feature in page_xywh['features'] @@ -495,12 
+507,24 @@ def image_from_segment(self, segment, parent_image, parent_xywh, feature_selecto segment_xywh['angle'] and not 'deskewed' in segment_xywh['features'] and not 'deskewed' in feature_filter.split(',')): - log.info("Rotating AlternativeImage for segment '%s' by %.2f°", - segment.id, segment_xywh['angle']) + log.info("Rotating %s for segment '%s' by %.2f°", + "AlternativeImage" if alternative_image else + "image", segment.id, segment_xywh['angle']) + if segment_image.mode in ['RGB', 'L']: + # ensure no information is lost by adding transparency + # (which rotation will respect): + segment_image.putalpha(255) + background = ImageStat.Stat(segment_image).median[0] segment_image = segment_image.rotate(segment_xywh['angle'], expand=True, #resample=Image.BILINEAR, - fillcolor='white') + fillcolor=( + # background detection by median can fail + # if segments are very small or have lots + # of image foreground; if we already know + # this is binarized, fill with white: + 'white' if page_image.mode == '1' else + background)) segment_xywh['features'] += ',deskewed' # verify constraints again: if not all(feature in segment_xywh['features'] From a10c9f21382d8ccc904e16812d53bed5d694d3a5 Mon Sep 17 00:00:00 2001 From: bertsky Date: Wed, 11 Sep 2019 22:35:15 +0200 Subject: [PATCH 02/13] Workspace.image_from_*: when cropping is necessary (image_from_polygon), keep the input image mode; moreover, add a transparency channel if the input image allows it --- ocrd/ocrd/workspace.py | 4 +++- ocrd_utils/ocrd_utils/__init__.py | 7 ++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/ocrd/ocrd/workspace.py b/ocrd/ocrd/workspace.py index db490c028..30a69fe45 100644 --- a/ocrd/ocrd/workspace.py +++ b/ocrd/ocrd/workspace.py @@ -343,7 +343,9 @@ def image_from_page(self, page, page_id, feature_selector='', feature_filter='') if (border and not 'cropped' in page_xywh['features'] and not 'cropped' in feature_filter.split(',')): - log.debug('Cropping to border') + 
log.debug("Cropping %s for page '%s' to border", + "AlternativeImage" if alternative_image else + "image", page_id) # get polygon outline of page border: page_polygon = np.array(polygon_from_points(page_points)) # create a mask from the page polygon: diff --git a/ocrd_utils/ocrd_utils/__init__.py b/ocrd_utils/ocrd_utils/__init__.py index 3e549e234..a4fe7c8d8 100644 --- a/ocrd_utils/ocrd_utils/__init__.py +++ b/ocrd_utils/ocrd_utils/__init__.py @@ -320,7 +320,12 @@ def image_from_polygon(image, polygon): # background = np.median(array, axis=[0, 1], keepdims=True) # array = np.broadcast_to(background.astype(np.uint8), array.shape) background = ImageStat.Stat(image).median[0] - new_image = Image.new('L', image.size, background) + new_image = Image.new(image.mode, image.size, background) + if image.mode in ['RGB', 'L']: + # ensure no information is lost by adding transparency + # (so we do not have to rely on background estimation): + image.putalpha(mask) + return image new_image.paste(image, mask=mask) return new_image From 440863fcd1a9098933dadea8e04517dd4154d91c Mon Sep 17 00:00:00 2001 From: bertsky Date: Thu, 12 Sep 2019 09:37:28 +0200 Subject: [PATCH 03/13] fix debug message --- ocrd/ocrd/workspace.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ocrd/ocrd/workspace.py b/ocrd/ocrd/workspace.py index 30a69fe45..8bdbcee45 100644 --- a/ocrd/ocrd/workspace.py +++ b/ocrd/ocrd/workspace.py @@ -314,6 +314,7 @@ def image_from_page(self, page, page_id, feature_selector='', feature_filter='') # initialize AlternativeImage@comments classes as empty: page_xywh['features'] = '' + alternative_image = None alternative_images = page.get_AlternativeImage() if alternative_images: # (e.g. 
from page-level cropping, binarization, deskewing or despeckling) @@ -479,6 +480,7 @@ def image_from_segment(self, segment, parent_image, parent_xywh, feature_selecto # initialize AlternativeImage@comments classes from parent: segment_xywh['features'] = parent_xywh['features'] + ',cropped' + alternative_image = None alternative_images = segment.get_AlternativeImage() if alternative_images: # (e.g. from segment-level cropping, binarization, deskewing or despeckling) From d4b46a32a595fcd3372ab7ad7b108e6cdd9975f3 Mon Sep 17 00:00:00 2001 From: Robert Schubert Date: Fri, 20 Sep 2019 10:26:02 +0200 Subject: [PATCH 04/13] cropping/rotation: caller can opt out of transparency: - image_from_page, image_from_segment, image_from_polygon: add parameter ``fill`` - possible values white/background/transparent, with ``transparent`` (behaviour introduced by this branch) as default --- CHANGELOG.md | 6 ++- ocrd/ocrd/workspace.py | 80 +++++++++++++++++++------------ ocrd_utils/ocrd_utils/__init__.py | 44 +++++++++-------- 3 files changed, 78 insertions(+), 52 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 063cdebc0..9f6548480 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,9 +5,11 @@ Versioned according to [Semantic Versioning](http://semver.org/). 
## Unreleased +* image_from_page etc: allow filling with background or transparency + ## [1.0.0b19] - 2019-09-10 -* image_from_page: allow filtering by feature (@comment), #294 +* image_from_page etc: allow filtering by feature (@comments), #294 ## [1.0.0b18] - 2019-09-06 @@ -25,7 +27,7 @@ Fixed: * Processor: `chdir` to workspace directory on init so relative files resolve properly * typos in docstrings * README: 'module' -> 'package' - * workspace.image_from_page: logic with rotation/angle + * workspace.image_from_page etc: logic with rotation/angle * Adapted test suite to OCR-D/assets now with file extensions Added: diff --git a/ocrd/ocrd/workspace.py b/ocrd/ocrd/workspace.py index 8bdbcee45..ee32d73df 100644 --- a/ocrd/ocrd/workspace.py +++ b/ocrd/ocrd/workspace.py @@ -249,7 +249,7 @@ def _resolve_image_as_pil(self, image_url, coords=None): ] return Image.fromarray(region_cut) - def image_from_page(self, page, page_id, feature_selector='', feature_filter=''): + def image_from_page(self, page, page_id, fill='transparent', feature_selector='', feature_filter=''): """Extract a Page image from the workspace. Given a PageType object, ``page``, extract its PIL.Image from @@ -267,7 +267,15 @@ def image_from_page(self, page, page_id, feature_selector='', feature_filter='') chosen image does not have "deskewed", but an @orientation exists, then rotate it (unless "deskewed" is also being filtered). - Cropping uses a polygon mask (not just the rectangle). + Cropping uses a polygon mask (not just the rectangle). Areas outside + the polygon (regardless of cropping and deskewing) will be filled + according to ``fill``: + - if ``background`` (the default), then fill with the median color + of the image; + - if ``white``, then fill with white; + - if ``transparent``, then add a transparency channel which is + fully opaque before cropping and rotating (thus only the exposed + areas will be transparent afterwards). 
(Required and produced features need not be in the same order, so ``feature_selector`` is merely a mask specifying Boolean AND, and @@ -350,7 +358,7 @@ def image_from_page(self, page, page_id, feature_selector='', feature_filter='') # get polygon outline of page border: page_polygon = np.array(polygon_from_points(page_points)) # create a mask from the page polygon: - page_image = image_from_polygon(page_image, page_polygon) + page_image = image_from_polygon(page_image, page_polygon, fill=fill) # recrop into page rectangle: page_image = crop_image(page_image, box=(page_xywh['x'], @@ -365,21 +373,21 @@ def image_from_page(self, page, page_id, feature_selector='', feature_filter='') log.info("Rotating %s for page '%s' by %.2f°", "AlternativeImage" if alternative_image else "image", page_id, page_xywh['angle']) - if page_image.mode in ['RGB', 'L']: - # ensure no information is lost by adding transparency - # (which rotation will respect): - page_image.putalpha(255) - background = ImageStat.Stat(page_image).median[0] + if fill == 'transparent': + if page_image.mode in ['RGB', 'L']: + # ensure no information is lost by adding transparency channel + # initialized to fully opaque (so cropping and rotation will + # expose areas as transparent): + page_image.putalpha(255) + background = 0 + elif fill == 'background': + background = ImageStat.Stat(page_image).median[0] + else: + background = 'white' page_image = page_image.rotate(page_xywh['angle'], expand=True, #resample=Image.BILINEAR, - fillcolor=( - # background detection by median can fail - # if segments are very small or have lots - # of image foreground; if we already know - # this is binarized, fill with white: - 'white' if page_image.mode == '1' else - background)) + fillcolor=background) page_xywh['features'] += ',deskewed' # verify constraints again: if not all(feature in page_xywh['features'] @@ -395,9 +403,10 @@ def image_from_page(self, page, page_id, feature_selector='', feature_filter='') # subtract offset 
from any increase in binary region size over source: page_xywh['x'] -= round(0.5 * max(0, page_image.width - page_xywh['w'])) page_xywh['y'] -= round(0.5 * max(0, page_image.height - page_xywh['h'])) + page_image.format = 'PNG' # workaround for tesserocr#194 return page_image, page_xywh, page_image_info - def image_from_segment(self, segment, parent_image, parent_xywh, feature_selector='', feature_filter=''): + def image_from_segment(self, segment, parent_image, parent_xywh, fill='transparent', feature_selector='', feature_filter=''): """Extract a segment image from its parent's image. Given a PIL.Image of the parent, ``parent_image``, with its @@ -421,8 +430,16 @@ def image_from_segment(self, segment, parent_image, parent_xywh, feature_selecto the segment coordinates in an inverse transformation (i.e. translation to center, passive rotation, re-translation). - Cropping uses a polygon mask (not just the rectangle). - + Cropping uses a polygon mask (not just the rectangle). Areas outside + the polygon (regardless of cropping and deskewing) will be filled + according to ``fill``: + - if ``background`` (the default), then fill with the median color + of the image; + - if ``white``, then fill with white; + - if ``transparent``, then add a transparency channel which is + fully opaque before cropping and rotating (thus only the exposed + areas will be transparent afterwards). + (Required and produced features need not be in the same order, so ``feature_selector`` is merely a mask specifying Boolean AND, and ``feature_filter`` is merely a mask specifying Boolean OR.) 
@@ -466,7 +483,7 @@ def image_from_segment(self, segment, parent_image, parent_xywh, feature_selecto # get polygon outline of segment relative to parent image: segment_polygon = coordinates_of_segment(segment, parent_image, parent_xywh) # create a mask from the segment polygon: - segment_image = image_from_polygon(parent_image, segment_polygon) + segment_image = image_from_polygon(parent_image, segment_polygon, fill=fill) # recrop into segment rectangle: segment_image = crop_image(segment_image, box=(segment_xywh['x'] - parent_xywh['x'], @@ -514,21 +531,21 @@ def image_from_segment(self, segment, parent_image, parent_xywh, feature_selecto log.info("Rotating %s for segment '%s' by %.2f°", "AlternativeImage" if alternative_image else "image", segment.id, segment_xywh['angle']) - if segment_image.mode in ['RGB', 'L']: - # ensure no information is lost by adding transparency - # (which rotation will respect): - segment_image.putalpha(255) - background = ImageStat.Stat(segment_image).median[0] + if fill == 'transparent': + if segment_image.mode in ['RGB', 'L']: + # ensure no information is lost by adding transparency channel + # initialized to fully opaque (so cropping and rotation will + # expose areas as transparent): + segment_image.putalpha(255) + background = 0 + elif fill == 'background': + background = ImageStat.Stat(segment_image).median[0] + else: + background = 'white' segment_image = segment_image.rotate(segment_xywh['angle'], expand=True, #resample=Image.BILINEAR, - fillcolor=( - # background detection by median can fail - # if segments are very small or have lots - # of image foreground; if we already know - # this is binarized, fill with white: - 'white' if page_image.mode == '1' else - background)) + fillcolor=background) segment_xywh['features'] += ',deskewed' # verify constraints again: if not all(feature in segment_xywh['features'] @@ -546,6 +563,7 @@ def image_from_segment(self, segment, parent_image, parent_xywh, feature_selecto segment_image.width - 
segment_xywh['w'])) segment_xywh['y'] -= round(0.5 * max(0, segment_image.height - segment_xywh['h'])) + segment_image.format = 'PNG' # workaround for tesserocr#194 return segment_image, segment_xywh # pylint: disable=redefined-builtin diff --git a/ocrd_utils/ocrd_utils/__init__.py b/ocrd_utils/ocrd_utils/__init__.py index a4fe7c8d8..a3c0e8305 100644 --- a/ocrd_utils/ocrd_utils/__init__.py +++ b/ocrd_utils/ocrd_utils/__init__.py @@ -104,7 +104,7 @@ import contextlib import numpy as np -from PIL import Image, ImageStat, ImageDraw +from PIL import Image, ImageStat, ImageDraw, ImageChops import logging from .logging import * # pylint: disable=wildcard-import @@ -301,31 +301,37 @@ def get_local_filename(url, start=None): url = url[len(start):] return url - -def image_from_polygon(image, polygon): +def image_from_polygon(image, polygon, fill='transparent'): """"Mask an image with a polygon. Given a PIL.Image ``image`` and a numpy array ``polygon`` - of relative coordinates into the image, put everything - outside the polygon hull to the background. Since ``image`` - is not necessarily binarized yet, determine the background - from the median color (instead of white). - + of relative coordinates into the image, fill everything + outside the polygon hull to a color according to ``fill``: + - if ``background`` (the default), then use the median color + of the image; + - if ``white``, then use white; + - if ``transparent``, then add a transparency channel from + the polygon mask (i.e. everything outside the polygon will + be transparent). + Return a new PIL.Image. 
""" mask = polygon_mask(image, polygon) - # create a background image from its median color - # (in case it has not been binarized yet): - # array = np.asarray(image) - # background = np.median(array, axis=[0, 1], keepdims=True) - # array = np.broadcast_to(background.astype(np.uint8), array.shape) - background = ImageStat.Stat(image).median[0] + if fill == 'transparent' and image.mode in ['RGB', 'L', 'RGBA', 'LA']: + # ensure no information is lost by adding transparency channel + # initialized to fully transparent outside the mask + # (so consumers do not have to rely on background estimation): + # ensure transparency maximizes (i.e. parent mask AND mask): + if image.mode in ['RGBA', 'LA']: + mask = ImageChops.darker(mask, image.getchannel('A')) # min opaque + new_image = image.copy() + new_image.putalpha(mask) + return new_image + if fill == 'background': + background = ImageStat.Stat(image).median[0] + else: + background = 'white' new_image = Image.new(image.mode, image.size, background) - if image.mode in ['RGB', 'L']: - # ensure no information is lost by adding transparency - # (so we do not have to rely on background estimation): - image.putalpha(mask) - return image new_image.paste(image, mask=mask) return new_image From 9f55b2771e67f05dc0c38548dea08ef6fb899938 Mon Sep 17 00:00:00 2001 From: Robert Schubert Date: Fri, 20 Sep 2019 10:26:02 +0200 Subject: [PATCH 05/13] cropping/rotation: caller must opt into transparency: - image_from_page, image_from_segment, image_from_polygon: add parameter ``transparency``, independent of ``fill`` - an alpha channel with the mask will be added iff ``transparency``, colour in ``fill`` will be used regardless (for consumers which cannot handle alpha channels) --- ocrd/ocrd/workspace.py | 92 ++++++++++++++++--------------- ocrd_utils/ocrd_utils/__init__.py | 30 +++++----- 2 files changed, 63 insertions(+), 59 deletions(-) diff --git a/ocrd/ocrd/workspace.py b/ocrd/ocrd/workspace.py index ee32d73df..955a2ba59 100644 --- 
a/ocrd/ocrd/workspace.py +++ b/ocrd/ocrd/workspace.py @@ -249,43 +249,46 @@ def _resolve_image_as_pil(self, image_url, coords=None): ] return Image.fromarray(region_cut) - def image_from_page(self, page, page_id, fill='transparent', feature_selector='', feature_filter=''): + def image_from_page(self, page, page_id, + fill='background', transparency=False, + feature_selector='', feature_filter=''): """Extract a Page image from the workspace. - + Given a PageType object, ``page``, extract its PIL.Image from AlternativeImage if it exists. Otherwise extract the PIL.Image from imageFilename. Also crop it if a Border exists, and rotate it if an @orientation exists. Otherwise just return it. - + If ``feature_selector`` and/or ``feature_filter`` is given, then select/filter among imageFilename and all AlternativeImages the last which contains all of the selected but none of the filtered features (i.e. @comments classes), or raise an error. - + If the chosen image does not have "cropped", but a Border exists, then crop it (unless "cropped" is also being filtered). And if the chosen image does not have "deskewed", but an @orientation exists, then rotate it (unless "deskewed" is also being filtered). - + Cropping uses a polygon mask (not just the rectangle). Areas outside the polygon (regardless of cropping and deskewing) will be filled according to ``fill``: - if ``background`` (the default), then fill with the median color of the image; - - if ``white``, then fill with white; - - if ``transparent``, then add a transparency channel which is - fully opaque before cropping and rotating (thus only the exposed - areas will be transparent afterwards). - + - if ``white``, then fill with white. + Moreover, if ``transparency`` is true, then add an alpha channel which + is fully opaque before cropping and rotating (thus only the exposed + areas will be transparent afterwards, for those that can interpret + alpha channels). 
+ (Required and produced features need not be in the same order, so ``feature_selector`` is merely a mask specifying Boolean AND, and ``feature_filter`` is merely a mask specifying Boolean OR.) - + If the resulting page image is larger than the bounding box of ``page``, then in the returned bounding box, reduce the offset by half the width/height difference (so consumers being passed this image and offset will still crop relative to the original center). - + Return a tuple: * the extracted image, * a dictionary with the absolute coordinates of the page's @@ -295,7 +298,7 @@ def image_from_page(self, page, page_id, fill='transparent', feature_selector='' * an OcrdExif instance associated with the original image. (The first two can be used to annotate a new AlternativeImage, or pass down with ``image_from_segment``.) - + Example: * get a raw (colored) but already deskewed and cropped image: ``page_image, page_xywh, page_image_info = workspace.image_from_page( @@ -358,7 +361,8 @@ def image_from_page(self, page, page_id, fill='transparent', feature_selector='' # get polygon outline of page border: page_polygon = np.array(polygon_from_points(page_points)) # create a mask from the page polygon: - page_image = image_from_polygon(page_image, page_polygon, fill=fill) + page_image = image_from_polygon(page_image, page_polygon, + fill=fill, transparency=transparency) # recrop into page rectangle: page_image = crop_image(page_image, box=(page_xywh['x'], @@ -373,17 +377,15 @@ def image_from_page(self, page, page_id, fill='transparent', feature_selector='' log.info("Rotating %s for page '%s' by %.2f°", "AlternativeImage" if alternative_image else "image", page_id, page_xywh['angle']) - if fill == 'transparent': - if page_image.mode in ['RGB', 'L']: - # ensure no information is lost by adding transparency channel - # initialized to fully opaque (so cropping and rotation will - # expose areas as transparent): - page_image.putalpha(255) - background = 0 - elif fill == 'background': 
+ if fill == 'background': background = ImageStat.Stat(page_image).median[0] else: background = 'white' + if transparency and page_image.mode in ['RGB', 'L']: + # ensure no information is lost by adding transparency channel + # initialized to fully opaque (so cropping and rotation will + # expose areas as transparent): + page_image.putalpha(255) page_image = page_image.rotate(page_xywh['angle'], expand=True, #resample=Image.BILINEAR, @@ -406,49 +408,52 @@ def image_from_page(self, page, page_id, fill='transparent', feature_selector='' page_image.format = 'PNG' # workaround for tesserocr#194 return page_image, page_xywh, page_image_info - def image_from_segment(self, segment, parent_image, parent_xywh, fill='transparent', feature_selector='', feature_filter=''): + def image_from_segment(self, segment, parent_image, parent_xywh, + fill='background', transparency=False, + feature_selector='', feature_filter=''): """Extract a segment image from its parent's image. - + Given a PIL.Image of the parent, ``parent_image``, with its absolute coordinates, ``parent_xywh``, and a PAGE segment (TextRegionType / TextLineType / WordType / GlyphType) object which is logically contained in it, ``segment``, extract its PIL.Image from AlternativeImage if it exists. Otherwise produce an image via cropping from ``parent_image``. - + If ``feature_selector`` and/or ``feature_filter`` is given, then select/filter among the cropped ``parent_image`` and the available AlternativeImages the last which contains all of the selected but none of the filtered features (i.e. @comments classes), or raise an error. - + If the chosen AlternativeImage does not have "deskewed", but an @orientation exists, then rotate it (unless "deskewed" is also being filtered). - + Regardless, respect any orientation angle annotated for the parent (from parent-level deskewing) by rotating the image, and compensating the segment coordinates in an inverse transformation (i.e. 
translation to center, passive rotation, re-translation). - + Cropping uses a polygon mask (not just the rectangle). Areas outside the polygon (regardless of cropping and deskewing) will be filled according to ``fill``: - if ``background`` (the default), then fill with the median color of the image; - - if ``white``, then fill with white; - - if ``transparent``, then add a transparency channel which is - fully opaque before cropping and rotating (thus only the exposed - areas will be transparent afterwards). + - if ``white``, then fill with white. + Moreover, if ``transparency`` is true, then add an alpha channel which + is fully opaque before cropping and rotating (thus only the exposed + areas will be transparent afterwards, for those that can interpret + alpha channels). (Required and produced features need not be in the same order, so ``feature_selector`` is merely a mask specifying Boolean AND, and ``feature_filter`` is merely a mask specifying Boolean OR.) - + If the resulting segment image is larger than the bounding box of ``segment``, then in the returned bounding box, reduce the offset by half the width/height difference (so consumers being passed this image and offset will still crop relative to the original center). - + Return a tuple: * the extracted image, * a dictionary with the absolute coordinates of the segment's @@ -456,7 +461,7 @@ def image_from_segment(self, segment, parent_image, parent_xywh, fill='transpare (features, i.e. of all operations that lead up to this result). (These can be used to create a new AlternativeImage, or pass down for calls on lower hierarchy levels.) 
- + Example: * get a raw (colored) but already deskewed and cropped image: ``image, xywh = workspace.image_from_segment(region, @@ -483,7 +488,8 @@ def image_from_segment(self, segment, parent_image, parent_xywh, fill='transpare # get polygon outline of segment relative to parent image: segment_polygon = coordinates_of_segment(segment, parent_image, parent_xywh) # create a mask from the segment polygon: - segment_image = image_from_polygon(parent_image, segment_polygon, fill=fill) + segment_image = image_from_polygon(parent_image, segment_polygon, + fill=fill, transparency=transparency) # recrop into segment rectangle: segment_image = crop_image(segment_image, box=(segment_xywh['x'] - parent_xywh['x'], @@ -531,17 +537,15 @@ def image_from_segment(self, segment, parent_image, parent_xywh, fill='transpare log.info("Rotating %s for segment '%s' by %.2f°", "AlternativeImage" if alternative_image else "image", segment.id, segment_xywh['angle']) - if fill == 'transparent': - if segment_image.mode in ['RGB', 'L']: - # ensure no information is lost by adding transparency channel - # initialized to fully opaque (so cropping and rotation will - # expose areas as transparent): - segment_image.putalpha(255) - background = 0 - elif fill == 'background': + if fill == 'background': background = ImageStat.Stat(segment_image).median[0] else: background = 'white' + if transparency and segment_image.mode in ['RGB', 'L']: + # ensure no information is lost by adding transparency channel + # initialized to fully opaque (so cropping and rotation will + # expose areas as transparent): + segment_image.putalpha(255) segment_image = segment_image.rotate(segment_xywh['angle'], expand=True, #resample=Image.BILINEAR, diff --git a/ocrd_utils/ocrd_utils/__init__.py b/ocrd_utils/ocrd_utils/__init__.py index a3c0e8305..74b8da2f9 100644 --- a/ocrd_utils/ocrd_utils/__init__.py +++ b/ocrd_utils/ocrd_utils/__init__.py @@ -301,7 +301,7 @@ def get_local_filename(url, start=None): url = url[len(start):] 
return url -def image_from_polygon(image, polygon, fill='transparent'): +def image_from_polygon(image, polygon, fill='background', transparency=False): """"Mask an image with a polygon. Given a PIL.Image ``image`` and a numpy array ``polygon`` @@ -309,30 +309,30 @@ def image_from_polygon(image, polygon, fill='transparent'): outside the polygon hull to a color according to ``fill``: - if ``background`` (the default), then use the median color of the image; - - if ``white``, then use white; - - if ``transparent``, then add a transparency channel from - the polygon mask (i.e. everything outside the polygon will - be transparent). + - if ``white``, then use white. + Moreover, if ``transparent`` is true, then add an alpha channel + from the polygon mask (i.e. everything outside the polygon will + be transparent for those that can interpret alpha channels). Return a new PIL.Image. """ mask = polygon_mask(image, polygon) - if fill == 'transparent' and image.mode in ['RGB', 'L', 'RGBA', 'LA']: - # ensure no information is lost by adding transparency channel - # initialized to fully transparent outside the mask - # (so consumers do not have to rely on background estimation): - # ensure transparency maximizes (i.e. 
parent mask AND mask): - if image.mode in ['RGBA', 'LA']: - mask = ImageChops.darker(mask, image.getchannel('A')) # min opaque - new_image = image.copy() - new_image.putalpha(mask) - return new_image if fill == 'background': background = ImageStat.Stat(image).median[0] else: background = 'white' new_image = Image.new(image.mode, image.size, background) new_image.paste(image, mask=mask) + if transparency and image.mode in ['RGB', 'L', 'RGBA', 'LA']: + # ensure no information is lost by adding transparency channel + # initialized to fully transparent outside the mask + # (so consumers do not have to rely on background estimation, + # which can fail on foreground-dominated segments, or white, + # which can be inconsistent on unbinarized images): + if image.mode in ['RGBA', 'LA']: + # ensure transparency maximizes (i.e. parent mask AND mask): + mask = ImageChops.darker(mask, image.getchannel('A')) # min opaque + new_image.putalpha(mask) return new_image def is_local_filename(url): From 215105ab3f509472feeac834726e2a7b2e9e27bc Mon Sep 17 00:00:00 2001 From: Robert Schubert Date: Mon, 23 Sep 2019 15:39:22 +0200 Subject: [PATCH 06/13] cropping: if input already has alpha, then use it unconditionally - image_from_polygon: regardless of the ``transparency`` parameter, if the input already has an alpha channel, then shrink its mask from the polygon mask --- ocrd_utils/ocrd_utils/__init__.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/ocrd_utils/ocrd_utils/__init__.py b/ocrd_utils/ocrd_utils/__init__.py index 74b8da2f9..c8932f5a1 100644 --- a/ocrd_utils/ocrd_utils/__init__.py +++ b/ocrd_utils/ocrd_utils/__init__.py @@ -310,9 +310,11 @@ def image_from_polygon(image, polygon, fill='background', transparency=False): - if ``background`` (the default), then use the median color of the image; - if ``white``, then use white. 
- Moreover, if ``transparent`` is true, then add an alpha channel + Moreover, if ``transparency`` is true, then add an alpha channel from the polygon mask (i.e. everything outside the polygon will be transparent for those that can interpret alpha channels). + (Images which already have an alpha channel will have it + shrunk to the polygon mask.) Return a new PIL.Image. """ @@ -323,15 +325,17 @@ def image_from_polygon(image, polygon, fill='background', transparency=False): background = 'white' new_image = Image.new(image.mode, image.size, background) new_image.paste(image, mask=mask) - if transparency and image.mode in ['RGB', 'L', 'RGBA', 'LA']: - # ensure no information is lost by adding transparency channel - # initialized to fully transparent outside the mask - # (so consumers do not have to rely on background estimation, - # which can fail on foreground-dominated segments, or white, - # which can be inconsistent on unbinarized images): - if image.mode in ['RGBA', 'LA']: - # ensure transparency maximizes (i.e. parent mask AND mask): - mask = ImageChops.darker(mask, image.getchannel('A')) # min opaque + # ensure no information is lost by adding a transparency channel + # initialized to fully transparent outside the polygon mask + # (so consumers do not have to rely on background estimation, + # which can fail on foreground-dominated segments, or white, + # which can be inconsistent on unbinarized images): + if image.mode in ['RGBA', 'LA']: + # ensure transparency maximizes (i.e. 
parent mask AND mask): + mask = ImageChops.darker(mask, image.getchannel('A')) # min opaque + new_image.putalpha(mask) + elif transparency and image.mode in ['RGB', 'L']: + # introduce transparency: new_image.putalpha(mask) return new_image From 1d82c42e3931ce50149b03edeb6e92523fd8d13c Mon Sep 17 00:00:00 2001 From: Robert Schubert Date: Wed, 25 Sep 2019 15:46:40 +0200 Subject: [PATCH 07/13] polygon_mask: outline must have color of fill --- ocrd_utils/ocrd_utils/__init__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ocrd_utils/ocrd_utils/__init__.py b/ocrd_utils/ocrd_utils/__init__.py index c8932f5a1..3d916455a 100644 --- a/ocrd_utils/ocrd_utils/__init__.py +++ b/ocrd_utils/ocrd_utils/__init__.py @@ -271,6 +271,9 @@ def crop_image(image, box=None): # (It should be invalid in PAGE-XML to extend beyond parents.) if not box: box = (0, 0, image.width, image.height) + elif box[0] < 0 or box[1] < 0 or box[2] > image.width or box[3] > image.height: + LOG.error('crop coordinates (%s) exceed image (%dx%d)', + str(box), image.width, image.height) xywh = xywh_from_bbox(*box) background = ImageStat.Stat(image).median[0] new_image = Image.new(image.mode, (xywh['w'], xywh['h']), @@ -446,7 +449,7 @@ def polygon_mask(image, coordinates): mask = Image.new('L', image.size, 0) if isinstance(coordinates, np.ndarray): coordinates = list(map(tuple, coordinates)) - ImageDraw.Draw(mask).polygon(coordinates, outline=1, fill=255) + ImageDraw.Draw(mask).polygon(coordinates, outline=255, fill=255) return mask def rotate_coordinates(polygon, angle, orig=np.array([0, 0])): From 24430ad71e113ff9c1e5d6b986ef6919d5a438c0 Mon Sep 17 00:00:00 2001 From: Robert Schubert Date: Tue, 1 Oct 2019 12:31:21 +0200 Subject: [PATCH 08/13] fix image API: - for converting to/from relative coordinates on each level, instead of passing on offsets and angles along with images (which would actually have to be stored and applied for all levels monotonically, but was only implemented for the 
previous level), propagate one single affine coordinate transformation (which can be composed via matrix multiplication and inverted via matrix inversion) - encapsulate image rotation, coordinate rotation, coordinate translation, offset calculation for coordinate rotation, application of coordinate transformation - use the same feature selectors/filters for coordinate transforms as for image operations - crop to the bounding box of the rotated polygon, not of the original; likewise, calculate rotation center and rotation offset (for regions) based on such bbox (whether it was used for AlternativeImage or not) - break API for callers that expected the bounding box of the original (this was incorrect; callers must likewise crop via relative bboxes) - warn if actual image size after rotation or from AlternativeImage does not fit calculated image size, but keep going with the calculated offset - page and region level orientation are not described additive-relative but supplantive-absolute; thus, rotation must be differential when both apply - bbox_from_polygon: intermediate coordinates can be negative - image_from_polygon: allow passing any fillcolor - improve docstrings --- ocrd/ocrd/workspace.py | 383 +++++++++++++++++------------- ocrd_utils/ocrd_utils/__init__.py | 269 ++++++++++++++------- 2 files changed, 407 insertions(+), 245 deletions(-) diff --git a/ocrd/ocrd/workspace.py b/ocrd/ocrd/workspace.py index 955a2ba59..830c78929 100644 --- a/ocrd/ocrd/workspace.py +++ b/ocrd/ocrd/workspace.py @@ -10,14 +10,20 @@ from ocrd_models import OcrdMets, OcrdExif, OcrdFile from ocrd_utils import ( - coordinates_of_segment, - crop_image, getLogger, image_from_polygon, + coordinates_of_segment, + transform_coordinates, + shift_coordinates, + rotate_coordinates, + adjust_canvas_to_rotation, + rotate_image, + crop_image, + bbox_from_polygon, polygon_from_points, + xywh_from_bbox, xywh_from_points, pushd_popd, - MIME_TO_EXT, ) @@ -252,79 +258,105 @@ def 
_resolve_image_as_pil(self, image_url, coords=None): def image_from_page(self, page, page_id, fill='background', transparency=False, feature_selector='', feature_filter=''): - """Extract a Page image from the workspace. + """Extract an image for a PAGE-XML page from the workspace. - Given a PageType object, ``page``, extract its PIL.Image from - AlternativeImage if it exists. Otherwise extract the PIL.Image - from imageFilename. Also crop it if a Border exists, and rotate - it if an @orientation exists. Otherwise just return it. + Given ``page``, a PAGE PageType object, extract its PIL.Image, + either from its AlternativeImage (if it exists), or from its + @imageFilename (otherwise). Also crop it, if a Border exists, + and rotate it, if any @orientation angle is annotated. If ``feature_selector`` and/or ``feature_filter`` is given, then - select/filter among imageFilename and all AlternativeImages the - last which contains all of the selected but none of the filtered - features (i.e. @comments classes), or raise an error. - - If the chosen image does not have "cropped", but a Border exists, - then crop it (unless "cropped" is also being filtered). And if the - chosen image does not have "deskewed", but an @orientation exists, - then rotate it (unless "deskewed" is also being filtered). - - Cropping uses a polygon mask (not just the rectangle). Areas outside - the polygon (regardless of cropping and deskewing) will be filled - according to ``fill``: - - if ``background`` (the default), then fill with the median color - of the image; - - if ``white``, then fill with white. - Moreover, ``transparency`` is true, then add an alpha channel which - is fully opaque before cropping and rotating (thus only the exposed - areas will be transparent afterwards, for those that can interpret - alpha channels). 
+ select/filter among the @imageFilename image and the available + AlternativeImages the last one which contains all of the selected, + but none of the filtered features (i.e. @comments classes), or + raise an error. (Required and produced features need not be in the same order, so ``feature_selector`` is merely a mask specifying Boolean AND, and ``feature_filter`` is merely a mask specifying Boolean OR.) - If the resulting page image is larger than the bounding box of - ``page``, then in the returned bounding box, reduce the offset by - half the width/height difference (so consumers being passed this - image and offset will still crop relative to the original center). + If the chosen image does not have the feature "cropped" yet, but + a Border exists, and unless "cropped" is being filtered, then crop it. + Likewise, if the chosen image does not have the feature "deskewed" yet, + but an @orientation angle is annotated, and unless "deskewed" is being + filtered, then rotate it. + + Cropping uses a polygon mask (not just the bounding box rectangle). + Areas outside the polygon will be filled according to ``fill``: + - if ``background`` (the default), + then fill with the median color of the image; + - otherwise, use the given color, e.g. ``white`` or (255,255,255). + Moreover, if ``transparency`` is true, and unless the image already + has an alpha channel, then add an alpha channel which is fully opaque + before cropping and rotating. (Thus, only the exposed areas will be + transparent afterwards, for those that can interpret alpha channels). Return a tuple: * the extracted image, - * a dictionary with the absolute coordinates of the page's - bounding box / border (xywh), angle and the AlternativeImage - @comments (features, i.e. 
of all operations that lead up to - this result), + * a dictionary with information about the extracted image: + - ``transform``: a Numpy array with an affine transform which + converts from absolute coordinates to those relative to the image, + i.e. after cropping to the page's border / bounding box (if any) + and deskewing with the page's orientation angle (if any) + - ``angle``: the page-level rotation angle applied to the image, + - ``features``: the AlternativeImage @comments for the image, i.e. + names of all operations that lead up to this result, * an OcrdExif instance associated with the original image. (The first two can be used to annotate a new AlternativeImage, - or pass down with ``image_from_segment``.) + or be passed down with ``image_from_segment``.) Example: * get a raw (colored) but already deskewed and cropped image: - ``page_image, page_xywh, page_image_info = workspace.image_from_page( + ``page_image, page_coords, page_image_info = workspace.image_from_page( page, page_id, feature_selector='deskewed,cropped', feature_filter='binarized,grayscale_normalized')`` """ page_image = self._resolve_image_as_pil(page.imageFilename) page_image_info = OcrdExif(page_image) - page_xywh = {'x': 0, - 'y': 0, - 'w': page_image.width, - 'h': page_image.height} # FIXME: remove PrintSpace here as soon as GT abides by the PAGE standard: border = page.get_Border() or page.get_PrintSpace() - if border: + if (border and + not 'cropped' in feature_filter.split(',')): page_points = border.get_Coords().points log.debug("Using explicitly set page border '%s' for page '%s'", page_points, page_id) - page_xywh = xywh_from_points(page_points) - # region angle: PAGE orientation is defined clockwise, + # get polygon outline of page border: + page_polygon = np.array(polygon_from_points(page_points)) + page_bbox = bbox_from_polygon(page_polygon) + # subtract offset in affine coordinate transform: + # (consistent with image cropping or AlternativeImage below) + page_coords = { + 
'transform': shift_coordinates( + np.eye(3), + np.array([-page_bbox[0], + -page_bbox[1]])) + } + else: + page_bbox = [0, 0, page_image.width, page_image.height] + # use identity as affine coordinate transform: + page_coords = { + 'transform': np.eye(3) + } + # get size of the page after cropping but before rotation: + page_xywh = xywh_from_bbox(*page_bbox) + + # page angle: PAGE @orientation is defined clockwise, # whereas PIL/ndimage rotation is in mathematical direction: - page_xywh['angle'] = -(page.get_orientation() or 0) + page_coords['angle'] = -(page.get_orientation() or 0) + if (page_coords['angle'] and + not 'deskewed' in feature_filter.split(',')): + # Rotate around center in affine coordinate transform: + # (consistent with image rotation or AlternativeImage below) + page_coords['transform'] = rotate_coordinates( + page_coords['transform'], + page_coords['angle'], + np.array([0.5 * page_xywh['w'], + 0.5 * page_xywh['h']])) + # initialize AlternativeImage@comments classes as empty: - page_xywh['features'] = '' - + page_coords['features'] = '' + alternative_image = None alternative_images = page.get_AlternativeImage() if alternative_images: @@ -350,116 +382,120 @@ def image_from_page(self, page, page_id, alternative_images.index(alternative_image) + 1, features, page_id) page_image = self._resolve_image_as_pil(alternative_image.get_filename()) - page_xywh['features'] = features + page_coords['features'] = features + # crop, if (still) necessary: if (border and - not 'cropped' in page_xywh['features'] and + not 'cropped' in page_coords['features'] and not 'cropped' in feature_filter.split(',')): log.debug("Cropping %s for page '%s' to border", - "AlternativeImage" if alternative_image else - "image", page_id) - # get polygon outline of page border: - page_polygon = np.array(polygon_from_points(page_points)) + "AlternativeImage" if alternative_image else "image", + page_id) # create a mask from the page polygon: page_image = image_from_polygon(page_image, 
page_polygon, fill=fill, transparency=transparency) # recrop into page rectangle: - page_image = crop_image(page_image, - box=(page_xywh['x'], - page_xywh['y'], - page_xywh['x'] + page_xywh['w'], - page_xywh['y'] + page_xywh['h'])) - page_xywh['features'] += ',cropped' + page_image = crop_image(page_image, box=page_bbox) + page_coords['features'] += ',cropped' # deskew, if (still) necessary: - if (page_xywh['angle'] and - not 'deskewed' in page_xywh['features'] and + if (page_coords['angle'] and + not 'deskewed' in page_coords['features'] and not 'deskewed' in feature_filter.split(',')): log.info("Rotating %s for page '%s' by %.2f°", "AlternativeImage" if alternative_image else - "image", page_id, page_xywh['angle']) - if fill == 'background': - background = ImageStat.Stat(page_image).median[0] - else: - background = 'white' - if transparency and page_image.mode in ['RGB', 'L']: - # ensure no information is lost by adding transparency channel - # initialized to fully opaque (so cropping and rotation will - # expose areas as transparent): - page_image.putalpha(255) - page_image = page_image.rotate(page_xywh['angle'], - expand=True, - #resample=Image.BILINEAR, - fillcolor=background) - page_xywh['features'] += ',deskewed' + "image", page_id, page_coords['angle']) + page_image = rotate_image(page_image, page_coords['angle'], + fill=fill, transparency=transparency) + page_coords['features'] += ',deskewed' + if (page_coords['angle'] and + not 'deskewed' in feature_filter.split(',')): + w_new, h_new = adjust_canvas_to_rotation( + [page_xywh['w'], page_xywh['h']], page_coords['angle']) + # FIXME we should enforce consistency here (i.e. 
rotation always reshapes, + # and rescaling never happens) + if not (w_new - 1.5 < page_image.width < w_new + 1.5 and + h_new - 1.5 < page_image.height < h_new + 1.5): + log.error('page "%s" image (%s; %dx%d) has not been reshaped properly (%dx%d) during rotation', + page_id, page_coords['features'], + page_image.width, page_image.height, + w_new, h_new) + # verify constraints again: - if not all(feature in page_xywh['features'] + if not all(feature in page_coords['features'] for feature in feature_selector.split(',') if feature): raise Exception('Found no AlternativeImage that satisfies all requirements ' + 'selector="%s" in page "%s"' % ( feature_selector, page_id)) - if any(feature in page_xywh['features'] + if any(feature in page_coords['features'] for feature in feature_filter.split(',') if feature): raise Exception('Found no AlternativeImage that satisfies all requirements ' + 'filter="%s" in page "%s"' % ( feature_filter, page_id)) - # subtract offset from any increase in binary region size over source: - page_xywh['x'] -= round(0.5 * max(0, page_image.width - page_xywh['w'])) - page_xywh['y'] -= round(0.5 * max(0, page_image.height - page_xywh['h'])) page_image.format = 'PNG' # workaround for tesserocr#194 - return page_image, page_xywh, page_image_info + return page_image, page_coords, page_image_info - def image_from_segment(self, segment, parent_image, parent_xywh, + def image_from_segment(self, segment, parent_image, parent_coords, fill='background', transparency=False, feature_selector='', feature_filter=''): - """Extract a segment image from its parent's image. + """Extract an image for a PAGE-XML hierarchy segment from its parent's image. - Given a PIL.Image of the parent, ``parent_image``, with its - absolute coordinates, ``parent_xywh``, and a PAGE segment - (TextRegionType / TextLineType / WordType / GlyphType) object - which is logically contained in it, ``segment``, extract its - PIL.Image from AlternativeImage if it exists. 
Otherwise produce - an image via cropping from ``parent_image``. + Given... + * ``parent_image``, a PIL.Image of the parent, with + * ``parent_coords``, a dict with information about ``parent_image``: + - ``transform``: a Numpy array with an affine transform which + converts from absolute coordinates to those relative to the image, + i.e. after applying all operations (starting with the original image) + - ``angle``: the parent-level rotation angle applied to the image, + - ``features``: the AlternativeImage @comments for the image, i.e. + names of all operations that lead up to this result, and + * ``segment``, a PAGE segment object logically contained in it + (i.e. TextRegionType / TextLineType / WordType / GlyphType), + ...extract the segment's corresponding PIL.Image, either from + AlternativeImage (if it exists), or producing a new image via + cropping from ``parent_image`` (otherwise). If ``feature_selector`` and/or ``feature_filter`` is given, then select/filter among the cropped ``parent_image`` and the available - AlternativeImages the last which contains all of the selected but none - of the filtered features (i.e. @comments classes), or raise an error. - - If the chosen AlternativeImage does not have "deskewed", but - an @orientation exists, then rotate it (unless "deskewed" is - also being filtered). - - Regardless, respect any orientation angle annotated for the parent - (from parent-level deskewing) by rotating the image, and compensating - the segment coordinates in an inverse transformation (i.e. translation - to center, passive rotation, re-translation). - - Cropping uses a polygon mask (not just the rectangle). Areas outside - the polygon (regardless of cropping and deskewing) will be filled - according to ``fill``: - - if ``background`` (the default), then fill with the median color - of the image; - - if ``white``, then fill with white. 
- Moreover, if ``transparency`` is true, then add an alpha channel which - is fully opaque before cropping and rotating (thus only the exposed - areas will be transparent afterwards, for those that can interpret - alpha channels). + AlternativeImages the last one which contains all of the selected, + but none of the filtered features (i.e. @comments classes), or + raise an error. (Required and produced features need not be in the same order, so ``feature_selector`` is merely a mask specifying Boolean AND, and ``feature_filter`` is merely a mask specifying Boolean OR.) - If the resulting segment image is larger than the bounding box of - ``segment``, then in the returned bounding box, reduce the offset by - half the width/height difference (so consumers being passed this - image and offset will still crop relative to the original center). + Cropping uses a polygon mask (not just the bounding box rectangle). + Areas outside the polygon will be filled according to ``fill``: + - if ``background`` (the default), + then fill with the median color of the image; + - otherwise, use the given color, e.g. ``white`` or (255,255,255). + Moreover, if ``transparency`` is true, and unless the image already + has an alpha channel, then add an alpha channel which is fully opaque + before cropping and rotating. (Thus, only the exposed areas will be + transparent afterwards, for those that can interpret alpha channels). + + When cropping, compensate any @orientation angle annotated for the + parent (from parent-level deskewing) by rotating the segment coordinates + in an inverse transformation (i.e. translation to center, then passive + rotation, and translation back). + + Regardless, if any @orientation angle is annotated for the segment + (from segment-level deskewing), and the chosen image does not have + the feature "deskewed" yet, and unless "deskewed" is being filtered, + then rotate it. 
Return a tuple: * the extracted image, - * a dictionary with the absolute coordinates of the segment's - bounding box (xywh), angle and the AlternativeImage @comments - (features, i.e. of all operations that lead up to this result). - (These can be used to create a new AlternativeImage, or pass down + * a dictionary with information about the extracted image: + - ``transform``: a Numpy array with an affine transform which + converts from absolute coordinates to those relative to the image, + i.e. after applying all parent operations, and then cropping to + the segment's bounding box, and deskewing with the segment's + orientation angle (if any) + - ``angle``: the segment-level rotation angle applied to the image, + - ``features``: the AlternativeImage @comments for the image, i.e. + names of all operations that lead up to this result. + (These can be used to create a new AlternativeImage, or passed down for calls on lower hierarchy levels.) Example: @@ -483,25 +519,49 @@ def image_from_segment(self, segment, parent_image, parent_xywh, # on some ad-hoc binarization method. Thus, it is preferable to use # a dedicated processor for this (which produces clipped AlternativeImage # or reduced polygon coordinates). - # crop: - segment_xywh = xywh_from_points(segment.get_Coords().points) + # get polygon outline of segment relative to parent image: - segment_polygon = coordinates_of_segment(segment, parent_image, parent_xywh) + segment_polygon = coordinates_of_segment(segment, parent_image, parent_coords) + # get relative bounding box: + segment_bbox = bbox_from_polygon(segment_polygon) + # get size of the segment in the parent image after cropping + # (i.e. 
possibly different from size before rotation at the parent, but + # also possibly different from size after rotation below/AlternativeImage): + segment_xywh = xywh_from_bbox(*segment_bbox) # create a mask from the segment polygon: segment_image = image_from_polygon(parent_image, segment_polygon, fill=fill, transparency=transparency) # recrop into segment rectangle: - segment_image = crop_image(segment_image, - box=(segment_xywh['x'] - parent_xywh['x'], - segment_xywh['y'] - parent_xywh['y'], - segment_xywh['x'] - parent_xywh['x'] + segment_xywh['w'], - segment_xywh['y'] - parent_xywh['y'] + segment_xywh['h'])) + segment_image = crop_image(segment_image, box=segment_bbox) + # subtract offset from parent in affine coordinate transform: + # (consistent with image cropping) + segment_coords = { + 'transform': shift_coordinates( + parent_coords['transform'], + np.array([-segment_bbox[0], + -segment_bbox[1]])) + } + if 'orientation' in segment.__dict__: - # angle: PAGE orientation is defined clockwise, + # region angle: PAGE @orientation is defined clockwise, # whereas PIL/ndimage rotation is in mathematical direction: - segment_xywh['angle'] = -(segment.get_orientation() or 0) + segment_coords['angle'] = -(segment.get_orientation() or 0) + else: + segment_coords['angle'] = 0 + if (segment_coords['angle'] and + not 'deskewed' in feature_filter.split(',')): + # Rotate around center in affine coordinate transform: + # (consistent with image rotation or AlternativeImage below) + segment_coords['transform'] = rotate_coordinates( + segment_coords['transform'], + # @orientation is always absolute; if higher levels + # have already rotated, then we must compensate: + segment_coords['angle'] - parent_coords['angle'], + np.array([0.5 * segment_xywh['w'], + 0.5 * segment_xywh['h']])) + # initialize AlternativeImage@comments classes from parent: - segment_xywh['features'] = parent_xywh['features'] + ',cropped' + segment_coords['features'] = parent_coords['features'] + ',cropped' 
alternative_image = None alternative_images = segment.get_AlternativeImage() @@ -528,47 +588,52 @@ def image_from_segment(self, segment, parent_image, parent_xywh, alternative_images.index(alternative_image) + 1, features, segment.id) segment_image = self._resolve_image_as_pil(alternative_image.get_filename()) - segment_xywh['features'] = features + segment_coords['features'] = features # deskew, if (still) necessary: - if ('angle' in segment_xywh and - segment_xywh['angle'] and - not 'deskewed' in segment_xywh['features'] and + if (segment_coords['angle'] and + not 'deskewed' in segment_coords['features'] and not 'deskewed' in feature_filter.split(',')): - log.info("Rotating %s for segment '%s' by %.2f°", + log.info("Rotating %s for segment '%s' by %.2f°-%.2f°", "AlternativeImage" if alternative_image else - "image", segment.id, segment_xywh['angle']) - if fill == 'background': - background = ImageStat.Stat(segment_image).median[0] - else: - background = 'white' - if transparency and segment_image.mode in ['RGB', 'L']: - # ensure no information is lost by adding transparency channel - # initialized to fully opaque (so cropping and rotation will - # expose areas as transparent): - segment_image.putalpha(255) - segment_image = segment_image.rotate(segment_xywh['angle'], - expand=True, - #resample=Image.BILINEAR, - fillcolor=background) - segment_xywh['features'] += ',deskewed' + "image", segment.id, segment_coords['angle'], parent_coords['angle']) + # @orientation is always absolute; if higher levels + # have already rotated, then we must compensate: + segment_image = rotate_image(segment_image, segment_coords['angle'] - parent_coords['angle'], + fill=fill, transparency=transparency) + segment_coords['features'] += ',deskewed' + if (segment_coords['angle'] and + not 'deskewed' in feature_filter.split(',')): + # FIXME we should enforce consistency here (i.e. 
rotation always reshapes, + # and rescaling never happens) + w_new, h_new = adjust_canvas_to_rotation( + [segment_xywh['w'], segment_xywh['h']], segment_coords['angle']) + if not (w_new - 1.5 < segment_image.width < w_new + 1.5 and + h_new - 1.5 < segment_image.height < h_new + 1.5): + log.error('segment "%s" image (%s; %dx%d) has not been reshaped properly (%dx%d) during rotation', + segment.id, segment_coords['features'], + segment_image.width, segment_image.height, + w_new, h_new) + else: + if not (segment_xywh['w'] - 1.5 < segment_image.width < segment_xywh['w'] + 1.5 and + segment_xywh['h'] - 1.5 < segment_image.height < segment_xywh['h'] + 1.5): + log.error('segment "%s" image (%s; %dx%d) has not been cropped properly (%dx%d)', + segment.id, segment_coords['features'], + segment_image.width, segment_image.height, + segment_xywh['w'], segment_xywh['h']) + # verify constraints again: - if not all(feature in segment_xywh['features'] + if not all(feature in segment_coords['features'] for feature in feature_selector.split(',') if feature): raise Exception('Found no AlternativeImage that satisfies all requirements' + 'selector="%s" in segment "%s"' % ( feature_selector, segment.id)) - if any(feature in segment_xywh['features'] + if any(feature in segment_coords['features'] for feature in feature_filter.split(',') if feature): raise Exception('Found no AlternativeImage that satisfies all requirements ' + 'filter="%s" in segment "%s"' % ( feature_filter, segment.id)) - # subtract offset from any increase in binary region size over source: - segment_xywh['x'] -= round(0.5 * max(0, - segment_image.width - segment_xywh['w'])) - segment_xywh['y'] -= round(0.5 * max(0, - segment_image.height - segment_xywh['h'])) segment_image.format = 'PNG' # workaround for tesserocr#194 - return segment_image, segment_xywh + return segment_image, segment_coords # pylint: disable=redefined-builtin def save_image_file(self, image, diff --git a/ocrd_utils/ocrd_utils/__init__.py 
b/ocrd_utils/ocrd_utils/__init__.py index 3d916455a..8e0010182 100644 --- a/ocrd_utils/ocrd_utils/__init__.py +++ b/ocrd_utils/ocrd_utils/__init__.py @@ -1,16 +1,25 @@ """ Utility functions and constants usable in various circumstances. -* ``coordinates_of_segment``, ``coordinates_for_segment``, ``rotate_coordinates`` +* ``coordinates_of_segment``, ``coordinates_for_segment`` These functions convert polygon outlines for PAGE elements on all hierarchy - levels (page, region, line, word, glyph) between relative coordinates w.r.t. - parent segment and absolute coordinates w.r.t. the top-level (source) image. - This includes rotation and offset correction. + levels below page (i.e. region, line, word, glyph) between relative coordinates + w.r.t. the parent segment and absolute coordinates w.r.t. the top-level image. + This includes rotation and offset correction, based on affine transformations. + (Used by ``Workspace`` methods ``image_from_page`` and ``image_from_segment``) -* ``polygon_mask``, ``image_from_polygon``, ``crop_image`` +* ``rotate_coordinates``, ``shift_coordinates``, ``transform_coordinates`` - These functions combine PIL.Image with polygons or bboxes. + These backend functions compose affine transformations for rotation and offset + correction of coordinates, or apply them to a set of points. They can be used + to pass down the coordinate system along with images (both invariably sharing + the same operations context) when traversing the element hierarchy top to bottom. + (Used by ``Workspace`` methods ``image_from_page`` and ``image_from_segment``). + +* ``crop_image``, ``rotate_image``, ``image_from_polygon``, ``polygon_mask`` + + These functions operate on PIL.Image objects. * ``xywh_from_points``, ``points_from_xywh``, ``polygon_from_points`` etc. 
@@ -52,6 +61,7 @@ __all__ = [ 'abspath', + 'adjust_canvas_to_rotation', 'bbox_from_points', 'bbox_from_xywh', 'bbox_from_polygon', @@ -80,8 +90,11 @@ 'polygon_from_xywh', 'polygon_mask', 'rotate_coordinates', + 'rotate_image', 'safe_filename', 'setOverrideLogLevel', + 'shift_coordinates', + 'transform_coordinates', 'unzip_file_to_dir', 'xywh_from_bbox', 'xywh_from_points', @@ -132,8 +145,8 @@ def bbox_from_polygon(polygon): """Construct a numeric list representing a bounding box from polygon coordinates in numeric list representation.""" minx = sys.maxsize miny = sys.maxsize - maxx = 0 - maxy = 0 + maxx = -sys.maxsize + maxy = -sys.maxsize for xy in polygon: if xy[0] < minx: minx = xy[0] @@ -159,49 +172,55 @@ def xywh_from_polygon(polygon): return xywh_from_bbox(*bbox_from_polygon(polygon)) def coordinates_for_segment(polygon, parent_image, parent_xywh): - """Convert a relative coordinates polygon to absolute. + """Convert relative coordinates to absolute. - Given a numpy array ``polygon`` of points, and a parent PIL.Image - along with its bounding box to which the coordinates are relative, - calculate the absolute coordinates within the page. - That is, (in case the parent was rotated,) rotate all points in - opposite direction with the center of the image as origin, then - shift all points to the offset of the parent. + Given... + - ``polygon``, a numpy array of points relative to + - ``parent_image``, a PIL.Image, along with + - ``parent_xywh``, its absolute coordinates (bounding box), + ...calculate the absolute coordinates within the page. + + That is: + 1. If ``parent_image`` is larger than indicated by ``parent_xywh`` in + width and height, (which only occurs when the parent was rotated), + then subtract from all points an offset of half the size difference. + 2. In case the parent was rotated, rotate all points, + in opposite direction with the center of the image as origin. + 3. Shift all points to the offset of the parent. 
Return the rounded numpy array of the resulting polygon. """ - # angle correction (unrotate coordinates if image has been rotated): - if 'angle' in parent_xywh: - polygon = rotate_coordinates( - polygon, -parent_xywh['angle'], - orig=np.array([0.5 * parent_image.width, - 0.5 * parent_image.height])) - # offset correction (shift coordinates from base of segment): - polygon += np.array([parent_xywh['x'], parent_xywh['y']]) + polygon = np.array(polygon, dtype=np.float32) # avoid implicit type cast problems + # apply inverse of affine transform: + inv_transform = np.linalg.inv(parent_xywh['transform']) + polygon = transform_coordinates(polygon, inv_transform) return np.round(polygon).astype(np.int32) def coordinates_of_segment(segment, parent_image, parent_xywh): - """Extract the relative coordinates polygon of a PAGE segment element. - - Given a Region / TextLine / Word / Glyph ``segment`` and - the PIL.Image of its parent Page / Region / TextLine / Word - along with its bounding box, calculate the relative coordinates - of the segment within the image. That is, shift all points from - the offset of the parent, and (in case the parent was rotated,) - rotate all points with the center of the image as origin. - + """Extract the coordinates of a PAGE segment element relative to its parent. + + Given... + - ``segment``, a PAGE segment object in absolute coordinates + (i.e. RegionType / TextLineType / WordType / GlyphType), and + - ``parent_image``, the PIL.Image of its corresponding parent object + (i.e. PageType / RegionType / TextLineType / WordType), along with + - ``parent_xywh``, its absolute coordinates (bounding box), + ...calculate the relative coordinates of the segment within the image. + + That is: + 1. Shift all points from the offset of the parent. + 2. In case the parent image was rotated, rotate all points, + with the center of the image as origin. + 3. 
If ``parent_image`` is larger than indicated by ``parent_xywh`` in + width and height, (which only occurs when the parent was rotated), + then add to all points an offset of half the size difference. + Return the rounded numpy array of the resulting polygon. """ # get polygon: polygon = np.array(polygon_from_points(segment.get_Coords().points)) - # offset correction (shift coordinates to base of segment): - polygon -= np.array([parent_xywh['x'], parent_xywh['y']]) - # angle correction (rotate coordinates if image has been rotated): - if 'angle' in parent_xywh: - polygon = rotate_coordinates( - polygon, parent_xywh['angle'], - orig=np.array([0.5 * parent_image.width, - 0.5 * parent_image.height])) + # apply affine transform: + polygon = transform_coordinates(polygon, parent_xywh['transform']) return np.round(polygon).astype(np.int32) @contextlib.contextmanager @@ -267,13 +286,12 @@ def crop_image(image, box=None): Return a new PIL.Image. """ - # todo: perhaps we should issue a warning if we encounter this - # (It should be invalid in PAGE-XML to extend beyond parents.) if not box: box = (0, 0, image.width, image.height) elif box[0] < 0 or box[1] < 0 or box[2] > image.width or box[3] > image.height: - LOG.error('crop coordinates (%s) exceed image (%dx%d)', - str(box), image.width, image.height) + # (It should be invalid in PAGE-XML to extend beyond parents.) + LOG.warning('crop coordinates (%s) exceed image (%dx%d)', + str(box), image.width, image.height) xywh = xywh_from_bbox(*box) background = ImageStat.Stat(image).median[0] new_image = Image.new(image.mode, (xywh['w'], xywh['h']), @@ -281,6 +299,40 @@ def crop_image(image, box=None): new_image.paste(image, (-xywh['x'], -xywh['y'])) return new_image +def rotate_image(image, angle, fill='background', transparency=False): + """"Rotate an image, enlarging and filling with background. 
+ + Given a PIL.Image ``image`` and a rotation angle in degrees + counter-clockwise ``angle``, rotate the image, increasing its + size at the margins accordingly, and filling everything outside + the original image according to ``fill``: + - if ``background`` (the default), + then use the median color of the image; + - otherwise use the given color, e.g. ``white`` or (255,255,255). + Moreover, if ``transparency`` is true, then add an alpha channel + fully opaque (i.e. everything outside the original image will + be transparent for those that can interpret alpha channels). + (This is true for images which already have an alpha channel, + regardless of the setting used.) + + Return a new PIL.Image. + """ + if fill == 'background': + background = ImageStat.Stat(image).median[0] + else: + background = fill + if transparency and image.mode in ['RGB', 'L']: + # ensure no information is lost by adding transparency channel + # initialized to fully opaque (so cropping and rotation will + # expose areas as transparent): + image = image.copy() + image.putalpha(255) + new_image = image.rotate(angle, + expand=True, + #resample=Image.BILINEAR, + fillcolor=background) + return new_image + def get_local_filename(url, start=None): """ Return local filename, optionally relative to ``start`` @@ -310,9 +362,9 @@ def image_from_polygon(image, polygon, fill='background', transparency=False): Given a PIL.Image ``image`` and a numpy array ``polygon`` of relative coordinates into the image, fill everything outside the polygon hull to a color according to ``fill``: - - if ``background`` (the default), then use the median color - of the image; - - if ``white``, then use white. + - if ``background`` (the default), + then use the median color of the image; + - otherwise use the given color, e.g. ``white`` or (255,255,255). Moreover, if ``transparency`` is true, then add an alpha channel from the polygon mask (i.e. 
everything outside the polygon will be transparent for those that can interpret alpha channels). @@ -325,7 +377,7 @@ def image_from_polygon(image, polygon, fill='background', transparency=False): if fill == 'background': background = ImageStat.Stat(image).median[0] else: - background = 'white' + background = fill new_image = Image.new(image.mode, image.size, background) new_image.paste(image, mask=mask) # ensure no information is lost by a adding transparency channel @@ -452,24 +504,89 @@ def polygon_mask(image, coordinates): ImageDraw.Draw(mask).polygon(coordinates, outline=255, fill=255) return mask -def rotate_coordinates(polygon, angle, orig=np.array([0, 0])): - """Apply a passive rotation transformation to the given coordinates. - - Given a numpy array ``polygon`` of points and a rotation ``angle``, - as well as a numpy array ``orig`` of the center of rotation, - calculate the coordinate transform corresponding to the rotation - of the underlying image by ``angle`` degrees at ``center`` by - applying translation to the center, inverse rotation, - and translation from the center. - - Return a numpy array of the resulting polygon. +def adjust_canvas_to_rotation(size, angle): + """Calculate the enlarged image size after rotation. + + Given a numpy array ``size`` of an original canvas (width and height), + and a rotation angle in degrees counter-clockwise ``angle``, + calculate the new size which is necessary to encompass the full + image after rotation. + + Return a numpy array of the enlarged width and height. + """ + angle = np.deg2rad(angle) + sin = np.abs(np.sin(angle)) + cos = np.abs(np.cos(angle)) + return np.dot(np.array([[cos, sin], + [sin, cos]]), + np.array(size)) + +def rotate_coordinates(transform, angle, orig=np.array([0, 0])): + """Compose an affine coordinate transformation with a passive rotation. 
+ + Given a numpy array ``transform`` of an existing transformation + matrix in homogeneous (3d) coordinates, and a rotation angle in + degrees counter-clockwise ``angle``, as well as a numpy array + ``orig`` of the center of rotation, calculate the affine + coordinate transform corresponding to the composition of both + transformations. (This entails translation to the center, followed + by pure rotation, and subsequent translation back. However, since + rotation necessarily increases the bounding box, and thus image size, + do not translate back the same amount, but to the enlarged offset.) + + Return a numpy array of the resulting affine transformation matrix. + """ + LOG.debug('rotating by %.2f° around %s', angle, str(orig)) + rad = np.deg2rad(angle) + cos = np.cos(rad) + sin = np.sin(rad) + # get rotation matrix for passive rotation: + rot = np.array([[+cos, sin, 0], + [-sin, cos, 0], + [0, 0, 1]]) + return shift_coordinates( + np.dot(rot, + shift_coordinates(transform, + -orig)), + #orig) + # the image (bounding box) increases with rotation, + # so we must translate back to the new upper left: + adjust_canvas_to_rotation(orig, angle)) + +def shift_coordinates(transform, offset): + """Compose an affine coordinate transformation with a translation. + + Given a numpy array ``transform`` of an existing transformation + matrix in homogeneous (3d) coordinates, and a numpy array + ``offset`` of the translation vector, calculate the affine + coordinate transform corresponding to the composition of both + transformations. + + Return a numpy array of the resulting affine transformation matrix. 
""" - angle = np.deg2rad(angle) # pylint: disable=assignment-from-no-return - cos = np.cos(angle) - sin = np.sin(angle) - # active rotation: [[cos, -sin], [sin, cos]] - # passive rotation: [[cos, sin], [-sin, cos]] (inverse) - return orig + np.dot(polygon - orig, np.array([[cos, sin], [-sin, cos]]).transpose()) + LOG.debug('shifting by %s', str(offset)) + shift = np.eye(3) + shift[0, 2] = offset[0] + shift[1, 2] = offset[1] + return np.dot(shift, transform) + +def transform_coordinates(polygon, transform=None): + """Apply an affine transformation to a set of points. + + Augment the 2d numpy array of points ``polygon`` with a an extra + column of ones (homogeneous coordinates), then multiply with + the transformation matrix ``transform`` (or the identity matrix), + and finally remove the extra column from the result. + """ + if transform is None: + transform = np.eye(3) + polygon = np.insert(polygon, 2, 1, axis=1) # make 3d homogeneous coordinates + polygon = np.dot(transform, polygon.T).T + # ones = polygon[:,2] + # assert np.all(np.array_equal(ones, np.clip(ones, 1 - 1e-2, 1 + 1e-2))), \ + # 'affine transform failed' # should never happen + polygon = np.delete(polygon, 2, axis=1) # remove z coordinate again + return polygon def safe_filename(url): """ @@ -500,24 +617,4 @@ def xywh_from_points(points): """ Construct a numeric dict representing a bounding box from polygon coordinates in page representation. 
""" - xys = [[int(p) for p in pair.split(',')] for pair in points.split(' ')] - minx = sys.maxsize - miny = sys.maxsize - maxx = 0 - maxy = 0 - for xy in xys: - if xy[0] < minx: - minx = xy[0] - if xy[0] > maxx: - maxx = xy[0] - if xy[1] < miny: - miny = xy[1] - if xy[1] > maxy: - maxy = xy[1] - - return { - 'x': minx, - 'y': miny, - 'w': maxx - minx, - 'h': maxy - miny, - } + return xywh_from_bbox(*bbox_from_points(points)) From c5198f87508f2685cfb4ff33c865a8a281c43bfb Mon Sep 17 00:00:00 2001 From: bertsky Date: Mon, 7 Oct 2019 23:12:57 +0200 Subject: [PATCH 09/13] improve docstrings --- ocrd_utils/ocrd_utils/__init__.py | 78 ++++++++++++++++++------------- 1 file changed, 46 insertions(+), 32 deletions(-) diff --git a/ocrd_utils/ocrd_utils/__init__.py b/ocrd_utils/ocrd_utils/__init__.py index 8e0010182..5cc907052 100644 --- a/ocrd_utils/ocrd_utils/__init__.py +++ b/ocrd_utils/ocrd_utils/__init__.py @@ -5,7 +5,7 @@ These functions convert polygon outlines for PAGE elements on all hierarchy levels below page (i.e. region, line, word, glyph) between relative coordinates - w.r.t. the parent segment and absolute coordinates w.r.t. the top-level image. + w.r.t. a corresponding image and absolute coordinates w.r.t. the top-level image. This includes rotation and offset correction, based on affine transformations. (Used by ``Workspace`` methods ``image_from_page`` and ``image_from_segment``) @@ -17,9 +17,14 @@ the same operations context) when traversing the element hierarchy top to bottom. (Used by ``Workspace`` methods ``image_from_page`` and ``image_from_segment``). -* ``crop_image``, ``rotate_image``, ``image_from_polygon``, ``polygon_mask`` +* ``crop_image``, ``rotate_image`` - These functions operate on PIL.Image objects. + These PIL.Image functions are safe replacements for the ``crop`` and ``rotate`` + methods. + +* ``image_from_polygon``, ``polygon_mask`` + + These functions apply polygon masks to PIL.Image objects. 
* ``xywh_from_points``, ``points_from_xywh``, ``polygon_from_points`` etc. @@ -110,7 +115,6 @@ import sys import logging import os -import re from os import getcwd, chdir from os.path import isfile, abspath as os_abspath from zipfile import ZipFile @@ -119,7 +123,6 @@ import numpy as np from PIL import Image, ImageStat, ImageDraw, ImageChops -import logging from .logging import * # pylint: disable=wildcard-import from .constants import * # pylint: disable=wildcard-import @@ -171,56 +174,66 @@ def xywh_from_polygon(polygon): """Construct a numeric dict representing a bounding box from polygon coordinates in numeric list representation.""" return xywh_from_bbox(*bbox_from_polygon(polygon)) -def coordinates_for_segment(polygon, parent_image, parent_xywh): +def coordinates_for_segment(polygon, parent_image, parent_coords): """Convert relative coordinates to absolute. Given... - ``polygon``, a numpy array of points relative to - - ``parent_image``, a PIL.Image, along with - - ``parent_xywh``, its absolute coordinates (bounding box), + - ``parent_image``, a PIL.Image (not used), along with + - ``parent_coords``, its corresponding affine transformation, ...calculate the absolute coordinates within the page. - That is: - 1. If ``parent_image`` is larger than indicated by ``parent_xywh`` in - width and height, (which only occurs when the parent was rotated), - then subtract from all points an offset of half the size difference. - 2. In case the parent was rotated, rotate all points, - in opposite direction with the center of the image as origin. - 3. Shift all points to the offset of the parent. + That is, apply the given transform inversely to ``polygon`` + The transform encodes (recursively): + 1. Whenever ``parent_image`` or any of its parents was cropped, + all points must be shifted by the offset in opposite direction + (i.e. coordinate system gets translated by the upper left). + 2. 
Whenever ``parent_image`` or any of its parents was rotated, + all points must be rotated around the center of that image in + opposite direction + (i.e. coordinate system gets translated by the center in + opposite direction, rotated purely, and translated back; + the latter involves an additional offset from the increase + in canvas size necessary to accomodate all points). Return the rounded numpy array of the resulting polygon. """ polygon = np.array(polygon, dtype=np.float32) # avoid implicit type cast problems # apply inverse of affine transform: - inv_transform = np.linalg.inv(parent_xywh['transform']) + inv_transform = np.linalg.inv(parent_coords['transform']) polygon = transform_coordinates(polygon, inv_transform) return np.round(polygon).astype(np.int32) -def coordinates_of_segment(segment, parent_image, parent_xywh): +def coordinates_of_segment(segment, parent_image, parent_coords): """Extract the coordinates of a PAGE segment element relative to its parent. Given... - ``segment``, a PAGE segment object in absolute coordinates (i.e. RegionType / TextLineType / WordType / GlyphType), and - ``parent_image``, the PIL.Image of its corresponding parent object - (i.e. PageType / RegionType / TextLineType / WordType), along with - - ``parent_xywh``, its absolute coordinates (bounding box), + (i.e. PageType / RegionType / TextLineType / WordType), (not used), + along with + - ``parent_coords``, its corresponding affine transformation, ...calculate the relative coordinates of the segment within the image. - That is: - 1. Shift all points from the offset of the parent. - 2. In case the parent image was rotated, rotate all points, - with the center of the image as origin. - 3. If ``parent_image`` is larger than indicated by ``parent_xywh`` in - width and height, (which only occurs when the parent was rotated), - then add to all points an offset of half the size difference. + That is, apply the given transform to the points annotated in ``segment``. 
+ The transform encodes (recursively): + 1. Whenever ``parent_image`` or any of its parents was cropped, + all points must be shifted by the offset + (i.e. coordinate system gets translated by the upper left). + 2. Whenever ``parent_image`` or any of its parents was rotated, + all points must be rotated around the center of that image + (i.e. coordinate system gets translated by the center in + opposite direction, rotated purely, and translated back; + the latter involves an additional offset from the increase + in canvas size necessary to accomodate all points). Return the rounded numpy array of the resulting polygon. """ # get polygon: polygon = np.array(polygon_from_points(segment.get_Coords().points)) # apply affine transform: - polygon = transform_coordinates(polygon, parent_xywh['transform']) + polygon = transform_coordinates(polygon, parent_coords['transform']) return np.round(polygon).astype(np.int32) @contextlib.contextmanager @@ -308,7 +321,7 @@ def rotate_image(image, angle, fill='background', transparency=False): the original image according to ``fill``: - if ``background`` (the default), then use the median color of the image; - - otherwise use the given color, e.g. ``white`` or (255,255,255). + - otherwise use the given color, e.g. ``'white'`` or (255,255,255). Moreover, if ``transparency`` is true, then add an alpha channel fully opaque (i.e. everything outside the original image will be transparent for those that can interpret alpha channels). @@ -364,12 +377,13 @@ def image_from_polygon(image, polygon, fill='background', transparency=False): outside the polygon hull to a color according to ``fill``: - if ``background`` (the default), then use the median color of the image; - - otherwise use the given color, e.g. ``white`` or (255,255,255). + - otherwise use the given color, e.g. ``'white'`` or (255,255,255). Moreover, if ``transparency`` is true, then add an alpha channel from the polygon mask (i.e. 
everything outside the polygon will - be transparent for those that can interpret alpha channels). - (Images which already have an alpha channel will have them - shrinked from the polygon mask.) + be transparent, for those consumers that can interpret alpha channels). + Images which already have an alpha channel will have it shrinked + from the polygon mask (i.e. everything outside the polygon will + be transparent, in addition to existing transparent pixels). Return a new PIL.Image. """ From 359e153fb060177439e88255b94b4debf64d6505 Mon Sep 17 00:00:00 2001 From: bertsky Date: Mon, 7 Oct 2019 23:29:28 +0200 Subject: [PATCH 10/13] image_from_page: integer instead of float polygon --- ocrd/ocrd/workspace.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocrd/ocrd/workspace.py b/ocrd/ocrd/workspace.py index 830c78929..a0dd15540 100644 --- a/ocrd/ocrd/workspace.py +++ b/ocrd/ocrd/workspace.py @@ -322,7 +322,7 @@ def image_from_page(self, page, page_id, log.debug("Using explicitly set page border '%s' for page '%s'", page_points, page_id) # get polygon outline of page border: - page_polygon = np.array(polygon_from_points(page_points)) + page_polygon = np.array(polygon_from_points(page_points), dtype=np.int32) page_bbox = bbox_from_polygon(page_polygon) # subtract offset in affine coordinate transform: # (consistent with image cropping or AlternativeImage below) From 217e38a41f7edf11d9737458febb67d16554cc8f Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Fri, 11 Oct 2019 15:36:46 +0200 Subject: [PATCH 11/13] incorporate transposition (image features rotated-90/180/270): MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - image_from_page / image_from_segment: when applying @orientation to the image (or at least applying corresponding operations to the affine coordinate transform), always try to split the angle into orientation (multiples of 90°) and the remaining skew (both positive and negative, i.e. 
split symmetrically around -45°/45°); now apply skew via rotation, but orientation via transposition (i.e. a combination of reflection and 90° rotation operations, which may also swap width with height) – describing the latter via image features `rotated-90`, `rotated-180` and `rotated-270`, as required by the spec - adjust_canvas_to_transposition, transpose_coordinates, transpose_image: encapsulate and document all possible PIL.Image transposition methods --- ocrd/ocrd/workspace.py | 163 +++++++++++++++++++++++++----- ocrd_utils/ocrd_utils/__init__.py | 141 ++++++++++++++++++++++++-- 2 files changed, 269 insertions(+), 35 deletions(-) diff --git a/ocrd/ocrd/workspace.py b/ocrd/ocrd/workspace.py index a0dd15540..7d2a1d3bd 100644 --- a/ocrd/ocrd/workspace.py +++ b/ocrd/ocrd/workspace.py @@ -14,11 +14,14 @@ image_from_polygon, coordinates_of_segment, transform_coordinates, + adjust_canvas_to_rotation, + adjust_canvas_to_transposition, shift_coordinates, rotate_coordinates, - adjust_canvas_to_rotation, - rotate_image, + transpose_coordinates, crop_image, + rotate_image, + transpose_image, bbox_from_polygon, polygon_from_points, xywh_from_bbox, @@ -279,7 +282,9 @@ def image_from_page(self, page, page_id, a Border exists, and unless "cropped" is being filtered, then crop it. Likewise, if the chosen image does not have the feature "deskewed" yet, but an @orientation angle is annotated, and unless "deskewed" is being - filtered, then rotate it. + filtered, then rotate it. (However, if @orientation is above the + [-45°,45°] interval, then apply as much transposition as possible first, + unless "rotated-90" / "rotated-180" / "rotated-270" is being filtered.) Cropping uses a polygon mask (not just the bounding box rectangle). Areas outside the polygon will be filled according to ``fill``: @@ -298,7 +303,7 @@ def image_from_page(self, page, page_id, converts from absolute coordinates to those relative to the image, i.e. 
after cropping to the page's border / bounding box (if any) and deskewing with the page's orientation angle (if any) - - ``angle``: the page-level rotation angle applied to the image, + - ``angle``: the rotation/reflection angle applied to the image so far, - ``features``: the AlternativeImage @comments for the image, i.e. names of all operations that lead up to this result, * an OcrdExif instance associated with the original image. @@ -344,15 +349,42 @@ def image_from_page(self, page, page_id, # page angle: PAGE @orientation is defined clockwise, # whereas PIL/ndimage rotation is in mathematical direction: page_coords['angle'] = -(page.get_orientation() or 0) - if (page_coords['angle'] and + # map angle from (-180,180] to [0,360], and partition into multiples of 90; + # but avoid unnecessary large remainders, i.e. split symmetrically: + orientation = (page_coords['angle'] + 45) % 360 + orientation = orientation - (orientation % 90) + skew = (page_coords['angle'] % 360) - orientation + skew = 180 - (180 - skew) % 360 # map to [-45,45] + page_coords['angle'] = 0 # nothing applied yet (depends on filters) + log.debug("page '%s' has orientation=%d skew=%.2f", + page_id, orientation, skew) + + if (orientation and + not 'rotated-%d' % orientation in feature_filter.split(',')): + # Transpose in affine coordinate transform: + # (consistent with image transposition or AlternativeImage below) + transposition = { 90: Image.ROTATE_90, + 180: Image.ROTATE_180, + 270: Image.ROTATE_270 + }.get(orientation) # no default + page_coords['transform'] = transpose_coordinates( + page_coords['transform'], + transposition, + np.array([0.5 * page_xywh['w'], + 0.5 * page_xywh['h']])) + page_xywh['w'], page_xywh['h'] = adjust_canvas_to_transposition( + [page_xywh['w'], page_xywh['h']], transposition) + page_coords['angle'] = orientation + if (skew and not 'deskewed' in feature_filter.split(',')): # Rotate around center in affine coordinate transform: # (consistent with image rotation or 
AlternativeImage below) page_coords['transform'] = rotate_coordinates( page_coords['transform'], - page_coords['angle'], + skew, np.array([0.5 * page_xywh['w'], 0.5 * page_xywh['h']])) + page_coords['angle'] += skew # initialize AlternativeImage@comments classes as empty: page_coords['features'] = '' @@ -397,20 +429,43 @@ def image_from_page(self, page, page_id, # recrop into page rectangle: page_image = crop_image(page_image, box=page_bbox) page_coords['features'] += ',cropped' + # transpose, if (still) necessary: + if (orientation and + not 'rotated-%d' % orientation in page_coords['features'] and + not 'rotated-%d' % orientation in feature_filter.split(',')): + log.info("Transposing %s for page '%s' by %d°", + "AlternativeImage" if alternative_image else + "image", page_id, orientation) + page_image = transpose_image(page_image, { + 90: Image.ROTATE_90, + 180: Image.ROTATE_180, + 270: Image.ROTATE_270 + }.get(orientation)) # no default + page_coords['features'] += ',rotated-%d' % orientation + if (orientation and + not 'rotated-%d' % orientation in feature_filter.split(',')): + # FIXME we should enforce consistency here (i.e. 
split into transposition + # and minimal rotation) + if not (page_image.width == page_xywh['w'] and + page_image.height == page_xywh['h']): + log.error('page "%s" image (%s; %dx%d) has not been transposed properly (%dx%d) during rotation', + page_id, page_coords['features'], + page_image.width, page_image.height, + page_xywh['w'], page_xywh['h']) # deskew, if (still) necessary: - if (page_coords['angle'] and + if (skew and not 'deskewed' in page_coords['features'] and not 'deskewed' in feature_filter.split(',')): log.info("Rotating %s for page '%s' by %.2f°", "AlternativeImage" if alternative_image else - "image", page_id, page_coords['angle']) - page_image = rotate_image(page_image, page_coords['angle'], + "image", page_id, skew) + page_image = rotate_image(page_image, skew, fill=fill, transparency=transparency) page_coords['features'] += ',deskewed' - if (page_coords['angle'] and + if (skew and not 'deskewed' in feature_filter.split(',')): w_new, h_new = adjust_canvas_to_rotation( - [page_xywh['w'], page_xywh['h']], page_coords['angle']) + [page_xywh['w'], page_xywh['h']], skew) # FIXME we should enforce consistency here (i.e. rotation always reshapes, # and rescaling never happens) if not (w_new - 1.5 < page_image.width < w_new + 1.5 and @@ -445,7 +500,7 @@ def image_from_segment(self, segment, parent_image, parent_coords, - ``transform``: a Numpy array with an affine transform which converts from absolute coordinates to those relative to the image, i.e. after applying all operations (starting with the original image) - - ``angle``: the parent-level rotation angle applied to the image, + - ``angle``: the rotation/reflection angle applied to the image so far, - ``features``: the AlternativeImage @comments for the image, i.e. 
names of all operations that lead up to this result, and * ``segment``, a PAGE segment object logically contained in it @@ -482,7 +537,10 @@ def image_from_segment(self, segment, parent_image, parent_coords, Regardless, if any @orientation angle is annotated for the segment (from segment-level deskewing), and the chosen image does not have the feature "deskewed" yet, and unless "deskewed" is being filtered, - then rotate it. + then rotate it - compensating for any previous ``angle``. (However, + if @orientation is above the [-45°,45°] interval, then apply as much + transposition as possible first, unless "rotated-90" / "rotated-180" / + "rotated-270" is being filtered.) Return a tuple: * the extracted image, @@ -492,7 +550,7 @@ def image_from_segment(self, segment, parent_image, parent_coords, i.e. after applying all parent operations, and then cropping to the segment's bounding box, and deskewing with the segment's orientation angle (if any) - - ``angle``: the segment-level rotation angle applied to the image, + - ``angle``: the rotation/reflection angle applied to the image so far, - ``features``: the AlternativeImage @comments for the image, i.e. names of all operations that lead up to this result. (These can be used to create a new AlternativeImage, or passed down @@ -548,17 +606,49 @@ def image_from_segment(self, segment, parent_image, parent_coords, segment_coords['angle'] = -(segment.get_orientation() or 0) else: segment_coords['angle'] = 0 - if (segment_coords['angle'] and + if segment_coords['angle']: + # @orientation is always absolute; if higher levels + # have already rotated, then we must compensate: + angle = segment_coords['angle'] - parent_coords['angle'] + # map angle from (-180,180] to [0,360], and partition into multiples of 90; + # but avoid unnecessary large remainders, i.e. 
split symmetrically: + orientation = (angle + 45) % 360 + orientation = orientation - (orientation % 90) + skew = (angle % 360) - orientation + skew = 180 - (180 - skew) % 360 # map to [-45,45] + segment_coords['angle'] = parent_coords['angle'] # nothing applied yet (depends on filters) + log.debug("segment '%s' has orientation=%d skew=%.2f", + segment.id, orientation, skew) + else: + orientation = 0 + skew = 0 + + if (orientation and + not 'rotated-%d' % orientation in feature_filter.split(',')): + # Transpose in affine coordinate transform: + # (consistent with image transposition or AlternativeImage below) + transposition = { 90: Image.ROTATE_90, + 180: Image.ROTATE_180, + 270: Image.ROTATE_270 + }.get(orientation) # no default + segment_coords['transform'] = transpose_coordinates( + segment_coords['transform'], + transposition, + np.array([0.5 * segment_xywh['w'], + 0.5 * segment_xywh['h']])) + segment_xywh['w'], segment_xywh['h'] = adjust_canvas_to_transposition( + [segment_xywh['w'], segment_xywh['h']], transposition) + segment_coords['angle'] = orientation + if (skew and not 'deskewed' in feature_filter.split(',')): # Rotate around center in affine coordinate transform: # (consistent with image rotation or AlternativeImage below) segment_coords['transform'] = rotate_coordinates( segment_coords['transform'], - # @orientation is always absolute; if higher levels - # have already rotated, then we must compensate: - segment_coords['angle'] - parent_coords['angle'], + skew, np.array([0.5 * segment_xywh['w'], 0.5 * segment_xywh['h']])) + segment_coords['angle'] += skew # initialize AlternativeImage@comments classes from parent: segment_coords['features'] = parent_coords['features'] + ',cropped' @@ -589,24 +679,45 @@ def image_from_segment(self, segment, parent_image, parent_coords, features, segment.id) segment_image = self._resolve_image_as_pil(alternative_image.get_filename()) segment_coords['features'] = features + # transpose, if (still) necessary: + if 
(orientation and + not 'rotated-%d' % orientation in segment_coords['features'] and + not 'rotated-%d' % orientation in feature_filter.split(',')): + log.info("Transposing %s for segment '%s' by %d°", + "AlternativeImage" if alternative_image else + "image", segment.id, orientation) + segment_image = transpose_image(segment_image, { + 90: Image.ROTATE_90, + 180: Image.ROTATE_180, + 270: Image.ROTATE_270 + }.get(orientation)) # no default + segment_coords['features'] += ',rotated-%d' % orientation + if (orientation and + not 'rotated-%d' % orientation in feature_filter.split(',')): + # FIXME we should enforce consistency here (i.e. split into transposition + # and minimal rotation) + if not (segment_image.width == segment_xywh['w'] and + segment_image.height == segment_xywh['h']): + log.error('segment "%s" image (%s; %dx%d) has not been transposed properly (%dx%d) during rotation', + segment.id, segment_coords['features'], + segment_image.width, segment_image.height, + segment_xywh['w'], segment_xywh['h']) # deskew, if (still) necessary: - if (segment_coords['angle'] and + if (skew and not 'deskewed' in segment_coords['features'] and not 'deskewed' in feature_filter.split(',')): - log.info("Rotating %s for segment '%s' by %.2f°-%.2f°", + log.info("Rotating %s for segment '%s' by %.2f°", "AlternativeImage" if alternative_image else - "image", segment.id, segment_coords['angle'], parent_coords['angle']) - # @orientation is always absolute; if higher levels - # have already rotated, then we must compensate: - segment_image = rotate_image(segment_image, segment_coords['angle'] - parent_coords['angle'], + "image", segment.id, skew) + segment_image = rotate_image(segment_image, skew, fill=fill, transparency=transparency) segment_coords['features'] += ',deskewed' - if (segment_coords['angle'] and + if (skew and not 'deskewed' in feature_filter.split(',')): # FIXME we should enforce consistency here (i.e. 
rotation always reshapes, # and rescaling never happens) w_new, h_new = adjust_canvas_to_rotation( - [segment_xywh['w'], segment_xywh['h']], segment_coords['angle']) + [segment_xywh['w'], segment_xywh['h']], skew) if not (w_new - 1.5 < segment_image.width < w_new + 1.5 and h_new - 1.5 < segment_image.height < h_new + 1.5): log.error('segment "%s" image (%s; %dx%d) has not been reshaped properly (%dx%d) during rotation', diff --git a/ocrd_utils/ocrd_utils/__init__.py b/ocrd_utils/ocrd_utils/__init__.py index 5cc907052..d29b020ed 100644 --- a/ocrd_utils/ocrd_utils/__init__.py +++ b/ocrd_utils/ocrd_utils/__init__.py @@ -9,18 +9,18 @@ This includes rotation and offset correction, based on affine transformations. (Used by ``Workspace`` methods ``image_from_page`` and ``image_from_segment``) -* ``rotate_coordinates``, ``shift_coordinates``, ``transform_coordinates`` +* ``rotate_coordinates``, ``shift_coordinates``, ``transpose_coordinates``, ``transform_coordinates`` - These backend functions compose affine transformations for rotation and offset - correction of coordinates, or apply them to a set of points. They can be used - to pass down the coordinate system along with images (both invariably sharing + These backend functions compose affine transformations for reflection, rotation + and offset correction of coordinates, or apply them to a set of points. They can be + used to pass down the coordinate system along with images (both invariably sharing the same operations context) when traversing the element hierarchy top to bottom. (Used by ``Workspace`` methods ``image_from_page`` and ``image_from_segment``). -* ``crop_image``, ``rotate_image`` +* ``rotate_image``, ``crop_image``, ``transpose_image`` - These PIL.Image functions are safe replacements for the ``crop`` and ``rotate`` - methods. + These PIL.Image functions are safe replacements for the ``rotate``, ``crop``, and + ``transpose`` methods. 
* ``image_from_polygon``, ``polygon_mask`` @@ -67,6 +67,7 @@ __all__ = [ 'abspath', 'adjust_canvas_to_rotation', + 'adjust_canvas_to_transposition', 'bbox_from_points', 'bbox_from_xywh', 'bbox_from_polygon', @@ -100,6 +101,8 @@ 'setOverrideLogLevel', 'shift_coordinates', 'transform_coordinates', + 'transpose_coordinates', + 'transpose_image', 'unzip_file_to_dir', 'xywh_from_bbox', 'xywh_from_points', @@ -305,6 +308,7 @@ def crop_image(image, box=None): # (It should be invalid in PAGE-XML to extend beyond parents.) LOG.warning('crop coordinates (%s) exceed image (%dx%d)', str(box), image.width, image.height) + LOG.debug('cropping image to %s', str(box)) xywh = xywh_from_bbox(*box) background = ImageStat.Stat(image).median[0] new_image = Image.new(image.mode, (xywh['w'], xywh['h']), @@ -330,6 +334,7 @@ def rotate_image(image, angle, fill='background', transparency=False): Return a new PIL.Image. """ + LOG.debug('rotating image by %.2f°', angle) if fill == 'background': background = ImageStat.Stat(image).median[0] else: @@ -346,6 +351,44 @@ def rotate_image(image, angle, fill='background', transparency=False): fillcolor=background) return new_image +def transpose_image(image, method): + """Transpose (i.e. flip or rotate in 90° multiples) an image. + + Given a PIL.Image ``image`` and a transposition mode ``method``, + apply the respective operation: + - ``PIL.Image.FLIP_LEFT_RIGHT``: + all pixels get mirrored at half the width of the image + - ``PIL.Image.FLIP_TOP_BOTTOM``: + all pixels get mirrored at half the height of the image + - ``PIL.Image.ROTATE_180``: + all pixels get mirrored at both half the width and half the height + of the image, + i.e. the image gets rotated by 180° counter-clockwise + - ``PIL.Image.ROTATE_90``: + rows become columns (but counted from the right) and + columns become rows, + i.e.
the image gets rotated by 90° counter-clockwise; + width becomes height and vice versa + - ``PIL.Image.ROTATE_270``: + rows become columns and + columns become rows (but counted from the bottom), + i.e. the image gets rotated by 270° counter-clockwise; + width becomes height and vice versa + - ``PIL.Image.TRANSPOSE``: + rows become columns and vice versa, + i.e. all pixels get mirrored at the main diagonal; + width becomes height and vice versa + - ``PIL.Image.TRANSVERSE``: + rows become columns (but counted from the right) and + columns become rows (but counted from the bottom), + i.e. all pixels get mirrored at the opposite diagonal; + width becomes height and vice versa + + Return a new PIL.Image. + """ + LOG.debug('transposing image with %s', membername(Image, method)) + return image.transpose(method) + def get_local_filename(url, start=None): """ Return local filename, optionally relative to ``start`` @@ -534,7 +577,23 @@ def adjust_canvas_to_rotation(size, angle): return np.dot(np.array([[cos, sin], [sin, cos]]), np.array(size)) + +def adjust_canvas_to_transposition(size, method): + """Calculate the flipped image size after transposition. + Given a numpy array ``size`` of an original canvas (width and height), + and a transposition mode ``method`` (see ``transpose_image``), + calculate the new size after transposition. + + Return the width and height, swapped if the transposition exchanges them. + """ + if method in [Image.ROTATE_90, + Image.ROTATE_270, + Image.TRANSPOSE, + Image.TRANSVERSE]: + size = size[::-1] + return size + def rotate_coordinates(transform, angle, orig=np.array([0, 0])): """Compose an affine coordinate transformation with a passive rotation. @@ -550,7 +609,7 @@ def rotate_coordinates(transform, angle, orig=np.array([0, 0])): Return a numpy array of the resulting affine transformation matrix.
""" - LOG.debug('rotating by %.2f° around %s', angle, str(orig)) + LOG.debug('rotating coordinates by %.2f° around %s', angle, str(orig)) rad = np.deg2rad(angle) cos = np.cos(rad) sin = np.sin(rad) @@ -578,12 +637,76 @@ def shift_coordinates(transform, offset): Return a numpy array of the resulting affine transformation matrix. """ - LOG.debug('shifting by %s', str(offset)) + LOG.debug('shifting coordinates by %s', str(offset)) shift = np.eye(3) shift[0, 2] = offset[0] shift[1, 2] = offset[1] return np.dot(shift, transform) +def transpose_coordinates(transform, method, orig=np.array([0, 0])): + """Compose an affine coordinate transformation with a transposition (i.e. flip or rotate in 90° multiples). + + Given a numpy array ``transform`` of an existing transformation + matrix in homogeneous (3d) coordinates, a transposition mode ``method``, + as well as a numpy array ``orig`` of the center of the image, + calculate the affine coordinate transform corresponding to the composition + of both transformations, which is respectively: + - ``PIL.Image.FLIP_LEFT_RIGHT``: + entails translation to the center, followed by pure reflection + about the y-axis, and subsequent translation back + - ``PIL.Image.FLIP_TOP_BOTTOM``: + entails translation to the center, followed by pure reflection + about the x-axis, and subsequent translation back + - ``PIL.Image.ROTATE_180``: + entails translation to the center, followed by pure reflection + about the origin, and subsequent translation back + - ``PIL.Image.ROTATE_90``: + entails translation to the center, followed by pure rotation + by 90° counter-clockwise, and subsequent translation back + - ``PIL.Image.ROTATE_270``: + entails translation to the center, followed by pure rotation + by 270° counter-clockwise, and subsequent translation back + - ``PIL.Image.TRANSPOSE``: + entails translation to the center, followed by pure rotation + by 90° counter-clockwise and pure reflection about the x-axis, + and subsequent translation back + -
``PIL.Image.TRANSVERSE``: + entails translation to the center, followed by pure rotation + by 90° counter-clockwise and pure reflection about the y-axis, + and subsequent translation back + + Return a numpy array of the resulting affine transformation matrix. + """ + LOG.debug('transposing coordinates with %s around %s', membername(Image, method), str(orig)) + # get rotation matrix for passive rotation/reflection: + rot90 = np.array([[0, 1, 0], + [-1, 0, 0], + [0, 0, 1]]) + reflx = np.array([[1, 0, 0], + [0, -1, 0], + [0, 0, 1]]) + refly = np.array([[-1, 0, 0], + [0, 1, 0], + [0, 0, 1]]) + transform = shift_coordinates(transform, -orig) + operations = { + Image.FLIP_LEFT_RIGHT: [refly], + Image.FLIP_TOP_BOTTOM: [reflx], + Image.ROTATE_180: [reflx, refly], + Image.ROTATE_90: [rot90], + Image.ROTATE_270: [rot90, reflx, refly], + Image.TRANSPOSE: [rot90, reflx], + Image.TRANSVERSE: [rot90, refly] + }.get(method) # no default + for operation in operations: + transform = np.dot(operation, transform) + transform = shift_coordinates( + transform, + # the image (bounding box) may flip with transposition, + # so we must translate back to the new upper left: + adjust_canvas_to_transposition(orig, method)) + return transform + def transform_coordinates(polygon, transform=None): """Apply an affine transformation to a set of points. 
From a4ede35b7fb0343b0c170b3b78e47aff7adbf3f3 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Tue, 15 Oct 2019 17:22:50 +0200 Subject: [PATCH 12/13] fix fill/transparency: - for fillcolor background estimation, use median of all channels (but make sure to set alpha value to zero/fully transparent) - workaround for Pillow #1600 (creating black fill when processing images in LA mode) --- ocrd_utils/ocrd_utils/__init__.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/ocrd_utils/ocrd_utils/__init__.py b/ocrd_utils/ocrd_utils/__init__.py index d29b020ed..fe8fd87c7 100644 --- a/ocrd_utils/ocrd_utils/__init__.py +++ b/ocrd_utils/ocrd_utils/__init__.py @@ -310,7 +310,7 @@ def crop_image(image, box=None): str(box), image.width, image.height) LOG.debug('cropping image to %s', str(box)) xywh = xywh_from_bbox(*box) - background = ImageStat.Stat(image).median[0] + background = tuple(ImageStat.Stat(image).median) new_image = Image.new(image.mode, (xywh['w'], xywh['h']), background) # or 'white' new_image.paste(image, (-xywh['x'], -xywh['y'])) @@ -335,20 +335,30 @@ def rotate_image(image, angle, fill='background', transparency=False): Return a new PIL.Image. 
""" LOG.debug('rotating image by %.2f°', angle) - if fill == 'background': - background = ImageStat.Stat(image).median[0] - else: - background = fill if transparency and image.mode in ['RGB', 'L']: # ensure no information is lost by adding transparency channel # initialized to fully opaque (so cropping and rotation will # expose areas as transparent): image = image.copy() image.putalpha(255) + if fill == 'background': + background = ImageStat.Stat(image).median + if image.mode in ['RGBA', 'LA']: + background[-1] = 0 # fully transparent + background = tuple(background) + else: + background = fill new_image = image.rotate(angle, expand=True, #resample=Image.BILINEAR, fillcolor=background) + if new_image.mode in ['LA']: + # workaround for #1600 (bug in LA support which + # causes areas fully transparent before rotation + # to be filled with black here): + image = new_image + new_image = Image.new(image.mode, image.size, background) + new_image.paste(image, mask=image.getchannel('A')) return new_image def transpose_image(image, method): @@ -432,7 +442,7 @@ def image_from_polygon(image, polygon, fill='background', transparency=False): """ mask = polygon_mask(image, polygon) if fill == 'background': - background = ImageStat.Stat(image).median[0] + background = tuple(ImageStat.Stat(image).median) else: background = fill new_image = Image.new(image.mode, image.size, background) From 27140d83da7ee7e6ffb9a507c0ec67aee1f9f164 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Tue, 15 Oct 2019 17:27:27 +0200 Subject: [PATCH 13/13] fix multi-level deskewing: - angle already applied must be sum of angle applied on parent level and on current level - not all image features are monotonic - some do propagate through all hierarchy levels: binarized, grayscale_normalized, despeckled - some must be treated level-local (to make sense and allow for relative coordinate consistency): deskewed, rotated-90, rotated-180, rotated-270, dewarped --- ocrd/ocrd/workspace.py | 14 +++++++++----- 
1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/ocrd/ocrd/workspace.py b/ocrd/ocrd/workspace.py index 7d2a1d3bd..facfe0df6 100644 --- a/ocrd/ocrd/workspace.py +++ b/ocrd/ocrd/workspace.py @@ -616,12 +616,12 @@ def image_from_segment(self, segment, parent_image, parent_coords, orientation = orientation - (orientation % 90) skew = (angle % 360) - orientation skew = 180 - (180 - skew) % 360 # map to [-45,45] - segment_coords['angle'] = parent_coords['angle'] # nothing applied yet (depends on filters) log.debug("segment '%s' has orientation=%d skew=%.2f", segment.id, orientation, skew) else: orientation = 0 skew = 0 + segment_coords['angle'] = parent_coords['angle'] # nothing applied yet (depends on filters) if (orientation and not 'rotated-%d' % orientation in feature_filter.split(',')): @@ -638,7 +638,7 @@ def image_from_segment(self, segment, parent_image, parent_coords, 0.5 * segment_xywh['h']])) segment_xywh['w'], segment_xywh['h'] = adjust_canvas_to_transposition( [segment_xywh['w'], segment_xywh['h']], transposition) - segment_coords['angle'] = orientation + segment_coords['angle'] += orientation if (skew and not 'deskewed' in feature_filter.split(',')): # Rotate around center in affine coordinate transform: @@ -650,9 +650,13 @@ def image_from_segment(self, segment, parent_image, parent_coords, 0.5 * segment_xywh['h']])) segment_coords['angle'] += skew - # initialize AlternativeImage@comments classes from parent: - segment_coords['features'] = parent_coords['features'] + ',cropped' - + # initialize AlternativeImage@comments classes from parent, except + # for those operations that can apply on multiple hierarchy levels: + segment_coords['features'] = ','.join( + [feature for feature in parent_coords['features'].split(',') + if feature in ['binarized', 'grayscale_normalized', + 'despeckled', 'dewarped']]) + alternative_image = None alternative_images = segment.get_AlternativeImage() if alternative_images: