cropping/rotation: caller can opt out of transparency:

- image_from_page, image_from_segment, image_from_polygon: add parameter ``fill`` - possible values white/background/transparent; ``transparent`` (behaviour introduced by this branch) is the default for image_from_page and image_from_segment, while image_from_polygon defaults to ``background``
OCR-D · Sep 20, 2019 · b49648d · b49648d
1 parent 440863f
commit b49648d
Show file tree

Hide file tree

Showing 3 changed files with 68 additions and 49 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,9 +5,11 @@ Versioned according to [Semantic Versioning](http://semver.org/).
 
 ## Unreleased
 
+* image_from_page etc: allow filling with background or transparency
+
 ## [1.0.0b19] - 2019-09-10
 
-* image_from_page: allow filtering by feature (@comment), #294
+* image_from_page etc: allow filtering by feature (@comments), #294
 
 ## [1.0.0b18] - 2019-09-06
 
@@ -25,7 +27,7 @@ Fixed:
   * Processor: `chdir` to workspace directory on init so relative files resolve properly
   * typos in docstrings
   * README: 'module' -> 'package'
-  * workspace.image_from_page: logic with rotation/angle
+  * workspace.image_from_page etc: logic with rotation/angle
   * Adapted test suite to OCR-D/assets now with file extensions
 
 Added:

diff --git a/ocrd/ocrd/workspace.py b/ocrd/ocrd/workspace.py
@@ -249,7 +249,7 @@ def _resolve_image_as_pil(self, image_url, coords=None):
         ]
         return Image.fromarray(region_cut)
 
-    def image_from_page(self, page, page_id, feature_selector='', feature_filter=''):
+    def image_from_page(self, page, page_id, fill='transparent', feature_selector='', feature_filter=''):
         """Extract a Page image from the workspace.
 
         Given a PageType object, ``page``, extract its PIL.Image from
@@ -267,7 +267,15 @@ def image_from_page(self, page, page_id, feature_selector='', feature_filter='')
         chosen image does not have "deskewed", but an @orientation exists,
         then rotate it (unless "deskewed" is also being filtered).
 
-        Cropping uses a polygon mask (not just the rectangle).
+        Cropping uses a polygon mask (not just the rectangle). Areas outside
+        the polygon (regardless of cropping and deskewing) will be filled
+        according to ``fill``:
+        - if ``background``, then fill with the median color
+          of the image;
+        - if ``white``, then fill with white;
+        - if ``transparent`` (the default), then add a transparency
+          channel which is fully opaque before cropping and rotating
+          (thus only the exposed areas will be transparent afterwards).
 
         (Required and produced features need not be in the same order, so
         ``feature_selector`` is merely a mask specifying Boolean AND, and
@@ -350,7 +358,7 @@ def image_from_page(self, page, page_id, feature_selector='', feature_filter='')
             # get polygon outline of page border:
             page_polygon = np.array(polygon_from_points(page_points))
             # create a mask from the page polygon:
-            page_image = image_from_polygon(page_image, page_polygon)
+            page_image = image_from_polygon(page_image, page_polygon, fill=fill)
             # recrop into page rectangle:
             page_image = crop_image(page_image,
                                     box=(page_xywh['x'],
@@ -365,21 +373,19 @@ def image_from_page(self, page, page_id, feature_selector='', feature_filter='')
             log.info("Rotating %s for page '%s' by %.2f°",
                      "AlternativeImage" if alternative_image else
                      "image", page_id, page_xywh['angle'])
-            if page_image.mode in ['RGB', 'L']:
-                # ensure no information is lost by adding transparency
-                # (which rotation will respect):
+            if fill == 'transparent' and page_image.mode in ['RGB', 'L']:
+                # ensure no information is lost by adding transparency channel
+                # initialized to fully opaque (so cropping and rotation will
+                # expose areas as transparent):
                 page_image.putalpha(255)
-            background = ImageStat.Stat(page_image).median[0]
+            if fill == 'background':
+                background = ImageStat.Stat(page_image).median[0]
+            else:
+                background = 'white'
             page_image = page_image.rotate(page_xywh['angle'],
                                            expand=True,
                                            #resample=Image.BILINEAR,
-                                           fillcolor=(
-                                               # background detection by median can fail
-                                               # if segments are very small or have lots
-                                               # of image foreground; if we already know
-                                               # this is binarized, fill with white:
-                                               'white' if page_image.mode == '1' else
-                                               background))
+                                           fillcolor=background)
             page_xywh['features'] += ',deskewed'
         # verify constraints again:
         if not all(feature in page_xywh['features']
@@ -395,9 +401,10 @@ def image_from_page(self, page, page_id, feature_selector='', feature_filter='')
         # subtract offset from any increase in binary region size over source:
         page_xywh['x'] -= round(0.5 * max(0, page_image.width  - page_xywh['w']))
         page_xywh['y'] -= round(0.5 * max(0, page_image.height - page_xywh['h']))
+        page_image.format = 'PNG' # workaround for tesserocr#194
         return page_image, page_xywh, page_image_info
 
-    def image_from_segment(self, segment, parent_image, parent_xywh, feature_selector='', feature_filter=''):
+    def image_from_segment(self, segment, parent_image, parent_xywh, fill='transparent', feature_selector='', feature_filter=''):
         """Extract a segment image from its parent's image.
 
         Given a PIL.Image of the parent, ``parent_image``, with its
@@ -421,8 +428,16 @@ def image_from_segment(self, segment, parent_image, parent_xywh, feature_selecto
         the segment coordinates in an inverse transformation (i.e. translation
         to center, passive rotation, re-translation).
 
-        Cropping uses a polygon mask (not just the rectangle).
-
+        Cropping uses a polygon mask (not just the rectangle). Areas outside
+        the polygon (regardless of cropping and deskewing) will be filled
+        according to ``fill``:
+        - if ``background``, then fill with the median color
+          of the image;
+        - if ``white``, then fill with white;
+        - if ``transparent`` (the default), then add a transparency
+          channel which is fully opaque before cropping and rotating
+          (thus only the exposed areas will be transparent afterwards).
+        
         (Required and produced features need not be in the same order, so
         ``feature_selector`` is merely a mask specifying Boolean AND, and
         ``feature_filter`` is merely a mask specifying Boolean OR.)
@@ -466,7 +481,7 @@ def image_from_segment(self, segment, parent_image, parent_xywh, feature_selecto
         # get polygon outline of segment relative to parent image:
         segment_polygon = coordinates_of_segment(segment, parent_image, parent_xywh)
         # create a mask from the segment polygon:
-        segment_image = image_from_polygon(parent_image, segment_polygon)
+        segment_image = image_from_polygon(parent_image, segment_polygon, fill=fill)
         # recrop into segment rectangle:
         segment_image = crop_image(segment_image,
                                    box=(segment_xywh['x'] - parent_xywh['x'],
@@ -514,21 +529,19 @@ def image_from_segment(self, segment, parent_image, parent_xywh, feature_selecto
             log.info("Rotating %s for segment '%s' by %.2f°",
                      "AlternativeImage" if alternative_image else
                      "image", segment.id, segment_xywh['angle'])
-            if segment_image.mode in ['RGB', 'L']:
-                # ensure no information is lost by adding transparency
-                # (which rotation will respect):
+            if fill == 'transparent' and segment_image.mode in ['RGB', 'L']:
+                # ensure no information is lost by adding transparency channel
+                # initialized to fully opaque (so cropping and rotation will
+                # expose areas as transparent):
                 segment_image.putalpha(255)
-            background = ImageStat.Stat(segment_image).median[0]
+            if fill == 'background':
+                background = ImageStat.Stat(segment_image).median[0]
+            else:
+                background = 'white'
             segment_image = segment_image.rotate(segment_xywh['angle'],
                                                  expand=True,
                                                  #resample=Image.BILINEAR,
-                                                 fillcolor=(
-                                                     # background detection by median can fail
-                                                     # if segments are very small or have lots
-                                                     # of image foreground; if we already know
-                                                     # this is binarized, fill with white:
-                                                     'white' if page_image.mode == '1' else
-                                                     background))
+                                                 fillcolor=background)
             segment_xywh['features'] += ',deskewed'
         # verify constraints again:
         if not all(feature in segment_xywh['features']
@@ -546,6 +559,7 @@ def image_from_segment(self, segment, parent_image, parent_xywh, feature_selecto
                                              segment_image.width - segment_xywh['w']))
         segment_xywh['y'] -= round(0.5 * max(0,
                                              segment_image.height - segment_xywh['h']))
+        segment_image.format = 'PNG' # workaround for tesserocr#194
         return segment_image, segment_xywh
 
     # pylint: disable=redefined-builtin

diff --git a/ocrd_utils/ocrd_utils/__init__.py b/ocrd_utils/ocrd_utils/__init__.py
@@ -301,31 +301,34 @@ def get_local_filename(url, start=None):
         url = url[len(start):]
     return url
 
-
-def image_from_polygon(image, polygon):
+def image_from_polygon(image, polygon, fill='background'):
     """"Mask an image with a polygon.
 
     Given a PIL.Image ``image`` and a numpy array ``polygon``
-    of relative coordinates into the image, put everything
-    outside the polygon hull to the background. Since ``image``
-    is not necessarily binarized yet, determine the background
-    from the median color (instead of white).
-
+    of relative coordinates into the image, fill everything
+    outside the polygon hull to a color according to ``fill``:
+    - if ``background`` (the default), then use the median color
+      of the image;
+    - if ``white``, then use white;
+    - if ``transparent``, then add a transparency channel from
+      the polygon mask (i.e. everything outside the polygon will
+      be transparent).
+    
     Return a new PIL.Image.
     """
     mask = polygon_mask(image, polygon)
-    # create a background image from its median color
-    # (in case it has not been binarized yet):
-    # array = np.asarray(image)
-    # background = np.median(array, axis=[0, 1], keepdims=True)
-    # array = np.broadcast_to(background.astype(np.uint8), array.shape)
-    background = ImageStat.Stat(image).median[0]
+    if fill == 'transparent' and image.mode in ['RGB', 'L']:
+        # ensure no information is lost by adding transparency channel
+        # initialized to fully transparent outside the mask
+        # (so consumers do not have to rely on background estimation):
+        new_image = image.copy()
+        new_image.putalpha(mask)
+        return new_image
+    if fill == 'background':
+        background = ImageStat.Stat(image).median[0]
+    else:
+        background = 'white'
     new_image = Image.new(image.mode, image.size, background)
-    if image.mode in ['RGB', 'L']:
-        # ensure no information is lost by adding transparency
-        # (so we do not have to rely on background estimation):
-        image.putalpha(mask)
-        return image
     new_image.paste(image, mask=mask)
     return new_image