Skip to content

Commit

Permalink
cropping/rotation: caller can opt out of transparency:
Browse files Browse the repository at this point in the history
- image_from_page, image_from_segment, image_from_polygon:
  add parameter ``fill``
- possible values white/background/transparent, with
  ``transparent`` (behaviour introduced by this branch)
  as default
  • Loading branch information
bertsky committed Sep 20, 2019
1 parent 440863f commit b49648d
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 49 deletions.
6 changes: 4 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@ Versioned according to [Semantic Versioning](http://semver.org/).

## Unreleased

* image_from_page etc: allow filling with background or transparency

## [1.0.0b19] - 2019-09-10

* image_from_page: allow filtering by feature (@comment), #294
* image_from_page etc: allow filtering by feature (@comments), #294

## [1.0.0b18] - 2019-09-06

Expand All @@ -25,7 +27,7 @@ Fixed:
* Processor: `chdir` to workspace directory on init so relative files resolve properly
* typos in docstrings
* README: 'module' -> 'package'
* workspace.image_from_page: logic with rotation/angle
* workspace.image_from_page etc: logic with rotation/angle
* Adapted test suite to OCR-D/assets now with file extensions

Added:
Expand Down
72 changes: 43 additions & 29 deletions ocrd/ocrd/workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ def _resolve_image_as_pil(self, image_url, coords=None):
]
return Image.fromarray(region_cut)

def image_from_page(self, page, page_id, feature_selector='', feature_filter=''):
def image_from_page(self, page, page_id, fill='transparent', feature_selector='', feature_filter=''):
"""Extract a Page image from the workspace.
Given a PageType object, ``page``, extract its PIL.Image from
Expand All @@ -267,7 +267,15 @@ def image_from_page(self, page, page_id, feature_selector='', feature_filter='')
chosen image does not have "deskewed", but an @orientation exists,
then rotate it (unless "deskewed" is also being filtered).
Cropping uses a polygon mask (not just the rectangle).
Cropping uses a polygon mask (not just the rectangle). Areas outside
the polygon (regardless of cropping and deskewing) will be filled
according to ``fill``:
- if ``background``, then fill with the median color
of the image;
- if ``white``, then fill with white;
- if ``transparent`` (the default), then add a transparency channel
which is fully opaque before cropping and rotating (thus only the
exposed areas will be transparent afterwards).
(Required and produced features need not be in the same order, so
``feature_selector`` is merely a mask specifying Boolean AND, and
Expand Down Expand Up @@ -350,7 +358,7 @@ def image_from_page(self, page, page_id, feature_selector='', feature_filter='')
# get polygon outline of page border:
page_polygon = np.array(polygon_from_points(page_points))
# create a mask from the page polygon:
page_image = image_from_polygon(page_image, page_polygon)
page_image = image_from_polygon(page_image, page_polygon, fill=fill)
# recrop into page rectangle:
page_image = crop_image(page_image,
box=(page_xywh['x'],
Expand All @@ -365,21 +373,19 @@ def image_from_page(self, page, page_id, feature_selector='', feature_filter='')
log.info("Rotating %s for page '%s' by %.2f°",
"AlternativeImage" if alternative_image else
"image", page_id, page_xywh['angle'])
if page_image.mode in ['RGB', 'L']:
# ensure no information is lost by adding transparency
# (which rotation will respect):
if fill == 'transparent' and page_image.mode in ['RGB', 'L']:
# ensure no information is lost by adding transparency channel
# initialized to fully opaque (so cropping and rotation will
# expose areas as transparent):
page_image.putalpha(255)
background = ImageStat.Stat(page_image).median[0]
if fill == 'background':
background = ImageStat.Stat(page_image).median[0]
else:
background = 'white'
page_image = page_image.rotate(page_xywh['angle'],
expand=True,
#resample=Image.BILINEAR,
fillcolor=(
# background detection by median can fail
# if segments are very small or have lots
# of image foreground; if we already know
# this is binarized, fill with white:
'white' if page_image.mode == '1' else
background))
fillcolor=background)
page_xywh['features'] += ',deskewed'
# verify constraints again:
if not all(feature in page_xywh['features']
Expand All @@ -395,9 +401,10 @@ def image_from_page(self, page, page_id, feature_selector='', feature_filter='')
# subtract offset from any increase in binary region size over source:
page_xywh['x'] -= round(0.5 * max(0, page_image.width - page_xywh['w']))
page_xywh['y'] -= round(0.5 * max(0, page_image.height - page_xywh['h']))
page_image.format = 'PNG' # workaround for tesserocr#194
return page_image, page_xywh, page_image_info

def image_from_segment(self, segment, parent_image, parent_xywh, feature_selector='', feature_filter=''):
def image_from_segment(self, segment, parent_image, parent_xywh, fill='transparent', feature_selector='', feature_filter=''):
"""Extract a segment image from its parent's image.
Given a PIL.Image of the parent, ``parent_image``, with its
Expand All @@ -421,8 +428,16 @@ def image_from_segment(self, segment, parent_image, parent_xywh, feature_selecto
the segment coordinates in an inverse transformation (i.e. translation
to center, passive rotation, re-translation).
Cropping uses a polygon mask (not just the rectangle).
Cropping uses a polygon mask (not just the rectangle). Areas outside
the polygon (regardless of cropping and deskewing) will be filled
according to ``fill``:
- if ``background``, then fill with the median color
of the image;
- if ``white``, then fill with white;
- if ``transparent`` (the default), then add a transparency channel
which is fully opaque before cropping and rotating (thus only the
exposed areas will be transparent afterwards).
(Required and produced features need not be in the same order, so
``feature_selector`` is merely a mask specifying Boolean AND, and
``feature_filter`` is merely a mask specifying Boolean OR.)
Expand Down Expand Up @@ -466,7 +481,7 @@ def image_from_segment(self, segment, parent_image, parent_xywh, feature_selecto
# get polygon outline of segment relative to parent image:
segment_polygon = coordinates_of_segment(segment, parent_image, parent_xywh)
# create a mask from the segment polygon:
segment_image = image_from_polygon(parent_image, segment_polygon)
segment_image = image_from_polygon(parent_image, segment_polygon, fill=fill)
# recrop into segment rectangle:
segment_image = crop_image(segment_image,
box=(segment_xywh['x'] - parent_xywh['x'],
Expand Down Expand Up @@ -514,21 +529,19 @@ def image_from_segment(self, segment, parent_image, parent_xywh, feature_selecto
log.info("Rotating %s for segment '%s' by %.2f°",
"AlternativeImage" if alternative_image else
"image", segment.id, segment_xywh['angle'])
if segment_image.mode in ['RGB', 'L']:
# ensure no information is lost by adding transparency
# (which rotation will respect):
if fill == 'transparent' and segment_image.mode in ['RGB', 'L']:
# ensure no information is lost by adding transparency channel
# initialized to fully opaque (so cropping and rotation will
# expose areas as transparent):
segment_image.putalpha(255)
background = ImageStat.Stat(segment_image).median[0]
if fill == 'background':
background = ImageStat.Stat(segment_image).median[0]
else:
background = 'white'
segment_image = segment_image.rotate(segment_xywh['angle'],
expand=True,
#resample=Image.BILINEAR,
fillcolor=(
# background detection by median can fail
# if segments are very small or have lots
# of image foreground; if we already know
# this is binarized, fill with white:
'white' if page_image.mode == '1' else
background))
fillcolor=background)
segment_xywh['features'] += ',deskewed'
# verify constraints again:
if not all(feature in segment_xywh['features']
Expand All @@ -546,6 +559,7 @@ def image_from_segment(self, segment, parent_image, parent_xywh, feature_selecto
segment_image.width - segment_xywh['w']))
segment_xywh['y'] -= round(0.5 * max(0,
segment_image.height - segment_xywh['h']))
segment_image.format = 'PNG' # workaround for tesserocr#194
return segment_image, segment_xywh

# pylint: disable=redefined-builtin
Expand Down
39 changes: 21 additions & 18 deletions ocrd_utils/ocrd_utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,31 +301,34 @@ def get_local_filename(url, start=None):
url = url[len(start):]
return url


def image_from_polygon(image, polygon):
def image_from_polygon(image, polygon, fill='background'):
"""Mask an image with a polygon.
Given a PIL.Image ``image`` and a numpy array ``polygon``
of relative coordinates into the image, put everything
outside the polygon hull to the background. Since ``image``
is not necessarily binarized yet, determine the background
from the median color (instead of white).
of relative coordinates into the image, fill everything
outside the polygon hull according to ``fill``:
- if ``background`` (the default), then use the median color
of the image;
- if ``white``, then use white;
- if ``transparent``, then add a transparency channel from
the polygon mask (i.e. everything outside the polygon will
be transparent).
Return a new PIL.Image.
"""
mask = polygon_mask(image, polygon)
# create a background image from its median color
# (in case it has not been binarized yet):
# array = np.asarray(image)
# background = np.median(array, axis=[0, 1], keepdims=True)
# array = np.broadcast_to(background.astype(np.uint8), array.shape)
background = ImageStat.Stat(image).median[0]
if fill == 'transparent' and image.mode in ['RGB', 'L']:
# ensure no information is lost by adding transparency channel
# initialized to fully transparent outside the mask
# (so consumers do not have to rely on background estimation):
new_image = image.copy()
new_image.putalpha(mask)
return new_image
if fill == 'background':
background = ImageStat.Stat(image).median[0]
else:
background = 'white'
new_image = Image.new(image.mode, image.size, background)
if image.mode in ['RGB', 'L']:
# ensure no information is lost by adding transparency
# (so we do not have to rely on background estimation):
image.putalpha(mask)
return image
new_image.paste(image, mask=mask)
return new_image

Expand Down

0 comments on commit b49648d

Please sign in to comment.