Skip to content

Commit

Permalink
[WIP] Add deskewing via Tesseract
Browse files Browse the repository at this point in the history
With the help of orientation and script detection (`OSD`), Tesseract
estimates the skew angle of images. The wrapper does this for pages and
regions. It is not yet clear how to save the estimated skew angle in PAGE
XML. Cf. PRImA-Research-Lab/PAGE-XML#9
  • Loading branch information
wrznr committed Apr 17, 2019
1 parent 098a681 commit 52c2a63
Show file tree
Hide file tree
Showing 5 changed files with 126 additions and 4 deletions.
1 change: 1 addition & 0 deletions ocrd_tesserocr/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .recognize import TesserocrRecognize
from .segment_line import TesserocrSegmentLine
from .segment_region import TesserocrSegmentRegion
from .deskew import TesserocrDeskew
6 changes: 6 additions & 0 deletions ocrd_tesserocr/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from ocrd_tesserocr.segment_region import TesserocrSegmentRegion
from ocrd_tesserocr.segment_line import TesserocrSegmentLine
from ocrd_tesserocr.segment_word import TesserocrSegmentWord
from ocrd_tesserocr.deskew import TesserocrDeskew

@click.command()
@ocrd_cli_options
Expand All @@ -25,3 +26,8 @@ def ocrd_tesserocr_segment_word(*args, **kwargs):
@ocrd_cli_options
def ocrd_tesserocr_recognize(*args, **kwargs):
return ocrd_cli_wrap_processor(TesserocrRecognize, *args, **kwargs)

# CLI entry point for the deskewing processor: forwards every argument
# collected by the shared OCR-D command line options to the processor wrapper.
# (Kept as a comment on purpose: a docstring would show up in Click's --help.)
@click.command()
@ocrd_cli_options
def ocrd_tesserocr_deskew(*args, **kwargs):
    processor_class = TesserocrDeskew
    return ocrd_cli_wrap_processor(processor_class, *args, **kwargs)
91 changes: 91 additions & 0 deletions ocrd_tesserocr/deskew.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
from __future__ import absolute_import

import locale

# pylint: disable=wrong-import-position
locale.setlocale(locale.LC_ALL, 'C') # circumvent tesseract-ocr issue 1670 (which cannot be done on command line because Click requires an UTF-8 locale in Python 3)

from tesserocr import RIL, PSM, PyTessBaseAPI

from ocrd_utils import getLogger, concat_padded, xywh_from_points, points_from_x0y0x1y1, MIMETYPE_PAGE
from ocrd_modelfactory import page_from_file
from ocrd_models.ocrd_page import (
CoordsType,
GlyphType,
LabelType,
LabelsType,
MetadataItemType,
TextEquivType,
TextStyleType,

to_xml
)
from ocrd import Processor
from .config import TESSDATA_PREFIX, OCRD_TOOL

log = getLogger('processor.TesserocrDeskew')

class TesserocrDeskew(Processor):
    """
    Estimate skew angles of pages or text regions with Tesseract.

    Depending on the ``operation_level`` parameter, Tesseract's layout
    analysis is run either on the full page image or on the cropped image
    of every text region. Angles found for regions are stored on the region
    via ``set_orientation``; the page-level angle is only logged so far.
    """

    def __init__(self, *args, **kwargs):
        # Inject the tool description and version of this processor before
        # handing over to the generic Processor constructor.
        kwargs['ocrd_tool'] = OCRD_TOOL['tools']['ocrd-tesserocr-deskew']
        kwargs['version'] = OCRD_TOOL['version']
        super(TesserocrDeskew, self).__init__(*args, **kwargs)

    def process(self):
        """
        Performs the deskewing.

        For every input file: load the PAGE document and its image, record
        this processing step in the metadata, estimate the skew on the
        requested level and add the serialized result to the output file
        group of the workspace.
        """
        oplevel = self.parameter['operation_level']
        with PyTessBaseAPI(path=TESSDATA_PREFIX, psm=PSM.AUTO_OSD) as tessapi:
            for (n, input_file) in enumerate(self.input_files):
                log.info("INPUT FILE %i / %s", n, input_file)
                pcgts = page_from_file(self.workspace.download_file(input_file))
                pil_image = self.workspace.resolve_image_as_pil(pcgts.get_Page().imageFilename)

                metadata = pcgts.get_Metadata() # ensured by from_file()
                metadata.add_MetadataItem(
                    MetadataItemType(type_="processingStep",
                                     name=OCRD_TOOL['tools']['ocrd-tesserocr-deskew']['steps'][0],
                                     value='ocrd-tesserocr-deskew',
                                     Labels=[LabelsType(externalRef="parameters",
                                                        Label=[LabelType(type_=name,
                                                                         value=self.parameter[name])
                                                               for name in self.parameter.keys()])]))
                log.info("Deskewing on '%s' level on page '%s'", oplevel, pcgts.get_pcGtsId())

                if oplevel == 'page':
                    self._process_page(tessapi, pil_image)
                elif oplevel == 'region':
                    regions = pcgts.get_Page().get_TextRegion()
                    if not regions:
                        log.warning("Deskewing regions requested but page contains no text regions")
                    self._process_regions(regions, tessapi, pil_image)

                ID = concat_padded(self.output_file_grp, n)
                self.workspace.add_file(
                    ID=ID,
                    file_grp=self.output_file_grp,
                    mimetype=MIMETYPE_PAGE,
                    local_filename='%s/%s' % (self.output_file_grp, ID),
                    content=to_xml(pcgts),
                )

    @staticmethod
    def _estimate_deskew_angle(tessapi, pil_image):
        """
        Run layout analysis on ``pil_image`` and return the deskew angle.

        Returns ``None`` when Tesseract yields no layout result, so callers
        can skip the image instead of failing.
        """
        tessapi.SetImage(pil_image)
        layout = tessapi.AnalyseLayout()
        # AnalyseLayout() can come back empty (tesserocr documents None on
        # error or for an empty page); calling Orientation() on that would
        # raise an AttributeError and abort the whole run.
        if layout is None:
            return None
        # Orientation() yields four values; only the last one is needed here.
        _, _, _, deskew_angle = layout.Orientation()
        return deskew_angle

    def _process_page(self, tessapi, pil_image):
        """
        Estimate and log the skew angle of the whole page image.

        The angle is not written to the PAGE document yet.
        """
        deskew_angle = self._estimate_deskew_angle(tessapi, pil_image)
        if deskew_angle is None:
            log.warning("Layout analysis failed, cannot determine deskew angle of page")
            return
        log.debug("Deskew angle: %.4f", deskew_angle)

    def _process_regions(self, regions, tessapi, pil_image):
        """
        Estimate the skew angle of every region and store it on the region.

        Regions for which layout analysis yields no result are skipped with
        a warning and keep their current orientation.
        """
        for region in regions:
            log.debug("Deskewing region '%s'", region.id)
            region_xywh = xywh_from_points(region.get_Coords().points)

            # Note: we set the image instead of specifying a rectangle!
            left = region_xywh['x']
            top = region_xywh['y']
            pil_region_image = pil_image.crop(
                (left, top, left + region_xywh['w'], top + region_xywh['h']))

            deskew_angle = self._estimate_deskew_angle(tessapi, pil_region_image)
            if deskew_angle is None:
                log.warning("Layout analysis failed, cannot determine deskew angle of region '%s'", region.id)
                continue
            log.debug("Deskew angle: %.4f", deskew_angle)
            # NOTE(review): the angle is stored exactly as Tesseract reports
            # it; Tesseract appears to use radians while PAGE's orientation
            # attribute presumably expects degrees -- confirm before relying
            # on the stored value.
            region.set_orientation(deskew_angle)
22 changes: 22 additions & 0 deletions ocrd_tesserocr/ocrd-tool.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,28 @@
"git_url": "https://github.com/OCR-D/ocrd_tesserocr",
"dockerhub": "ocrd/tesserocr",
"tools": {
"ocrd-tesserocr-deskew": {
"executable": "ocrd-tesserocr-deskew",
"categories": ["Deskewing"],
"description": "Deskew images or image parts",
"input_file_grp": [
"OCR-D-IMG",
"OCR-D-SEG-BLOCK"
],
"output_file_grp": [
"OCR-D-DESKEW-IMG",
"OCR-D-DESKEW-BLOCK"
],
"steps": ["preprocessing/optimization/deskewing"],
"parameters": {
"operation_level": {
"type": "string",
"enum": ["page","region"],
"default": "page",
"description": "Level of operation for deskewing"
}
}
},
"ocrd-tesserocr-recognize": {
"executable": "ocrd-tesserocr-recognize",
"categories": ["Text recognition and optimization"],
Expand Down
10 changes: 6 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
# -*- coding: utf-8 -*-
"""
Installs four executables:
Installs five executables:
- ocrd_tesserocr_recognize
- ocrd_tesserocr_segment_region
- ocrd_tesserocr_segment_line
- ocrd_tesserocr_segment_word
- ocrd_tesserocr_deskew
"""
import codecs

Expand All @@ -16,9 +17,9 @@
version='0.2.1',
description='Tesserocr bindings',
long_description=codecs.open('README.rst', encoding='utf-8').read(),
author='Konstantin Baierer',
author_email='[email protected]',
url='https://github.com/kba/ocrd_tesserocr',
author='Konstantin Baierer, Kay-Michael Würzner',
author_email='[email protected], [email protected]',
url='https://github.com/OCR-D/ocrd_tesserocr',
license='Apache License 2.0',
packages=find_packages(exclude=('tests', 'docs')),
install_requires=open('requirements.txt').read().split('\n'),
Expand All @@ -31,6 +32,7 @@
'ocrd-tesserocr-segment-region=ocrd_tesserocr.cli:ocrd_tesserocr_segment_region',
'ocrd-tesserocr-segment-line=ocrd_tesserocr.cli:ocrd_tesserocr_segment_line',
'ocrd-tesserocr-segment-word=ocrd_tesserocr.cli:ocrd_tesserocr_segment_word',
'ocrd-tesserocr-deskew=ocrd_tesserocr.cli:ocrd_tesserocr_deskew',
]
},
)

0 comments on commit 52c2a63

Please sign in to comment.