diff --git a/kraken/ketos/transcription.py b/kraken/ketos/transcription.py index e8a8fc11d..93f07e134 100644 --- a/kraken/ketos/transcription.py +++ b/kraken/ketos/transcription.py @@ -20,9 +20,9 @@ """ import logging import os +import dataclasses import unicodedata import uuid -from typing import IO, Any, cast import click from bidi.algorithm import get_display @@ -151,19 +151,15 @@ def extract(ctx, binarize, normalization, normalize_whitespace, reorder, help='Use given model for prefill mode.') @click.option('--pad', show_default=True, type=(int, int), default=(0, 0), help='Left and right padding around lines') -@click.option('-l', '--lines', type=click.Path(exists=True), show_default=True, - help='JSON file containing line coordinates') @click.option('-o', '--output', type=click.File(mode='wb'), default='transcription.html', help='Output file', show_default=True) @click.argument('images', nargs=-1, type=click.File(mode='rb', lazy=True)) def transcription(ctx, text_direction, scale, bw, maxcolseps, - black_colseps, font, font_style, prefill, pad, lines, output, + black_colseps, font, font_style, prefill, pad, output, images): """ Creates transcription environments for ground truth generation. """ - import json - from PIL import Image from kraken import binarization, pageseg, rpred, transcribe @@ -172,9 +168,6 @@ def transcription(ctx, text_direction, scale, bw, maxcolseps, ti = transcribe.TranscriptionInterface(font, font_style) - if len(images) > 1 and lines: - raise click.UsageError('--lines option is incompatible with multiple image files') - if prefill: logger.info('Loading model {}'.format(prefill)) message('Loading ANN', nl=False) @@ -193,23 +186,15 @@ def transcription(ctx, text_direction, scale, bw, maxcolseps, im_bin = binarization.nlbin(im) im_bin = im_bin.convert('1') logger.info('Segmenting page') - if not lines: - res = pageseg.segment(im_bin, text_direction, scale, maxcolseps, black_colseps, pad=pad) - else: - with click.open_file(lines, 'r') as fp: - try: - fp = cast('IO[Any]', fp) - res = json.load(fp) - except ValueError as e: - raise click.UsageError('{} invalid segmentation: {}'.format(lines, str(e))) + res = pageseg.segment(im_bin, text_direction, scale, maxcolseps, black_colseps, pad=pad) if prefill: - it = rpred.rpred(prefill, im_bin, res.copy()) + it = rpred.rpred(prefill, im_bin, res) preds = [] logger.info('Recognizing') for pred in it: logger.debug('{}'.format(pred.prediction)) preds.append(pred) - ti.add_page(im, res, records=preds) + ti.add_page(im, dataclasses.replace(res, lines=preds)) else: ti.add_page(im, res) fp.close() diff --git a/kraken/transcribe.py b/kraken/transcribe.py index 9e355da93..03b0dc21f 100644 --- a/kraken/transcribe.py +++ b/kraken/transcribe.py @@ -23,7 +23,6 @@ from jinja2 import Environment, PackageLoader -from kraken.lib.exceptions import KrakenInputException from kraken.lib.util import get_im_str logger = logging.getLogger() @@ -44,18 +43,17 @@ def __init__(self, font=None, font_style=None): self.line_idx = 1 self.seg_idx = 1 - def add_page(self, im, segmentation=None, records=None): + def add_page(self, im, segmentation = None): """ Adds an image to the transcription interface, optionally filling in information from a list of ocr_record objects. Args: - im (PIL.Image): Input image - segmentation (dict): Output of the segment method. - records (list): A list of ocr_record objects. + im: Input image + segmentation: Output of the segment method. """ im_str = get_im_str(im) - logger.info('Adding page {} with {} lines'.format(im_str, len(segmentation) if segmentation else len(records))) + logger.info(f'Adding page {im_str} with {len(segmentation.lines)} lines') page = {} fd = BytesIO() im.save(fd, format='png', optimize=True) @@ -64,37 +62,22 @@ def add_page(self, im, segmentation=None, records=None): logger.debug('Base64 encoding image') page['img'] = 'data:image/png;base64,' + base64.b64encode(fd.getvalue()).decode('ascii') page['lines'] = [] - if records: - logger.debug('Adding records.') - self.text_direction = segmentation['text_direction'] - for record, bbox in zip(records, segmentation['boxes']): - page['lines'].append({'index': self.line_idx, 'text': record.prediction, - 'left': 100*int(bbox[0]) / im.size[0], - 'top': 100*int(bbox[1]) / im.size[1], - 'width': 100*(bbox[2] - bbox[0])/im.size[0], - 'height': 100*(int(bbox[3]) - int(bbox[1]))/im.size[1], - 'bbox': '{}, {}, {}, {}'.format(int(bbox[0]), - int(bbox[1]), - int(bbox[2]), - int(bbox[3]))}) - - self.line_idx += 1 - elif segmentation: - logger.debug('Adding segmentations.') - self.text_direction = segmentation['text_direction'] - for bbox in segmentation['boxes']: - page['lines'].append({'index': self.line_idx, - 'left': 100*int(bbox[0]) / im.size[0], - 'top': 100*int(bbox[1]) / im.size[1], - 'width': 100*(bbox[2] - bbox[0])/im.size[0], - 'height': 100*(int(bbox[3]) - int(bbox[1]))/im.size[1], - 'bbox': '{}, {}, {}, {}'.format(int(bbox[0]), - int(bbox[1]), - int(bbox[2]), - int(bbox[3]))}) - self.line_idx += 1 - else: - raise KrakenInputException('Neither segmentations nor records given') + logger.debug('Adding segmentation.') + self.text_direction = segmentation.text_direction + for line in segmentation.lines: + bbox = line.bbox + page['lines'].append({'index': self.line_idx, + 'left': 100*int(bbox[0]) / im.size[0], + 'top': 100*int(bbox[1]) / im.size[1], + 'width': 100*(bbox[2] - bbox[0])/im.size[0], + 'height': 100*(int(bbox[3]) - int(bbox[1]))/im.size[1], + 'bbox': '{}, {}, {}, {}'.format(int(bbox[0]), + int(bbox[1]), + int(bbox[2]), + int(bbox[3]))}) + if line.text: + page['lines'][-1]['text'] = line.prediction + self.line_idx += 1 self.pages.append(page) def write(self, fd):