Skip to content

Commit

Permalink
Badly make ketos transcribe work again
Browse files Browse the repository at this point in the history
Fixes #638
  • Loading branch information
mittagessen committed Sep 16, 2024
1 parent 1db0c04 commit b8c4c2b
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 57 deletions.
25 changes: 5 additions & 20 deletions kraken/ketos/transcription.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
"""
import logging
import os
import dataclasses
import unicodedata
import uuid
from typing import IO, Any, cast

import click
from bidi.algorithm import get_display
Expand Down Expand Up @@ -151,19 +151,15 @@ def extract(ctx, binarize, normalization, normalize_whitespace, reorder,
help='Use given model for prefill mode.')
@click.option('--pad', show_default=True, type=(int, int), default=(0, 0),
help='Left and right padding around lines')
@click.option('-l', '--lines', type=click.Path(exists=True), show_default=True,
help='JSON file containing line coordinates')
@click.option('-o', '--output', type=click.File(mode='wb'), default='transcription.html',
help='Output file', show_default=True)
@click.argument('images', nargs=-1, type=click.File(mode='rb', lazy=True))
def transcription(ctx, text_direction, scale, bw, maxcolseps,
black_colseps, font, font_style, prefill, pad, lines, output,
black_colseps, font, font_style, prefill, pad, output,
images):
"""
Creates transcription environments for ground truth generation.
"""
import json

from PIL import Image

from kraken import binarization, pageseg, rpred, transcribe
Expand All @@ -172,9 +168,6 @@ def transcription(ctx, text_direction, scale, bw, maxcolseps,

ti = transcribe.TranscriptionInterface(font, font_style)

if len(images) > 1 and lines:
raise click.UsageError('--lines option is incompatible with multiple image files')

if prefill:
logger.info('Loading model {}'.format(prefill))
message('Loading ANN', nl=False)
Expand All @@ -193,23 +186,15 @@ def transcription(ctx, text_direction, scale, bw, maxcolseps,
im_bin = binarization.nlbin(im)
im_bin = im_bin.convert('1')
logger.info('Segmenting page')
if not lines:
res = pageseg.segment(im_bin, text_direction, scale, maxcolseps, black_colseps, pad=pad)
else:
with click.open_file(lines, 'r') as fp:
try:
fp = cast('IO[Any]', fp)
res = json.load(fp)
except ValueError as e:
raise click.UsageError('{} invalid segmentation: {}'.format(lines, str(e)))
res = pageseg.segment(im_bin, text_direction, scale, maxcolseps, black_colseps, pad=pad)
if prefill:
it = rpred.rpred(prefill, im_bin, res.copy())
it = rpred.rpred(prefill, im_bin, res)
preds = []
logger.info('Recognizing')
for pred in it:
logger.debug('{}'.format(pred.prediction))
preds.append(pred)
ti.add_page(im, res, records=preds)
ti.add_page(im, dataclasses.replace(res, lines=preds))
else:
ti.add_page(im, res)
fp.close()
Expand Down
57 changes: 20 additions & 37 deletions kraken/transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@

from jinja2 import Environment, PackageLoader

from kraken.lib.exceptions import KrakenInputException
from kraken.lib.util import get_im_str

logger = logging.getLogger()
Expand All @@ -44,18 +43,17 @@ def __init__(self, font=None, font_style=None):
self.line_idx = 1
self.seg_idx = 1

def add_page(self, im, segmentation=None, records=None):
def add_page(self, im, segmentation = None):
"""
Adds an image to the transcription interface, optionally filling in
information from a list of ocr_record objects.
Args:
im (PIL.Image): Input image
segmentation (dict): Output of the segment method.
records (list): A list of ocr_record objects.
im: Input image
segmentation: Output of the segment method.
"""
im_str = get_im_str(im)
logger.info('Adding page {} with {} lines'.format(im_str, len(segmentation) if segmentation else len(records)))
logger.info(f'Adding page {im_str} with {len(segmentation.lines)} lines')
page = {}
fd = BytesIO()
im.save(fd, format='png', optimize=True)
Expand All @@ -64,37 +62,22 @@ def add_page(self, im, segmentation=None, records=None):
logger.debug('Base64 encoding image')
page['img'] = 'data:image/png;base64,' + base64.b64encode(fd.getvalue()).decode('ascii')
page['lines'] = []
if records:
logger.debug('Adding records.')
self.text_direction = segmentation['text_direction']
for record, bbox in zip(records, segmentation['boxes']):
page['lines'].append({'index': self.line_idx, 'text': record.prediction,
'left': 100*int(bbox[0]) / im.size[0],
'top': 100*int(bbox[1]) / im.size[1],
'width': 100*(bbox[2] - bbox[0])/im.size[0],
'height': 100*(int(bbox[3]) - int(bbox[1]))/im.size[1],
'bbox': '{}, {}, {}, {}'.format(int(bbox[0]),
int(bbox[1]),
int(bbox[2]),
int(bbox[3]))})

self.line_idx += 1
elif segmentation:
logger.debug('Adding segmentations.')
self.text_direction = segmentation['text_direction']
for bbox in segmentation['boxes']:
page['lines'].append({'index': self.line_idx,
'left': 100*int(bbox[0]) / im.size[0],
'top': 100*int(bbox[1]) / im.size[1],
'width': 100*(bbox[2] - bbox[0])/im.size[0],
'height': 100*(int(bbox[3]) - int(bbox[1]))/im.size[1],
'bbox': '{}, {}, {}, {}'.format(int(bbox[0]),
int(bbox[1]),
int(bbox[2]),
int(bbox[3]))})
self.line_idx += 1
else:
raise KrakenInputException('Neither segmentations nor records given')
logger.debug('Adding segmentation.')
self.text_direction = segmentation.text_direction
for line in segmentation.lines:
bbox = line.bbox
page['lines'].append({'index': self.line_idx,
'left': 100*int(bbox[0]) / im.size[0],
'top': 100*int(bbox[1]) / im.size[1],
'width': 100*(bbox[2] - bbox[0])/im.size[0],
'height': 100*(int(bbox[3]) - int(bbox[1]))/im.size[1],
'bbox': '{}, {}, {}, {}'.format(int(bbox[0]),
int(bbox[1]),
int(bbox[2]),
int(bbox[3]))})
if line.text:
page['lines'][-1]['text'] = line.prediction
self.line_idx += 1
self.pages.append(page)

def write(self, fd):
Expand Down

0 comments on commit b8c4c2b

Please sign in to comment.