Badly make ketos transcribe work again

Fixes #638
mittagessen · Sep 16, 2024 · b8c4c2b · b8c4c2b
1 parent 1db0c04
commit b8c4c2b
Show file tree

Hide file tree

Showing 2 changed files with 25 additions and 57 deletions.
diff --git a/kraken/ketos/transcription.py b/kraken/ketos/transcription.py
@@ -20,9 +20,9 @@
 """
 import logging
 import os
+import dataclasses
 import unicodedata
 import uuid
-from typing import IO, Any, cast
 
 import click
 from bidi.algorithm import get_display
@@ -151,19 +151,15 @@ def extract(ctx, binarize, normalization, normalize_whitespace, reorder,
               help='Use given model for prefill mode.')
 @click.option('--pad', show_default=True, type=(int, int), default=(0, 0),
               help='Left and right padding around lines')
-@click.option('-l', '--lines', type=click.Path(exists=True), show_default=True,
-              help='JSON file containing line coordinates')
 @click.option('-o', '--output', type=click.File(mode='wb'), default='transcription.html',
               help='Output file', show_default=True)
 @click.argument('images', nargs=-1, type=click.File(mode='rb', lazy=True))
 def transcription(ctx, text_direction, scale, bw, maxcolseps,
-                  black_colseps, font, font_style, prefill, pad, lines, output,
+                  black_colseps, font, font_style, prefill, pad, output,
                   images):
     """
     Creates transcription environments for ground truth generation.
     """
-    import json
-
     from PIL import Image
 
     from kraken import binarization, pageseg, rpred, transcribe
@@ -172,9 +168,6 @@ def transcription(ctx, text_direction, scale, bw, maxcolseps,
 
     ti = transcribe.TranscriptionInterface(font, font_style)
 
-    if len(images) > 1 and lines:
-        raise click.UsageError('--lines option is incompatible with multiple image files')
-
     if prefill:
         logger.info('Loading model {}'.format(prefill))
         message('Loading ANN', nl=False)
@@ -193,23 +186,15 @@ def transcription(ctx, text_direction, scale, bw, maxcolseps,
             im_bin = binarization.nlbin(im)
             im_bin = im_bin.convert('1')
             logger.info('Segmenting page')
-            if not lines:
-                res = pageseg.segment(im_bin, text_direction, scale, maxcolseps, black_colseps, pad=pad)
-            else:
-                with click.open_file(lines, 'r') as fp:
-                    try:
-                        fp = cast('IO[Any]', fp)
-                        res = json.load(fp)
-                    except ValueError as e:
-                        raise click.UsageError('{} invalid segmentation: {}'.format(lines, str(e)))
+            res = pageseg.segment(im_bin, text_direction, scale, maxcolseps, black_colseps, pad=pad)
             if prefill:
-                it = rpred.rpred(prefill, im_bin, res.copy())
+                it = rpred.rpred(prefill, im_bin, res)
                 preds = []
                 logger.info('Recognizing')
                 for pred in it:
                     logger.debug('{}'.format(pred.prediction))
                     preds.append(pred)
-                ti.add_page(im, res, records=preds)
+                ti.add_page(im, dataclasses.replace(res, lines=preds))
             else:
                 ti.add_page(im, res)
             fp.close()

diff --git a/kraken/transcribe.py b/kraken/transcribe.py
@@ -23,7 +23,6 @@
 
 from jinja2 import Environment, PackageLoader
 
-from kraken.lib.exceptions import KrakenInputException
 from kraken.lib.util import get_im_str
 
 logger = logging.getLogger()
@@ -44,18 +43,17 @@ def __init__(self, font=None, font_style=None):
         self.line_idx = 1
         self.seg_idx = 1
 
-    def add_page(self, im, segmentation=None, records=None):
+    def add_page(self, im, segmentation = None):
         """
         Adds an image to the transcription interface, optionally filling in
         information from a list of ocr_record objects.
 
         Args:
-            im (PIL.Image): Input image
-            segmentation (dict): Output of the segment method.
-            records (list): A list of ocr_record objects.
+            im: Input image
+            segmentation: Output of the segment method.
         """
         im_str = get_im_str(im)
-        logger.info('Adding page {} with {} lines'.format(im_str, len(segmentation) if segmentation else len(records)))
+        logger.info(f'Adding page {im_str} with {len(segmentation.lines)} lines')
         page = {}
         fd = BytesIO()
         im.save(fd, format='png', optimize=True)
@@ -64,37 +62,22 @@ def add_page(self, im, segmentation=None, records=None):
         logger.debug('Base64 encoding image')
         page['img'] = 'data:image/png;base64,' + base64.b64encode(fd.getvalue()).decode('ascii')
         page['lines'] = []
-        if records:
-            logger.debug('Adding records.')
-            self.text_direction = segmentation['text_direction']
-            for record, bbox in zip(records, segmentation['boxes']):
-                page['lines'].append({'index': self.line_idx, 'text': record.prediction,
-                                      'left': 100*int(bbox[0]) / im.size[0],
-                                      'top': 100*int(bbox[1]) / im.size[1],
-                                      'width': 100*(bbox[2] - bbox[0])/im.size[0],
-                                      'height': 100*(int(bbox[3]) - int(bbox[1]))/im.size[1],
-                                      'bbox': '{}, {}, {}, {}'.format(int(bbox[0]),
-                                                                      int(bbox[1]),
-                                                                      int(bbox[2]),
-                                                                      int(bbox[3]))})
-
-                self.line_idx += 1
-        elif segmentation:
-            logger.debug('Adding segmentations.')
-            self.text_direction = segmentation['text_direction']
-            for bbox in segmentation['boxes']:
-                page['lines'].append({'index': self.line_idx,
-                                      'left': 100*int(bbox[0]) / im.size[0],
-                                      'top': 100*int(bbox[1]) / im.size[1],
-                                      'width': 100*(bbox[2] - bbox[0])/im.size[0],
-                                      'height': 100*(int(bbox[3]) - int(bbox[1]))/im.size[1],
-                                      'bbox': '{}, {}, {}, {}'.format(int(bbox[0]),
-                                                                      int(bbox[1]),
-                                                                      int(bbox[2]),
-                                                                      int(bbox[3]))})
-                self.line_idx += 1
-        else:
-            raise KrakenInputException('Neither segmentations nor records given')
+        logger.debug('Adding segmentation.')
+        self.text_direction = segmentation.text_direction
+        for line in segmentation.lines:
+            bbox = line.bbox
+            page['lines'].append({'index': self.line_idx,
+                                  'left': 100*int(bbox[0]) / im.size[0],
+                                  'top': 100*int(bbox[1]) / im.size[1],
+                                  'width': 100*(bbox[2] - bbox[0])/im.size[0],
+                                  'height': 100*(int(bbox[3]) - int(bbox[1]))/im.size[1],
+                                  'bbox': '{}, {}, {}, {}'.format(int(bbox[0]),
+                                                                  int(bbox[1]),
+                                                                  int(bbox[2]),
+                                                                  int(bbox[3]))})
+            if line.text:
+                page['lines'][-1]['text'] = line.prediction
+            self.line_idx += 1
         self.pages.append(page)
 
     def write(self, fd):