From 56d84b074220da22169b2e30697870c78c5b6524 Mon Sep 17 00:00:00 2001 From: Benjamin Kiessling Date: Mon, 11 Dec 2023 12:41:39 +0100 Subject: [PATCH] Some "light" linting. Fixes #557. --- kraken/align.py | 9 ++--- kraken/blla.py | 4 +-- kraken/containers.py | 2 +- kraken/ketos/recognition.py | 7 ++-- kraken/ketos/ro.py | 11 +++---- kraken/ketos/segmentation.py | 4 ++- kraken/lib/codec.py | 4 +-- kraken/lib/dataset/__init__.py | 8 ++--- kraken/lib/dataset/recognition.py | 23 +++++++++---- kraken/lib/dataset/ro.py | 23 +++++-------- kraken/lib/dataset/segmentation.py | 7 ++-- kraken/lib/dataset/utils.py | 1 - kraken/lib/pretrain/model.py | 8 ++--- kraken/lib/ro/model.py | 12 +++---- kraken/lib/segmentation.py | 2 +- kraken/lib/train.py | 14 ++++---- kraken/lib/util.py | 53 ++++++++++++++++++++++++++++-- kraken/pageseg.py | 2 +- kraken/repo.py | 1 - kraken/rpred.py | 3 -- kraken/serialization.py | 9 ++--- 21 files changed, 121 insertions(+), 86 deletions(-) diff --git a/kraken/align.py b/kraken/align.py index ff76a1d43..db2948ce0 100644 --- a/kraken/align.py +++ b/kraken/align.py @@ -24,21 +24,16 @@ import torch import logging import dataclasses -import numpy as np from PIL import Image from bidi.algorithm import get_display from dataclasses import dataclass -from typing import List, Dict, Any, Optional, Literal +from typing import Optional, Literal from kraken import rpred from kraken.containers import Segmentation, BaselineOCRRecord -from kraken.lib.codec import PytorchCodec -from kraken.lib.xml import XMLPage from kraken.lib.models import TorchSeqRecognizer -from kraken.lib.exceptions import KrakenInputException, KrakenEncodeException -from kraken.lib.segmentation import compute_polygon_section logger = logging.getLogger('kraken') @@ -95,6 +90,8 @@ def forced_align(doc: Segmentation, model: TorchSeqRecognizer, base_dir: Optiona at: https://github.com/pytorch/audio/blob/main/examples/tutorials/forced_alignment_tutorial.py """ + + @dataclass class Point: 
token_index: int diff --git a/kraken/blla.py b/kraken/blla.py index 050a2abb7..ac05ea5fa 100644 --- a/kraken/blla.py +++ b/kraken/blla.py @@ -30,7 +30,7 @@ import torch.nn.functional as F import torchvision.transforms as tf -from typing import Optional, Dict, Callable, Union, List, Any, Tuple, Literal +from typing import Optional, Dict, Callable, Union, List, Any, Literal from scipy.ndimage import gaussian_filter from skimage.filters import sobel @@ -415,4 +415,4 @@ def segment(im: PIL.Image.Image, lines=blls, regions=regions, script_detection=script_detection, - line_orders=[order]) + line_orders=[order] if order else []) diff --git a/kraken/containers.py b/kraken/containers.py index 587ff1124..48b4719fe 100644 --- a/kraken/containers.py +++ b/kraken/containers.py @@ -161,7 +161,7 @@ class Segmentation: script_detection: bool lines: List[Union[BaselineLine, BBoxLine]] regions: Dict[str, List[Region]] - line_orders: Optional[List[List[int]]] = None + line_orders: List[List[int]] def __post_init__(self): if not self.regions: diff --git a/kraken/ketos/recognition.py b/kraken/ketos/recognition.py index 63dcc8f70..19ade5c0e 100644 --- a/kraken/ketos/recognition.py +++ b/kraken/ketos/recognition.py @@ -396,7 +396,7 @@ def test(ctx, batch_size, model, evaluation_files, device, pad, workers, from torch.utils.data import DataLoader from kraken.serialization import render_report - from kraken.lib import models + from kraken.lib import models, util from kraken.lib.xml import XMLPage from kraken.lib.dataset import (global_align, compute_confusions, PolygonGTDataset, GroundTruthDataset, @@ -419,7 +419,6 @@ def test(ctx, batch_size, model, evaluation_files, device, pad, workers, test_set = list(test_set) - if evaluation_files: test_set.extend(evaluation_files) @@ -445,7 +444,7 @@ def test(ctx, batch_size, model, evaluation_files, device, pad, workers, force_binarization = False if repolygonize: logger.warning('Repolygonization enabled in `path` mode. 
Will be ignored.') - test_set = [{'image': img} for img in test_set] + test_set = [{'line': util.parse_gt_path(img)} for img in test_set] valid_norm = True if len(test_set) == 0: @@ -480,7 +479,7 @@ def test(ctx, batch_size, model, evaluation_files, device, pad, workers, ds_loader = DataLoader(ds, batch_size=batch_size, num_workers=workers, - pin_memory=True, + pin_memory=pin_ds_mem, collate_fn=collate_sequences) with KrakenProgressBar() as progress: diff --git a/kraken/ketos/ro.py b/kraken/ketos/ro.py index 4075b638a..e2c97f19f 100644 --- a/kraken/ketos/ro.py +++ b/kraken/ketos/ro.py @@ -23,9 +23,7 @@ import logging from PIL import Image -from typing import Dict -from kraken.lib.exceptions import KrakenInputException from kraken.lib.default_specs import READING_ORDER_HYPER_PARAMS from kraken.ketos.util import _validate_manifests, _expand_gt, message, to_ptl_device @@ -36,6 +34,7 @@ # raise default max image size to 20k * 20k pixels Image.MAX_IMAGE_PIXELS = 20000 ** 2 + @click.command('rotrain') @click.pass_context @click.option('-B', '--batch-size', show_default=True, type=click.INT, @@ -156,14 +155,13 @@ def rotrain(ctx, batch_size, output, load, freq, quit, epochs, min_epochs, lag, from kraken.lib.ro import ROModel from kraken.lib.train import KrakenTrainer - from kraken.lib.progress import KrakenProgressBar if not (0 <= freq <= 1) and freq % 1.0 != 0: raise click.BadOptionUsage('freq', 'freq needs to be either in the interval [0,1.0] or a positive integer.') if pl_logger == 'tensorboard': try: - import tensorboard + import tensorboard # NOQA except ImportError: raise click.BadOptionUsage('logger', 'tensorboard logger needs the `tensorboard` package installed.') @@ -191,7 +189,9 @@ def rotrain(ctx, batch_size, output, load, freq, quit, epochs, min_epochs, lag, 'step_size': step_size, 'rop_patience': sched_patience, 'cos_t_max': cos_max, - 'pl_logger': pl_logger,}) + 'pl_logger': pl_logger, + } + ) # disable automatic partition when given evaluation set 
explicitly if evaluation_files: @@ -281,7 +281,6 @@ def roadd(ctx, output, ro_model, seg_model): """ from kraken.lib import vgsl from kraken.lib.ro import ROModel - from kraken.lib.train import KrakenTrainer message(f'Adding {ro_model} reading order model to {seg_model}.') ro_net = ROModel.load_from_checkpoint(ro_model) diff --git a/kraken/ketos/segmentation.py b/kraken/ketos/segmentation.py index 711aab1da..cf3d1047b 100644 --- a/kraken/ketos/segmentation.py +++ b/kraken/ketos/segmentation.py @@ -24,6 +24,8 @@ from PIL import Image +from typing import Dict + from kraken.lib.exceptions import KrakenInputException from kraken.lib.default_specs import SEGMENTATION_HYPER_PARAMS, SEGMENTATION_SPEC @@ -232,7 +234,6 @@ def segtrain(ctx, output, spec, line_width, pad, load, freq, quit, epochs, from threadpoolctl import threadpool_limits from kraken.lib.train import SegmentationModel, KrakenTrainer - from kraken.lib.progress import KrakenProgressBar if resize != 'fail' and not load: raise click.BadOptionUsage('resize', 'resize option requires loading an existing model') @@ -431,6 +432,7 @@ def segtest(ctx, model, evaluation_files, device, workers, threads, threshold, import torch import torch.nn.functional as F + from kraken.lib.progress import KrakenProgressBar from kraken.lib.train import BaselineSet, ImageInputTransforms from kraken.lib.vgsl import TorchVGSLModel diff --git a/kraken/lib/codec.py b/kraken/lib/codec.py index 23ec3233d..481951f4e 100644 --- a/kraken/lib/codec.py +++ b/kraken/lib/codec.py @@ -128,12 +128,12 @@ def encode(self, s: str) -> IntTensor: idx += len(code) encodable_suffix = True break - + if not encodable_suffix and s[idx] in self.c2l: labels.extend(self.c2l[s[idx]]) idx += 1 encodable_suffix = True - + if not encodable_suffix: if self.strict: raise KrakenEncodeException(f'Non-encodable sequence {s[idx:idx+5]}... 
encountered.') diff --git a/kraken/lib/dataset/__init__.py b/kraken/lib/dataset/__init__.py index c6710d24c..95cf91a84 100644 --- a/kraken/lib/dataset/__init__.py +++ b/kraken/lib/dataset/__init__.py @@ -15,7 +15,7 @@ """ Top-level module containing datasets for recognition and segmentation training. """ -from .recognition import ArrowIPCRecognitionDataset, PolygonGTDataset, GroundTruthDataset # NOQA -from .segmentation import BaselineSet # NOQA -from .ro import PairWiseROSet, PageWiseROSet #NOQA -from .utils import ImageInputTransforms, collate_sequences, global_align, compute_confusions # NOQA +from .recognition import ArrowIPCRecognitionDataset, PolygonGTDataset, GroundTruthDataset # NOQA +from .segmentation import BaselineSet # NOQA +from .ro import PairWiseROSet, PageWiseROSet # NOQA +from .utils import ImageInputTransforms, collate_sequences, global_align, compute_confusions # NOQA diff --git a/kraken/lib/dataset/recognition.py b/kraken/lib/dataset/recognition.py index 1348265a7..e7d6bffac 100644 --- a/kraken/lib/dataset/recognition.py +++ b/kraken/lib/dataset/recognition.py @@ -19,6 +19,7 @@ import json import torch import traceback +import dataclasses import numpy as np import pyarrow as pa @@ -28,7 +29,7 @@ from torchvision import transforms from collections import Counter from torch.utils.data import Dataset -from typing import Dict, List, Tuple, Callable, Optional, Any, Union, Literal +from typing import List, Tuple, Callable, Optional, Any, Union, Literal from kraken.containers import BaselineLine, BBoxLine, Segmentation from kraken.lib.util import is_bitonal @@ -76,6 +77,7 @@ def __init__(self): def __call__(self, image): return self._transforms(image=image) + class ArrowIPCRecognitionDataset(Dataset): """ Dataset for training a recognition model from a precompiled dataset in @@ -181,7 +183,7 @@ def add(self, file: Union[str, PathLike]) -> None: mask = np.ones(len(ds_table), dtype=bool) for index in range(len(ds_table)): try: - text = 
self._apply_text_transform(ds_table.column('lines')[index].as_py(),) + self._apply_text_transform(ds_table.column('lines')[index].as_py(),) except KrakenInputException: mask[index] = False continue @@ -335,7 +337,7 @@ def add(self, self.add_line(line) if page: self.add_page(page) - if not (line and page): + if not (line or page): raise ValueError('Neither line nor page data provided in dataset builder') def add_page(self, page: Segmentation): @@ -379,7 +381,7 @@ def add_line(self, line: BaselineLine): if not line.boundary: raise ValueError('No boundary given for line') - self._images.append((line.image, line.baseline, line.boundary)) + self._images.append((line.imagename, line.baseline, line.boundary)) self._gt.append(text) self.alphabet.update(text) @@ -412,8 +414,17 @@ def __getitem__(self, index: int) -> Tuple[torch.Tensor, torch.Tensor]: im = item[0][0] if not isinstance(im, Image.Image): im = Image.open(im) - im, _ = next(extract_polygons(im, {'type': 'baselines', - 'lines': [{'baseline': item[0][1], 'boundary': item[0][2]}]})) + im, _ = next(extract_polygons(im, + Segmentation(type='baselines', + imagename=item[0][0], + text_direction='horizontal-lr', + lines=[BaselineLine('id_0', + baseline=item[0][1], + boundary=item[0][2])], + script_detection=True, + regions={}, + line_orders=[]) + )) im = self.transforms(im) if im.shape[0] == 3: im_mode = 'RGB' diff --git a/kraken/lib/dataset/ro.py b/kraken/lib/dataset/ro.py index cd9223698..46325d35b 100644 --- a/kraken/lib/dataset/ro.py +++ b/kraken/lib/dataset/ro.py @@ -15,22 +15,13 @@ """ Utility functions for data loading and training of VGSL networks. 
""" -import json import torch -import traceback import numpy as np -import torch.nn.functional as F -import shapely.geometry as geom from math import factorial -from os import path, PathLike -from PIL import Image -from shapely.ops import split -from itertools import groupby -from torchvision import transforms -from collections import defaultdict +from os import PathLike from torch.utils.data import Dataset -from typing import Dict, List, Tuple, Sequence, Callable, Any, Union, Literal, Optional +from typing import Dict, Sequence, Union, Literal, Optional from kraken.lib.xml import XMLPage @@ -112,8 +103,9 @@ def __init__(self, files: Sequence[Union[PathLike, str]] = None, torch.tensor(line_center, dtype=torch.float), # line center torch.tensor(line_coords[0, :], dtype=torch.float), # start_point coord torch.tensor(line_coords[-1, :], dtype=torch.float), # end point coord) - )) - } + ) + ) + } sorted_lines.append(line_data) if len(sorted_lines) > 1: self.data.append(sorted_lines) @@ -212,8 +204,9 @@ def __init__(self, files: Sequence[Union[PathLike, str]] = None, torch.tensor(line_center, dtype=torch.float), # line center torch.tensor(line_coords[0, :], dtype=torch.float), # start_point coord torch.tensor(line_coords[-1, :], dtype=torch.float), # end point coord) - )) - } + ) + ) + } sorted_lines.append(line_data) if len(sorted_lines) > 1: self.data.append(sorted_lines) diff --git a/kraken/lib/dataset/segmentation.py b/kraken/lib/dataset/segmentation.py index 0d248800e..01b7abb90 100644 --- a/kraken/lib/dataset/segmentation.py +++ b/kraken/lib/dataset/segmentation.py @@ -15,28 +15,25 @@ """ Utility functions for data loading and training of VGSL networks. 
""" -import json import torch import traceback import numpy as np import torch.nn.functional as F import shapely.geometry as geom -from os import path, PathLike from PIL import Image from shapely.ops import split from itertools import groupby from torchvision import transforms from collections import defaultdict from torch.utils.data import Dataset -from typing import Dict, List, Tuple, Sequence, Callable, Any, Union, Literal, Optional +from typing import Dict, Tuple, Sequence, Callable, Any, Union, Literal, Optional from skimage.draw import polygon from kraken.containers import Segmentation from kraken.lib.xml import XMLPage -from kraken.lib.exceptions import KrakenInputException __all__ = ['BaselineSet'] @@ -160,7 +157,7 @@ def add(self, doc: Union[Segmentation, XMLPage]): self.class_mapping['regions'][reg_type] = self.num_classes - 1 self.targets.append({'baselines': baselines_, 'regions': regions_}) - self.imgs.append(image) + self.imgs.append(doc.imagename) def __getitem__(self, idx): im = self.imgs[idx] diff --git a/kraken/lib/dataset/utils.py b/kraken/lib/dataset/utils.py index 728212ec5..64565bb94 100644 --- a/kraken/lib/dataset/utils.py +++ b/kraken/lib/dataset/utils.py @@ -26,7 +26,6 @@ from collections import Counter from typing import Dict, List, Tuple, Sequence, Any, Union -from kraken.lib.models import TorchSeqRecognizer from kraken.lib.exceptions import KrakenInputException from kraken.lib.lineest import CenterNormalizer diff --git a/kraken/lib/pretrain/model.py b/kraken/lib/pretrain/model.py index fdd956ad1..5e05ffd84 100644 --- a/kraken/lib/pretrain/model.py +++ b/kraken/lib/pretrain/model.py @@ -38,15 +38,14 @@ from os import PathLike from itertools import chain -from functools import partial from torch.optim import lr_scheduler -from torch.multiprocessing import Pool from typing import Dict, Optional, Sequence, Union, Any from pytorch_lightning.callbacks import EarlyStopping from pytorch_lightning.utilities.memory import is_oom_error, 
garbage_collection_cuda from kraken.lib import vgsl, default_specs, layers from kraken.lib.xml import XMLPage +from kraken.lib.util import parse_gt_path from kraken.lib.codec import PytorchCodec from kraken.lib.dataset import (ArrowIPCRecognitionDataset, GroundTruthDataset, PolygonGTDataset, @@ -138,10 +137,10 @@ def __init__(self, logger.warning('Internal binary dataset splits are enabled but using non-binary dataset files. Will be ignored.') binary_dataset_split = False logger.info(f'Got {len(training_data)} line strip images for training data') - training_data = [{'image': im} for im in training_data] + training_data = [{'line': parse_gt_path(im)} for im in training_data] if evaluation_data: logger.info(f'Got {len(evaluation_data)} line strip images for validation data') - evaluation_data = [{'image': im} for im in evaluation_data] + evaluation_data = [{'line': parse_gt_path(im)} for im in evaluation_data] valid_norm = True # format_type is None. Determine training type from length of training data entry elif not format_type: @@ -375,7 +374,6 @@ def validation_step(self, batch, batch_idx): with torch.no_grad(): if logits.numel() == 0: corr = 0 - count = 0 else: _max = logits.argmax(-1) == 0 _min = logits.argmin(-1) == 0 diff --git a/kraken/lib/ro/model.py b/kraken/lib/ro/model.py index 4fa957144..6724500d2 100644 --- a/kraken/lib/ro/model.py +++ b/kraken/lib/ro/model.py @@ -17,8 +17,6 @@ Adapted from: """ -import re -import math import torch import logging import numpy as np @@ -33,22 +31,23 @@ from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor -from kraken.lib import vgsl, default_specs, layers +from kraken.lib import default_specs from kraken.lib.dataset import PairWiseROSet, PageWiseROSet from kraken.lib.train import _configure_optimizer_and_lr_scheduler from kraken.lib.segmentation import _greedy_order_decoder from kraken.lib.ro.layers import MLP -from torch.utils.data import DataLoader, random_split, Subset +from torch.utils.data 
import DataLoader, Subset logger = logging.getLogger(__name__) + @dataclass class DummyVGSLModel: hyper_params: Dict[str, int] = field(default_factory=dict) user_metadata: Dict[str, List] = field(default_factory=dict) - one_channel_mode: Literal['1', 'L'] = '1' + one_channel_mode: Literal['1', 'L'] = '1' ptl_module: Module = None model_type: str = 'unknown' @@ -128,7 +127,7 @@ def __init__(self, self.best_epoch = -1 self.best_metric = torch.inf - logger.info(f'Creating new RO model') + logger.info('Creating new RO model') self.ro_net = MLP(train_set.get_feature_dim(), train_set.get_feature_dim() * 2) if 'file_system' in torch.multiprocessing.get_all_sharing_strategies(): @@ -245,4 +244,3 @@ def lr_scheduler_step(self, scheduler, metric): scheduler.step() else: scheduler.step(metric) - diff --git a/kraken/lib/segmentation.py b/kraken/lib/segmentation.py index 58984679b..5fe932b11 100644 --- a/kraken/lib/segmentation.py +++ b/kraken/lib/segmentation.py @@ -40,7 +40,7 @@ from skimage.morphology import skeletonize from skimage.transform import PiecewiseAffineTransform, SimilarityTransform, AffineTransform, warp -from typing import List, Tuple, Union, Dict, Any, Sequence, Optional, Literal +from typing import List, Tuple, Union, Dict, Sequence, Optional, Literal from kraken.lib import default_specs from kraken.lib.exceptions import KrakenInputException diff --git a/kraken/lib/train.py b/kraken/lib/train.py index 80395ec47..a3136c928 100644 --- a/kraken/lib/train.py +++ b/kraken/lib/train.py @@ -24,16 +24,15 @@ import pytorch_lightning as pl from os import PathLike -from functools import partial -from torch.multiprocessing import Pool from torchmetrics.classification import MultilabelAccuracy, MultilabelJaccardIndex from torchmetrics.text import CharErrorRate, WordErrorRate from torch.optim import lr_scheduler from typing import Callable, Dict, Optional, Sequence, Union, Any, Literal from pytorch_lightning.callbacks import Callback, EarlyStopping, BaseFinetuning, 
LearningRateMonitor +from kraken.containers import Segmentation, XMLPage from kraken.lib import models, vgsl, default_specs, progress -from kraken.lib.util import make_printable +from kraken.lib.util import make_printable, parse_gt_path from kraken.lib.codec import PytorchCodec from kraken.lib.dataset import (ArrowIPCRecognitionDataset, BaselineSet, GroundTruthDataset, PolygonGTDataset, @@ -56,6 +55,7 @@ def _star_fun(fun, kwargs): logger.warning(str(e)) return None + def _validation_worker_init_fn(worker_id): """ Fix random seeds so that augmentation always produces the same results when validating. Temporarily increase the logging level @@ -67,6 +67,7 @@ def _validation_worker_init_fn(worker_id): seed_everything(42) logging.getLogger("lightning_fabric.utilities.seed").setLevel(level) + class KrakenTrainer(pl.Trainer): def __init__(self, enable_progress_bar: bool = True, @@ -301,10 +302,10 @@ def __init__(self, logger.warning('Internal binary dataset splits are enabled but using non-binary dataset files. Will be ignored.') binary_dataset_split = False logger.info(f'Got {len(training_data)} line strip images for training data') - training_data = [{'image': im} for im in training_data] + training_data = [{'line': parse_gt_path(im)} for im in training_data] if evaluation_data: logger.info(f'Got {len(evaluation_data)} line strip images for validation data') - evaluation_data = [{'image': im} for im in evaluation_data] + evaluation_data = [{'line': parse_gt_path(im)} for im in evaluation_data] valid_norm = True # format_type is None. Determine training type from container class types elif not format_type: @@ -518,7 +519,7 @@ def setup(self, stage: Optional[str] = None): # Log a few sample images before the datasets are encoded. 
# This is only possible for Arrow datasets, because the # other dataset types can only be accessed after encoding - if self.logger and isinstance(self.train_set.dataset, ArrowIPCRecognitionDataset) : + if self.logger and isinstance(self.train_set.dataset, ArrowIPCRecognitionDataset): for i in range(min(len(self.train_set), 16)): idx = np.random.randint(len(self.train_set)) sample = self.train_set[idx] @@ -653,7 +654,6 @@ def configure_optimizers(self): len_train_set=len(self.train_set), loss_tracking_mode='max') - def optimizer_step(self, epoch, batch_idx, optimizer, optimizer_closure): # update params optimizer.step(closure=optimizer_closure) diff --git a/kraken/lib/util.py b/kraken/lib/util.py index 359271e41..75d5fbe26 100644 --- a/kraken/lib/util.py +++ b/kraken/lib/util.py @@ -2,15 +2,21 @@ Ocropus's magic PIL-numpy array conversion routines. They express slightly different behavior from PIL.Image.toarray(). """ +import uuid import torch import unicodedata import numpy as np from PIL import Image +from os import PathLike -from typing import Union +from typing import Union, Callable, Optional, Literal -__all__ = ['pil2array', 'array2pil', 'is_bitonal', 'make_printable', 'get_im_str'] +from kraken.containers import BBoxLine +from kraken.lib import functional_im_transforms as F_t +from kraken.lib.exceptions import KrakenInputException + +__all__ = ['pil2array', 'array2pil', 'is_bitonal', 'make_printable', 'get_im_str', 'parse_gt_path'] def pil2array(im: Image.Image, alpha: int = 0) -> np.ndarray: @@ -93,3 +99,47 @@ def make_printable(char: str) -> str: return '0x{:x}'.format(ord(char)) else: return unicodedata.name(char) + + +def parse_gt_path(path: Union[str, PathLike], + suffix: str = '.gt.txt', + split: Callable[[Union[PathLike, str]], str] = F_t.default_split, + skip_empty_lines: bool = True, + base_dir: Optional[Literal['L', 'R']] = None, + text_direction: Literal['horizontal-lr', 'horizontal-rl', 'vertical-lr', 'vertical-rl'] = 'horizontal-lr') -> BBoxLine: + """ + Returns a BBoxLine from an image/text file pair. 
+ + Args: + path: Path to image file + suffix: Suffix of the corresponding ground truth text file to image + file in `path`. + split: Suffix stripping function. + skip_empty_lines: Whether to raise an exception if ground truth is + empty or text file is missing. + base_dir: Unicode BiDi algorithm base direction + text_direction: Orientation of the line box. + """ + try: + with Image.open(path) as im: + w, h = im.size + except Exception as e: + raise KrakenInputException(e) + + gt = '' + try: + with open(F_t.suffix_split(path, split=split, suffix=suffix), 'r', encoding='utf-8') as fp: + gt = fp.read().strip('\n\r') + except FileNotFoundError: + if not skip_empty_lines: + raise KrakenInputException(f'No text file found for ground truth line {path}.') + + if not gt and skip_empty_lines: + raise KrakenInputException(f'No text for ground truth line {path}.') + + return BBoxLine(id=uuid.uuid4(), + bbox=((0,0), (w,0), (w,h), (0,h)), + text=gt, + base_dir=base_dir, + imagename=path, + text_direction=text_direction) diff --git a/kraken/pageseg.py b/kraken/pageseg.py index b98b175b7..6cc94ea31 100644 --- a/kraken/pageseg.py +++ b/kraken/pageseg.py @@ -24,7 +24,7 @@ import logging import numpy as np -from typing import Tuple, List, Callable, Optional, Dict, Any, Union +from typing import Tuple, List, Callable, Optional, Union from scipy.ndimage.filters import (gaussian_filter, uniform_filter, maximum_filter) diff --git a/kraken/repo.py b/kraken/repo.py index 62d02047f..6f484db4a 100644 --- a/kraken/repo.py +++ b/kraken/repo.py @@ -17,7 +17,6 @@ """ import os import json -import urllib import logging import requests diff --git a/kraken/rpred.py b/kraken/rpred.py index 823cad5ea..da7e6afcf 100644 --- a/kraken/rpred.py +++ b/kraken/rpred.py @@ -20,7 +20,6 @@ """ import logging import dataclasses -import numpy as np from PIL import Image from functools import partial @@ -34,7 +33,6 @@ from kraken.lib.exceptions import KrakenInputException from kraken.lib.dataset import 
ImageInputTransforms -import copy __all__ = ['mm_rpred', 'rpred'] @@ -151,7 +149,6 @@ def _recognize_box_line(self, line): flat_box = [point for box in line.bbox for point in box] xmin, xmax = min(flat_box[::2]), max(flat_box[::2]) ymin, ymax = min(flat_box[1::2]), max(flat_box[1::2]) - line_bbox = ((xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin)) prediction = '' cuts = [] confidences = [] diff --git a/kraken/serialization.py b/kraken/serialization.py index ae6679762..930f3eeb3 100644 --- a/kraken/serialization.py +++ b/kraken/serialization.py @@ -17,7 +17,6 @@ import regex import logging import datetime -import shapely.geometry as geom from os import PathLike from pkg_resources import get_distribution @@ -25,13 +24,12 @@ from kraken.containers import Segmentation, ProcessingStep from kraken.lib.util import make_printable -from kraken.lib.segmentation import is_in_region -from typing import Union, List, Tuple, Iterable, Optional, Sequence, Dict, Any, Literal +from typing import List, Tuple, Iterable, Optional, Sequence, Literal logger = logging.getLogger(__name__) -__all__ = ['serialize', 'serialize_segmentation', 'render_report'] +__all__ = ['serialize', 'render_report'] def _rescale(val: Sequence[float], low: float, high: float) -> List[float]: @@ -127,14 +125,13 @@ def serialize(results: Segmentation, if line.tags is not None: types.extend((k, v) for k, v in line.tags.items()) page['line_types'] = list(set(types)) - page['region_types'] =[list(results.regions.keys())] + page['region_types'] = [list(results.regions.keys())] # map reading orders indices to line IDs ros = [] for ro in results.line_orders: ros.append([results.lines[idx].id for idx in ro]) page['line_orders'] = ros - # build region ID to region dict reg_dict = {} for key, regs in results.regions.items():