diff --git a/kraken/blla.py b/kraken/blla.py
index ac05ea5fa..00370bc25 100644
--- a/kraken/blla.py
+++ b/kraken/blla.py
@@ -390,7 +390,6 @@ def segment(im: PIL.Image.Image,
 
     # create objects and assign IDs
     blls = []
-    reg_idx = 0
     _shp_regs = {}
     for reg_type, rgs in regions.items():
         for reg in rgs:
diff --git a/kraken/containers.py b/kraken/containers.py
index 48b4719fe..54885b81d 100644
--- a/kraken/containers.py
+++ b/kraken/containers.py
@@ -1,5 +1,24 @@
-
-import PIL.Image
+#
+# Copyright 2023 Benjamin Kiessling
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing
+# permissions and limitations under the License.
+"""
+kraken.containers
+~~~~~~~~~~~~~~~~~
+
+Container classes replacing the old dictionaries returned by kraken's
+functional blocks.
+"""
 import numpy as np
 import bidi.algorithm as bd
 
@@ -187,9 +206,9 @@ class ocr_record(ABC):
     def __init__(self,
                  prediction: str,
                  cuts: List[Union[Tuple[int, int],
                                   Tuple[Tuple[int, int],
-                                  Tuple[int, int],
-                                  Tuple[int, int],
-                                  Tuple[int, int]]]],
+                                        Tuple[int, int],
+                                        Tuple[int, int],
+                                        Tuple[int, int]]]],
                  confidences: List[float],
                  display_order: bool = True) -> None:
         self._prediction = prediction
@@ -463,9 +482,9 @@ class BBoxOCRRecord(ocr_record, BBoxLine):
     def __init__(self,
                  prediction: str,
                  cuts: List[Tuple[Tuple[int, int],
-                            Tuple[int, int],
-                            Tuple[int, int],
-                            Tuple[int, int]]],
+                                  Tuple[int, int],
+                                  Tuple[int, int],
+                                  Tuple[int, int]]],
                  confidences: List[float],
                  line: Union[BBoxLine, Dict[str, Any]],
                  base_dir: Optional[Literal['L', 'R']] = None,
@@ -593,5 +612,3 @@ def _reorder(self, base_dir: Optional[Literal['L', 'R']] = None) -> 'BBoxOCRReco
                              base_dir=base_dir,
                              display_order=not self._display_order)
         return rec
-
-
diff --git a/kraken/lib/arrow_dataset.py b/kraken/lib/arrow_dataset.py
index be5fe607b..7f7e00962 100755
--- a/kraken/lib/arrow_dataset.py
+++ b/kraken/lib/arrow_dataset.py
@@ -34,7 +34,7 @@
 from kraken.lib.xml import XMLPage
 from kraken.lib.util import is_bitonal, make_printable
 from kraken.lib.exceptions import KrakenInputException
-from os import extsep, PathLike
+from os import PathLike
 
 import logging
 
diff --git a/kraken/lib/codec.py b/kraken/lib/codec.py
index 481951f4e..a72e90da6 100644
--- a/kraken/lib/codec.py
+++ b/kraken/lib/codec.py
@@ -169,9 +169,9 @@ def decode(self, labels: Sequence[Tuple[int, int, int, float]]) -> List[Tuple[st
             if int(labels[idx]) in self.l2c_single:
                 code = self.l2c_single[int(labels[idx])]
                 decoded.extend([(c, s, e, u) for c, s, e, u in zip(code,
-                                len(code) * [start[idx]],
-                                len(code) * [end[idx]],
-                                len(code) * [con[idx]])])
+                                                                   len(code) * [start[idx]],
+                                                                   len(code) * [end[idx]],
+                                                                   len(code) * [con[idx]])])
                 idx += 1
                 decodable_suffix = True
             else:
diff --git a/kraken/lib/layers.py b/kraken/lib/layers.py
index c86dbcad5..0c4fee1ef 100644
--- a/kraken/lib/layers.py
+++ b/kraken/lib/layers.py
@@ -12,7 +12,7 @@
 
 logger = logging.getLogger('coremltools')
 logger.setLevel(logging.ERROR)
-from coremltools.proto import NeuralNetwork_pb2
+from coremltools.proto import NeuralNetwork_pb2  # NOQA
 logger.setLevel(logging.WARNING)
 
 # all tensors are ordered NCHW, the "feature" dimension is C, so the output of
"feature" dimension is C, so the output of @@ -745,6 +745,7 @@ def resize(self, output_size: int, del_indices: Optional[Iterable[int]] = None) self.lin.weight = torch.nn.Parameter(weight) self.lin.bias = torch.nn.Parameter(bias) + class ActConv2D(Module): """ A wrapper for convolution + activation with automatic padding ensuring no @@ -760,7 +761,7 @@ def __init__(self, in_channels: int, out_channels: int, kernel_size: Tuple[int, self.dilation = dilation self.padding = tuple((dilation[i] * (kernel_size[i] - 1)) // 2 for i in range(2)) self.transposed = transposed - + if nl == 's': self.nl = torch.sigmoid self.nl_name = 'SIGMOID' @@ -779,7 +780,7 @@ def __init__(self, in_channels: int, out_channels: int, kernel_size: Tuple[int, else: self.nl_name = 'LINEAR' self.nl = lambda x: x - + if self.transposed: self.co = torch.nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=stride, padding=self.padding, dilation=self.dilation) @@ -799,8 +800,8 @@ def forward(self, inputs: torch.Tensor, seq_len: Optional[torch.Tensor] = None, if seq_len is not None: if self.transposed: seq_len = torch.floor( - ((seq_len - 1) * self.stride[1]\ - - 2 * self.padding[1]\ + ((seq_len - 1) * self.stride[1] + - 2 * self.padding[1] + self.dilation[1] * (self.kernel_size[1] - 1) + 1)) else: diff --git a/kraken/lib/pretrain/model.py b/kraken/lib/pretrain/model.py index 5e05ffd84..63f53f199 100644 --- a/kraken/lib/pretrain/model.py +++ b/kraken/lib/pretrain/model.py @@ -43,6 +43,8 @@ from pytorch_lightning.callbacks import EarlyStopping from pytorch_lightning.utilities.memory import is_oom_error, garbage_collection_cuda +from kraken.containers import Segmentation + from kraken.lib import vgsl, default_specs, layers from kraken.lib.xml import XMLPage from kraken.lib.util import parse_gt_path @@ -371,14 +373,6 @@ def validation_step(self, batch, batch_idx): o = self._step(batch, batch_idx) if o is not None: logits, targets, loss = o - with torch.no_grad(): - if logits.numel() == 0: - corr = 0 - else: - _max = logits.argmax(-1) == 0 - _min = logits.argmin(-1) == 0 - both = _max & _min - corr = _max.long().sum().item() - both.long().sum().item() self.val_ce.append(loss.cpu()) self.log('CE', loss, on_step=True, on_epoch=True) diff --git a/kraken/lib/progress.py b/kraken/lib/progress.py index 344864d01..e3dbbac8b 100644 --- a/kraken/lib/progress.py +++ b/kraken/lib/progress.py @@ -15,22 +15,21 @@ """ Handlers for rich-based progress bars. 
""" -from typing import Any, Dict, Optional, Union -from numbers import Number +from typing import Union from dataclasses import dataclass -import pytorch_lightning as pl from pytorch_lightning.callbacks.progress.rich_progress import CustomProgress, RichProgressBar, MetricsTextColumn from rich import get_console, reconfigure -from rich.console import Console, RenderableType -from rich.progress import BarColumn, Progress, ProgressColumn, Task, TextColumn, TimeRemainingColumn, TimeElapsedColumn, DownloadColumn +from rich.console import RenderableType +from rich.progress import BarColumn, Progress, ProgressColumn, TextColumn, TimeRemainingColumn, TimeElapsedColumn, DownloadColumn from rich.text import Text from rich.style import Style from rich.default_styles import DEFAULT_STYLES __all__ = ['KrakenProgressBar', 'KrakenDownloadProgressBar', 'KrakenTrainProgressBar'] + class BatchesProcessedColumn(ProgressColumn): def __init__(self): super().__init__() @@ -130,6 +129,7 @@ def _get_train_description(self, current_epoch: int) -> str: return f"stage {current_epoch}/" \ f"{self.trainer.max_epochs if self.trainer.model.hparams.hyper_params['quit'] == 'fixed' else '∞'}" + @dataclass class RichProgressBarTheme: """Styles to associate to different base components. diff --git a/kraken/lib/util.py b/kraken/lib/util.py index 75d5fbe26..66ac7faec 100644 --- a/kraken/lib/util.py +++ b/kraken/lib/util.py @@ -10,12 +10,13 @@ from PIL import Image from os import PathLike -from typing import Union, Callable +from typing import Union, Callable, Optional, Literal -from kraken.containers import BBoxLine from kraken.lib import functional_im_transforms as F_t +from kraken.containers import BBoxLine +from kraken.exceptions import KrakenInputException -__all__ = ['pil2array', 'array2pil', 'is_bitonal', 'make_printable', 'get_im_str', 'parse_path'] +__all__ = ['pil2array', 'array2pil', 'is_bitonal', 'make_printable', 'get_im_str', 'parse_gt_path'] def pil2array(im: Image.Image, alpha: int = 0) -> np.ndarray: @@ -122,7 +123,7 @@ def parse_gt_path(path: Union[str, PathLike], try: with Image.open(path) as im: w, h = im.size - except Exception e: + except Exception as e: raise KrakenInputException(e) gt = '' @@ -137,7 +138,7 @@ def parse_gt_path(path: Union[str, PathLike], raise KrakenInputException(f'No text for ground truth line {path}.') return BBoxLine(id=uuid.uuid4(), - bbox=((0,0), (w,0), (w,h), (0,h)), + bbox=((0, 0), (w, 0), (w, h), (0, h)), text=gt, base_dir=base_dir, imagename=path, diff --git a/kraken/lib/vgsl.py b/kraken/lib/vgsl.py index fe2d5f4ba..2bc83d693 100644 --- a/kraken/lib/vgsl.py +++ b/kraken/lib/vgsl.py @@ -5,7 +5,6 @@ import json import torch import logging -import warnings from torch import nn from os import PathLike @@ -496,7 +495,7 @@ def build_dropout(self, input: Tuple[int, int, int, int], blocks: List[str], idx: int, - target_output_shape: Optional[Tuple[int, int, int, int]] = None) -> Union[Tuple[None, None, None], Tuple[Tuple[int, int, int, int], str, Callable]]: + target_output_shape: Optional[Tuple[int, int, int, int]] = None) -> Union[Tuple[None, None, None], Tuple[Tuple[int, int, int, int], str, Callable]]: pattern = re.compile(r'(?PDo)(?P{\w+})?(?P

(\d+(\.\d*)?|\.\d+))?(,(?P\d+))?') m = pattern.match(blocks[idx]) if not m: @@ -735,7 +734,7 @@ def _bracket_count(self, block: str) -> int: elif c != ")": break return rst - + def _parenthesis_count(self, block: str) -> int: rst = 0 for c in block: @@ -777,7 +776,7 @@ def build_series(self, named_spec[0]._block = '[' + named_spec[0]._block named_spec[-1]._block = named_spec[-1]._block + ']' return oshape, named_spec, nn - + def build_parallel(self, input: Tuple[int, int, int, int], blocks: List[str], diff --git a/kraken/lib/xml.py b/kraken/lib/xml.py index fb9bcec4e..d65535489 100644 --- a/kraken/lib/xml.py +++ b/kraken/lib/xml.py @@ -23,13 +23,10 @@ from itertools import groupby from lxml import etree -from PIL import Image from typing import Union, Dict, Any, Sequence, Tuple, Literal, Optional, List from collections import defaultdict from kraken.containers import Segmentation, BaselineLine, Region -from kraken.lib.segmentation import calculate_polygonal_environment -from kraken.lib.exceptions import KrakenInputException logger = logging.getLogger(__name__) @@ -247,7 +244,7 @@ def _parse_group(el): _ro = [] if el.tag.endswith('UnorderedGroup'): _ro = [_parse_group(x) for x in el.iterchildren()] - is_total = False + is_total = False # NOQA elif el.tag.endswith('OrderedGroup'): _ro.extend(_parse_group(x) for x in el.iterchildren()) else: @@ -306,7 +303,6 @@ def _parse_page(self): self._tag_set = set(('default',)) tmp_transkribus_line_order = defaultdict(list) - valid_tr_lo = True for region in regions: if not any([True if region.tag.endswith(k) else False for k in page_regions.keys()]): @@ -329,7 +325,7 @@ def _parse_page(self): if not rtype and 'structure' in cs and 'type' in cs['structure']: rtype = cs['structure']['type'] # transkribus-style reading order - if 'readingOrder' in cs and 'index'in cs['readingOrder']: + if 'readingOrder' in cs and 'index' in cs['readingOrder']: tr_region_order.append((region.get('id'), int(cs['readingOrder']['index']))) # fall back to default region type if nothing is given if not rtype: @@ -388,7 +384,6 @@ def _parse_page(self): reg_cus = self._parse_page_custom(line.getparent().get('custom')) if 'readingOrder' not in reg_cus or 'index' not in reg_cus['readingOrder']: logger.warning('Incomplete `custom` attribute reading order found.') - valid_tr_lo = False else: tmp_transkribus_line_order[int(reg_cus['readingOrder']['index'])].append((int(cs['readingOrder']['index']), line.get('id'))) @@ -433,7 +428,7 @@ def _parse_group(el): _ro = [] if el.tag.endswith('UnorderedGroup'): _ro = [_parse_group(x) for x in el.iterchildren()] - is_total = False + is_total = False # NOQA elif el.tag.endswith('OrderedGroup'): _ro.extend(_parse_group(x) for x in el.iterchildren()) else: diff --git a/kraken/transcribe.py b/kraken/transcribe.py index 5b39ee2f7..fd4b645ab 100644 --- a/kraken/transcribe.py +++ b/kraken/transcribe.py @@ -18,6 +18,7 @@ from kraken.lib.exceptions import KrakenInputException from kraken.lib.util import get_im_str +from typing import List from jinja2 import Environment, PackageLoader from io import BytesIO
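To make the container refactor touched above concrete, here is a minimal usage sketch (not part of the diff). It assumes parse_gt_path keeps the defaults of the old parse_path it replaces, i.e. that it can be called with only the image path and picks up the transcription from the accompanying ground-truth text file; 'line.png' is a placeholder path, not a file shipped with kraken.

# Usage sketch, assuming old-style defaults: parse_gt_path now returns a
# BBoxLine container instead of the old dict-based record.
from kraken.lib.util import parse_gt_path

line = parse_gt_path('line.png')  # placeholder path to a line image + ground truth
print(line.id)    # UUID assigned inside parse_gt_path
print(line.bbox)  # ((0, 0), (w, 0), (w, h), (0, h)) for a w x h line image
print(line.text)  # transcription read from the ground-truth file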