Skip to content

Commit

Permalink
More linting
Browse files: browse the repository at this point in the history.
See #557.
  • Loading branch information
mittagessen committed Dec 11, 2023
1 parent 56d84b0 commit f403185
Show file tree
Hide file tree
Showing 11 changed files with 57 additions and 50 deletions.
1 change: 0 additions & 1 deletion kraken/blla.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,6 @@ def segment(im: PIL.Image.Image,

# create objects and assign IDs
blls = []
reg_idx = 0
_shp_regs = {}
for reg_type, rgs in regions.items():
for reg in rgs:
Expand Down
37 changes: 27 additions & 10 deletions kraken/containers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,24 @@

import PIL.Image
#
# Copyright 2023 Benjamin Kiessling
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing
# permissions and limitations under the License.
"""
kraken.containers
~~~~~~~~~~~~~~~~~
Container classes replacing the old dictionaries returned by kraken's
functional blocks.
"""
import numpy as np
import bidi.algorithm as bd

Expand Down Expand Up @@ -187,9 +206,9 @@ class ocr_record(ABC):
def __init__(self,
prediction: str,
cuts: List[Union[Tuple[int, int], Tuple[Tuple[int, int],
Tuple[int, int],
Tuple[int, int],
Tuple[int, int]]]],
Tuple[int, int],
Tuple[int, int],
Tuple[int, int]]]],
confidences: List[float],
display_order: bool = True) -> None:
self._prediction = prediction
Expand Down Expand Up @@ -463,9 +482,9 @@ class BBoxOCRRecord(ocr_record, BBoxLine):
def __init__(self,
prediction: str,
cuts: List[Tuple[Tuple[int, int],
Tuple[int, int],
Tuple[int, int],
Tuple[int, int]]],
Tuple[int, int],
Tuple[int, int],
Tuple[int, int]]],
confidences: List[float],
line: Union[BBoxLine, Dict[str, Any]],
base_dir: Optional[Literal['L', 'R']] = None,
Expand Down Expand Up @@ -593,5 +612,3 @@ def _reorder(self, base_dir: Optional[Literal['L', 'R']] = None) -> 'BBoxOCRReco
base_dir=base_dir,
display_order=not self._display_order)
return rec


2 changes: 1 addition & 1 deletion kraken/lib/arrow_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
from kraken.lib.xml import XMLPage
from kraken.lib.util import is_bitonal, make_printable
from kraken.lib.exceptions import KrakenInputException
from os import extsep, PathLike
from os import PathLike

import logging

Expand Down
6 changes: 3 additions & 3 deletions kraken/lib/codec.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,9 +169,9 @@ def decode(self, labels: Sequence[Tuple[int, int, int, float]]) -> List[Tuple[st
if int(labels[idx]) in self.l2c_single:
code = self.l2c_single[int(labels[idx])]
decoded.extend([(c, s, e, u) for c, s, e, u in zip(code,
len(code) * [start[idx]],
len(code) * [end[idx]],
len(code) * [con[idx]])])
len(code) * [start[idx]],
len(code) * [end[idx]],
len(code) * [con[idx]])])
idx += 1
decodable_suffix = True
else:
Expand Down
11 changes: 6 additions & 5 deletions kraken/lib/layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

logger = logging.getLogger('coremltools')
logger.setLevel(logging.ERROR)
from coremltools.proto import NeuralNetwork_pb2
from coremltools.proto import NeuralNetwork_pb2 # NOQA
logger.setLevel(logging.WARNING)

# all tensors are ordered NCHW, the "feature" dimension is C, so the output of
Expand Down Expand Up @@ -745,6 +745,7 @@ def resize(self, output_size: int, del_indices: Optional[Iterable[int]] = None)
self.lin.weight = torch.nn.Parameter(weight)
self.lin.bias = torch.nn.Parameter(bias)


class ActConv2D(Module):
"""
A wrapper for convolution + activation with automatic padding ensuring no
Expand All @@ -760,7 +761,7 @@ def __init__(self, in_channels: int, out_channels: int, kernel_size: Tuple[int,
self.dilation = dilation
self.padding = tuple((dilation[i] * (kernel_size[i] - 1)) // 2 for i in range(2))
self.transposed = transposed

if nl == 's':
self.nl = torch.sigmoid
self.nl_name = 'SIGMOID'
Expand All @@ -779,7 +780,7 @@ def __init__(self, in_channels: int, out_channels: int, kernel_size: Tuple[int,
else:
self.nl_name = 'LINEAR'
self.nl = lambda x: x

if self.transposed:
self.co = torch.nn.ConvTranspose2d(in_channels, out_channels, kernel_size,
stride=stride, padding=self.padding, dilation=self.dilation)
Expand All @@ -799,8 +800,8 @@ def forward(self, inputs: torch.Tensor, seq_len: Optional[torch.Tensor] = None,
if seq_len is not None:
if self.transposed:
seq_len = torch.floor(
((seq_len - 1) * self.stride[1]\
- 2 * self.padding[1]\
((seq_len - 1) * self.stride[1]
- 2 * self.padding[1]
+ self.dilation[1] * (self.kernel_size[1] - 1)
+ 1))
else:
Expand Down
10 changes: 2 additions & 8 deletions kraken/lib/pretrain/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@
from pytorch_lightning.callbacks import EarlyStopping
from pytorch_lightning.utilities.memory import is_oom_error, garbage_collection_cuda

from kraken.containers import Segmentation

from kraken.lib import vgsl, default_specs, layers
from kraken.lib.xml import XMLPage
from kraken.lib.util import parse_gt_path
Expand Down Expand Up @@ -371,14 +373,6 @@ def validation_step(self, batch, batch_idx):
o = self._step(batch, batch_idx)
if o is not None:
logits, targets, loss = o
with torch.no_grad():
if logits.numel() == 0:
corr = 0
else:
_max = logits.argmax(-1) == 0
_min = logits.argmin(-1) == 0
both = _max & _min
corr = _max.long().sum().item() - both.long().sum().item()
self.val_ce.append(loss.cpu())
self.log('CE', loss, on_step=True, on_epoch=True)

Expand Down
10 changes: 5 additions & 5 deletions kraken/lib/progress.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,22 +15,21 @@
"""
Handlers for rich-based progress bars.
"""
from typing import Any, Dict, Optional, Union
from numbers import Number
from typing import Union
from dataclasses import dataclass

import pytorch_lightning as pl
from pytorch_lightning.callbacks.progress.rich_progress import CustomProgress, RichProgressBar, MetricsTextColumn

from rich import get_console, reconfigure
from rich.console import Console, RenderableType
from rich.progress import BarColumn, Progress, ProgressColumn, Task, TextColumn, TimeRemainingColumn, TimeElapsedColumn, DownloadColumn
from rich.console import RenderableType
from rich.progress import BarColumn, Progress, ProgressColumn, TextColumn, TimeRemainingColumn, TimeElapsedColumn, DownloadColumn
from rich.text import Text
from rich.style import Style
from rich.default_styles import DEFAULT_STYLES

__all__ = ['KrakenProgressBar', 'KrakenDownloadProgressBar', 'KrakenTrainProgressBar']


class BatchesProcessedColumn(ProgressColumn):
def __init__(self):
super().__init__()
Expand Down Expand Up @@ -130,6 +129,7 @@ def _get_train_description(self, current_epoch: int) -> str:
return f"stage {current_epoch}/" \
f"{self.trainer.max_epochs if self.trainer.model.hparams.hyper_params['quit'] == 'fixed' else '∞'}"


@dataclass
class RichProgressBarTheme:
"""Styles to associate to different base components.
Expand Down
11 changes: 6 additions & 5 deletions kraken/lib/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,13 @@
from PIL import Image
from os import PathLike

from typing import Union, Callable
from typing import Union, Callable, Optional, Literal

from kraken.containers import BBoxLine
from kraken.lib import functional_im_transforms as F_t
from kraken.containers import BBoxLine
from kraken.exceptions import KrakenInputException

__all__ = ['pil2array', 'array2pil', 'is_bitonal', 'make_printable', 'get_im_str', 'parse_path']
__all__ = ['pil2array', 'array2pil', 'is_bitonal', 'make_printable', 'get_im_str', 'parse_gt_path']


def pil2array(im: Image.Image, alpha: int = 0) -> np.ndarray:
Expand Down Expand Up @@ -122,7 +123,7 @@ def parse_gt_path(path: Union[str, PathLike],
try:
with Image.open(path) as im:
w, h = im.size
except Exception e:
except Exception as e:
raise KrakenInputException(e)

gt = ''
Expand All @@ -137,7 +138,7 @@ def parse_gt_path(path: Union[str, PathLike],
raise KrakenInputException(f'No text for ground truth line {path}.')

return BBoxLine(id=uuid.uuid4(),
bbox=((0,0), (w,0), (w,h), (0,h)),
bbox=((0, 0), (w, 0), (w, h), (0, h)),
text=gt,
base_dir=base_dir,
imagename=path,
Expand Down
7 changes: 3 additions & 4 deletions kraken/lib/vgsl.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import json
import torch
import logging
import warnings

from torch import nn
from os import PathLike
Expand Down Expand Up @@ -496,7 +495,7 @@ def build_dropout(self,
input: Tuple[int, int, int, int],
blocks: List[str],
idx: int,
target_output_shape: Optional[Tuple[int, int, int, int]] = None) -> Union[Tuple[None, None, None], Tuple[Tuple[int, int, int, int], str, Callable]]:
target_output_shape: Optional[Tuple[int, int, int, int]] = None) -> Union[Tuple[None, None, None], Tuple[Tuple[int, int, int, int], str, Callable]]:
pattern = re.compile(r'(?P<type>Do)(?P<name>{\w+})?(?P<p>(\d+(\.\d*)?|\.\d+))?(,(?P<dim>\d+))?')
m = pattern.match(blocks[idx])
if not m:
Expand Down Expand Up @@ -735,7 +734,7 @@ def _bracket_count(self, block: str) -> int:
elif c != ")":
break
return rst

def _parenthesis_count(self, block: str) -> int:
rst = 0
for c in block:
Expand Down Expand Up @@ -777,7 +776,7 @@ def build_series(self,
named_spec[0]._block = '[' + named_spec[0]._block
named_spec[-1]._block = named_spec[-1]._block + ']'
return oshape, named_spec, nn

def build_parallel(self,
input: Tuple[int, int, int, int],
blocks: List[str],
Expand Down
11 changes: 3 additions & 8 deletions kraken/lib/xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,10 @@

from itertools import groupby
from lxml import etree
from PIL import Image
from typing import Union, Dict, Any, Sequence, Tuple, Literal, Optional, List

from collections import defaultdict
from kraken.containers import Segmentation, BaselineLine, Region
from kraken.lib.segmentation import calculate_polygonal_environment
from kraken.lib.exceptions import KrakenInputException

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -247,7 +244,7 @@ def _parse_group(el):
_ro = []
if el.tag.endswith('UnorderedGroup'):
_ro = [_parse_group(x) for x in el.iterchildren()]
is_total = False
is_total = False # NOQA
elif el.tag.endswith('OrderedGroup'):
_ro.extend(_parse_group(x) for x in el.iterchildren())
else:
Expand Down Expand Up @@ -306,7 +303,6 @@ def _parse_page(self):

self._tag_set = set(('default',))
tmp_transkribus_line_order = defaultdict(list)
valid_tr_lo = True

for region in regions:
if not any([True if region.tag.endswith(k) else False for k in page_regions.keys()]):
Expand All @@ -329,7 +325,7 @@ def _parse_page(self):
if not rtype and 'structure' in cs and 'type' in cs['structure']:
rtype = cs['structure']['type']
# transkribus-style reading order
if 'readingOrder' in cs and 'index'in cs['readingOrder']:
if 'readingOrder' in cs and 'index' in cs['readingOrder']:
tr_region_order.append((region.get('id'), int(cs['readingOrder']['index'])))
# fall back to default region type if nothing is given
if not rtype:
Expand Down Expand Up @@ -388,7 +384,6 @@ def _parse_page(self):
reg_cus = self._parse_page_custom(line.getparent().get('custom'))
if 'readingOrder' not in reg_cus or 'index' not in reg_cus['readingOrder']:
logger.warning('Incomplete `custom` attribute reading order found.')
valid_tr_lo = False
else:
tmp_transkribus_line_order[int(reg_cus['readingOrder']['index'])].append((int(cs['readingOrder']['index']), line.get('id')))

Expand Down Expand Up @@ -433,7 +428,7 @@ def _parse_group(el):
_ro = []
if el.tag.endswith('UnorderedGroup'):
_ro = [_parse_group(x) for x in el.iterchildren()]
is_total = False
is_total = False # NOQA
elif el.tag.endswith('OrderedGroup'):
_ro.extend(_parse_group(x) for x in el.iterchildren())
else:
Expand Down
1 change: 1 addition & 0 deletions kraken/transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from kraken.lib.exceptions import KrakenInputException
from kraken.lib.util import get_im_str

from typing import List
from jinja2 import Environment, PackageLoader
from io import BytesIO

Expand Down

0 comments on commit f403185

Please sign in to comment.