Skip to content

Commit

Permalink
Linting and type hint imports
Browse files (browse the repository at this point in the history)
  • Loading branch information
mittagessen committed Dec 20, 2023
1 parent 1b596ca commit 283a4e3
Show file tree
Hide file tree
Showing 30 changed files with 265 additions and 179 deletions.
8 changes: 5 additions & 3 deletions kraken/align.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,18 @@
from bidi.algorithm import get_display

from dataclasses import dataclass
from typing import Optional, Literal
from typing import Optional, Literal, TYPE_CHECKING

from kraken import rpred
from kraken.containers import Segmentation, BaselineOCRRecord
from kraken.lib.models import TorchSeqRecognizer

if TYPE_CHECKING:
from kraken.lib.models import TorchSeqRecognizer

logger = logging.getLogger('kraken')


def forced_align(doc: Segmentation, model: TorchSeqRecognizer, base_dir: Optional[Literal['L', 'R']] = None) -> Segmentation:
def forced_align(doc: Segmentation, model: 'TorchSeqRecognizer', base_dir: Optional[Literal['L', 'R']] = None) -> Segmentation:
"""
Performs a forced character alignment of text with recognition model
output activations.
Expand Down
11 changes: 8 additions & 3 deletions kraken/binarization.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,27 +23,32 @@
import logging
import numpy as np

from PIL import Image
from kraken.lib.util import pil2array, array2pil, is_bitonal, get_im_str
from scipy.ndimage import affine_transform, percentile_filter, gaussian_filter, binary_dilation
from scipy.ndimage import zoom as _zoom

from typing import TYPE_CHECKING

from kraken.lib.exceptions import KrakenInputException

if TYPE_CHECKING:
from PIL import Image


__all__ = ['nlbin']

logger = logging.getLogger(__name__)


def nlbin(im: Image.Image,
def nlbin(im: 'Image.Image',
threshold: float = 0.5,
zoom: float = 0.5,
escale: float = 1.0,
border: float = 0.1,
perc: int = 80,
range: int = 20,
low: int = 5,
high: int = 90) -> Image.Image:
high: int = 90) -> 'Image.Image':
"""
Performs binarization using non-linear processing.
Expand Down
15 changes: 9 additions & 6 deletions kraken/containers.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,16 @@
import numpy as np
import bidi.algorithm as bd

from os import PathLike
from typing import Literal, List, Dict, Union, Optional, Tuple, Any
from typing import Literal, List, Dict, Union, Optional, Tuple, Any, TYPE_CHECKING
from dataclasses import dataclass, asdict
from abc import ABC, abstractmethod

from kraken.lib.segmentation import compute_polygon_section

if TYPE_CHECKING:
from os import PathLike


__all__ = ['BaselineLine',
'BBoxLine',
'Segmentation',
Expand Down Expand Up @@ -85,7 +88,7 @@ class BaselineLine:
text: Optional[str] = None
base_dir: Optional[Literal['L', 'R']] = None
type: str = 'baselines'
imagename: Optional[Union[str, PathLike]] = None
imagename: Optional[Union[str, 'PathLike']] = None
tags: Optional[Dict[str, str]] = None
split: Optional[Literal['train', 'validation', 'test']] = None
regions: Optional[List[str]] = None
Expand Down Expand Up @@ -124,7 +127,7 @@ class BBoxLine:
text: Optional[str] = None
base_dir: Optional[Literal['L', 'R']] = None
type: str = 'bbox'
imagename: Optional[Union[str, PathLike]] = None
imagename: Optional[Union[str, 'PathLike']] = None
tags: Optional[Dict[str, str]] = None
split: Optional[Literal['train', 'validation', 'test']] = None
regions: Optional[List[str]] = None
Expand All @@ -145,7 +148,7 @@ class Region:
"""
id: str
boundary: List[Tuple[int, int]]
imagename: Optional[Union[str, PathLike]] = None
imagename: Optional[Union[str, 'PathLike']] = None
tags: Optional[Dict[str, str]] = None


Expand Down Expand Up @@ -175,7 +178,7 @@ class Segmentation:
Each reading order is a list of line indices.
"""
type: Literal['baselines', 'bbox']
imagename: Union[str, PathLike]
imagename: Union[str, 'PathLike']
text_direction: Literal['horizontal-lr', 'horizontal-rl', 'vertical-lr', 'vertical-rl']
script_detection: bool
lines: List[Union[BaselineLine, BBoxLine]]
Expand Down
1 change: 1 addition & 0 deletions kraken/contrib/extract_lines.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#! /usr/bin/env python
import click


@click.command()
@click.option('-f', '--format-type', type=click.Choice(['xml', 'alto', 'page', 'binary']), default='xml',
help='Sets the input document format. In ALTO and PageXML mode all '
Expand Down
1 change: 1 addition & 0 deletions kraken/contrib/heatmap_overlay.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"""
import click


@click.command()
@click.option('-i', '--model', default=None, show_default=True, type=click.Path(exists=True),
help='Baseline detection model to use.')
Expand Down
2 changes: 2 additions & 0 deletions kraken/contrib/hyperparameters/tune_pretraining.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ def train_tune(config, training_data=None, epochs=100, spec=RECOGNITION_SPEC):
enable_progress_bar=False)
trainer.fit(model, datamodule=data_module)


@click.command()
@click.option('-v', '--verbose', default=0, count=True)
@click.option('-s', '--seed', default=42, type=click.INT,
Expand Down Expand Up @@ -83,5 +84,6 @@ def cli(verbose, seed, output, num_samples, epochs, spec, training_files, files)

click.echo("Best hyperparameters found were: ", analysis.get_best_config(metric='accuracy', mode='max'))


if __name__ == '__main__':
cli()
1 change: 0 additions & 1 deletion kraken/contrib/hyperparameters/tune_training.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@

from kraken.lib.default_spec import RECOGNITION_PRETRAIN_HYPER_PARAMS, RECOGNITION_SPEC
from kraken.lib.pretrain.model import PretrainDataModule, RecognitionPretrainModel
from ray.tune.schedulers import ASHAScheduler

import pytorch_lightning as pl

Expand Down
1 change: 1 addition & 0 deletions kraken/contrib/segmentation_overlay.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from itertools import cycle
from collections import defaultdict


cmap = cycle([(230, 25, 75, 127),
(60, 180, 75, 127)])

Expand Down
2 changes: 1 addition & 1 deletion kraken/ketos/transcription.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ def transcription(ctx, text_direction, scale, bw, maxcolseps,
else:
with click.open_file(lines, 'r') as fp:
try:
fp = cast(IO[Any], fp)
fp = cast('IO[Any]', fp)
res = json.load(fp)
except ValueError as e:
raise click.UsageError('{} invalid segmentation: {}'.format(lines, str(e)))
Expand Down
10 changes: 5 additions & 5 deletions kraken/kraken.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def binarizer(threshold, zoom, escale, border, perc, range, low, high, input, ou
low, high)
if ctx.meta['last_process'] and ctx.meta['output_mode'] != 'native':
with click.open_file(output, 'w', encoding='utf-8') as fp:
fp = cast(IO[Any], fp)
fp = cast('IO[Any]', fp)
logger.info('Serializing as {} into {}'.format(ctx.meta['output_mode'], output))
res.save(f'{output}.png')
from kraken import serialization
Expand Down Expand Up @@ -159,7 +159,7 @@ def segmenter(legacy, model, text_direction, scale, maxcolseps, black_colseps,
ctx.exit(1)
if ctx.meta['last_process'] and ctx.meta['output_mode'] != 'native':
with click.open_file(output, 'w', encoding='utf-8') as fp:
fp = cast(IO[Any], fp)
fp = cast('IO[Any]', fp)
logger.info('Serializing as {} into {}'.format(ctx.meta['output_mode'], output))
from kraken import serialization
fp.write(serialization.serialize_segmentation(res,
Expand All @@ -170,7 +170,7 @@ def segmenter(legacy, model, text_direction, scale, maxcolseps, black_colseps,
processing_steps=ctx.meta['steps']))
else:
with click.open_file(output, 'w') as fp:
fp = cast(IO[Any], fp)
fp = cast('IO[Any]', fp)
json.dump(dataclasses.asdict(res), fp)
message('\u2713', fg='green')

Expand Down Expand Up @@ -208,7 +208,7 @@ def recognizer(model, pad, no_segmentation, bidi_reordering, tags_ignore, input,
if not bounds and ctx.meta['base_image'] != input:
with click.open_file(input, 'r') as fp:
try:
fp = cast(IO[Any], fp)
fp = cast('IO[Any]', fp)
bounds = Segmentation(**json.load(fp))
except ValueError as e:
raise click.UsageError(f'{input} invalid segmentation: {str(e)}')
Expand Down Expand Up @@ -244,7 +244,7 @@ def recognizer(model, pad, no_segmentation, bidi_reordering, tags_ignore, input,

ctx = click.get_current_context()
with click.open_file(output, 'w', encoding='utf-8') as fp:
fp = cast(IO[Any], fp)
fp = cast('IO[Any]', fp)
message(f'Writing recognition results for {ctx.meta["orig_file"]}\t', nl=False)
logger.info('Serializing as {} into {}'.format(ctx.meta['output_mode'], output))
if ctx.meta['output_mode'] != 'native':
Expand Down
12 changes: 7 additions & 5 deletions kraken/lib/arrow_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,17 @@
from PIL import Image, UnidentifiedImageError
from functools import partial
from collections import Counter
from typing import Optional, List, Union, Callable, Tuple, Dict
from typing import Optional, List, Union, Callable, Tuple, Dict, TYPE_CHECKING
from multiprocessing import Pool
from kraken.containers import Segmentation
from kraken.lib import functional_im_transforms as F_t
from kraken.lib.segmentation import extract_polygons
from kraken.lib.xml import XMLPage
from kraken.lib.util import is_bitonal, make_printable
from kraken.lib.exceptions import KrakenInputException
from os import PathLike

if TYPE_CHECKING:
from os import PathLike

import logging

Expand Down Expand Up @@ -89,7 +91,7 @@ def _extract_path_line(xml_record, skip_empty_lines: bool = True):
return [line], im.mode


def parse_path(path: Union[str, PathLike],
def parse_path(path: Union[str, 'PathLike'],
suffix: str = '.gt.txt',
split=F_t.default_split,
skip_empty_lines: bool = True):
Expand All @@ -101,8 +103,8 @@ def parse_path(path: Union[str, PathLike],
return {'image': path, 'lines': [{'text': gt}]}


def build_binary_dataset(files: Optional[List[Union[str, PathLike, Dict]]] = None,
output_file: Union[str, PathLike] = None,
def build_binary_dataset(files: Optional[List[Union[str, 'PathLike', Dict]]] = None,
output_file: Union[str, 'PathLike'] = None,
format_type: str = 'xml',
num_workers: int = 0,
ignore_splits: bool = False,
Expand Down
8 changes: 5 additions & 3 deletions kraken/lib/dataset/recognition.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,11 @@
import pyarrow as pa

from PIL import Image
from os import PathLike
from functools import partial
from torchvision import transforms
from collections import Counter
from torch.utils.data import Dataset
from typing import List, Tuple, Callable, Optional, Any, Union, Literal
from typing import List, Tuple, Callable, Optional, Any, Union, Literal, TYPE_CHECKING

from kraken.containers import BaselineLine, BBoxLine, Segmentation
from kraken.lib.util import is_bitonal
Expand All @@ -39,6 +38,9 @@

from kraken.lib import functional_im_transforms as F_t

if TYPE_CHECKING:
from os import PathLike

__all__ = ['DefaultAugmenter',
'ArrowIPCRecognitionDataset',
'PolygonGTDataset',
Expand Down Expand Up @@ -137,7 +139,7 @@ def __init__(self,

self.im_mode = self.transforms.mode

def add(self, file: Union[str, PathLike]) -> None:
def add(self, file: Union[str, 'PathLike']) -> None:
"""
Adds an Arrow IPC file to the dataset.
Expand Down
10 changes: 6 additions & 4 deletions kraken/lib/dataset/ro.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,16 @@
import numpy as np

from math import factorial
from os import PathLike
from torch.utils.data import Dataset
from typing import Dict, Sequence, Union, Literal, Optional
from typing import Dict, Sequence, Union, Literal, Optional, TYPE_CHECKING

from kraken.lib.xml import XMLPage

from kraken.lib.exceptions import KrakenInputException

if TYPE_CHECKING:
from os import PathLike

__all__ = ['PairWiseROSet', 'PageWiseROSet']

import logging
Expand All @@ -40,7 +42,7 @@ class PairWiseROSet(Dataset):
Returns random pairs of lines from the same page.
"""
def __init__(self, files: Sequence[Union[PathLike, str]] = None,
def __init__(self, files: Sequence[Union['PathLike', str]] = None,
mode: Optional[Literal['alto', 'page', 'xml']] = 'xml',
level: Literal['regions', 'baselines'] = 'baselines',
ro_id: Optional[str] = None,
Expand Down Expand Up @@ -142,7 +144,7 @@ class PageWiseROSet(Dataset):
Returns all lines from the same page.
"""
def __init__(self, files: Sequence[Union[PathLike, str]] = None,
def __init__(self, files: Sequence[Union['PathLike', str]] = None,
mode: Optional[Literal['alto', 'page', 'xml']] = 'xml',
level: Literal['regions', 'baselines'] = 'baselines',
ro_id: Optional[str] = None,
Expand Down
9 changes: 5 additions & 4 deletions kraken/lib/dataset/segmentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,13 @@
from torchvision import transforms
from collections import defaultdict
from torch.utils.data import Dataset
from typing import Dict, Tuple, Sequence, Callable, Any, Union, Literal, Optional
from typing import Dict, Tuple, Sequence, Callable, Any, Union, Literal, Optional, TYPE_CHECKING

from skimage.draw import polygon

from kraken.containers import Segmentation
from kraken.lib.xml import XMLPage
if TYPE_CHECKING:
from kraken.containers import Segmentation
from kraken.lib.xml import XMLPage


__all__ = ['BaselineSet']
Expand Down Expand Up @@ -124,7 +125,7 @@ def __init__(self,
self.transforms = im_transforms
self.seg_type = None

def add(self, doc: Union[Segmentation, XMLPage]):
def add(self, doc: Union['Segmentation', 'XMLPage']):
"""
Adds a page to the dataset.
Expand Down
8 changes: 5 additions & 3 deletions kraken/lib/functional_im_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,18 @@
import unicodedata
import bidi.algorithm as bd

from os import PathLike
from pathlib import Path
from PIL import Image
from PIL.Image import Resampling

from typing import Tuple, Optional, Callable, Any, Union
from typing import Tuple, Optional, Callable, Any, Union, TYPE_CHECKING

from kraken.binarization import nlbin
from kraken.lib.lineest import dewarp, CenterNormalizer

if TYPE_CHECKING:
from os import PathLike
from PIL import Image


def pil_to_mode(im: Image.Image, mode: str) -> Image.Image:
return im.convert(mode)
Expand Down
Loading

0 comments on commit 283a4e3

Please sign in to comment.