From 95981e0bcd354f37e2df7d3d07d40ebefc426400 Mon Sep 17 00:00:00 2001 From: Benjamin Kiessling Date: Fri, 19 Jan 2024 01:14:17 +0100 Subject: [PATCH] Move variable types from comments to annotations --- docs/advanced.rst | 8 +- kraken/ketos/segmentation.py | 2 +- kraken/kraken.py | 8 +- kraken/lib/codec.py | 4 +- kraken/lib/dataset/recognition.py | 20 ++--- kraken/lib/dataset/segmentation.py | 3 +- kraken/lib/dataset/utils.py | 2 +- kraken/lib/layers.py | 4 +- kraken/lib/lstm.py | 126 ----------------------------- kraken/lib/vgsl.py | 30 +++---- kraken/pageseg.py | 6 +- kraken/serialization.py | 18 ++--- kraken/transcribe.py | 11 +-- tests/test_models.py | 2 - 14 files changed, 58 insertions(+), 186 deletions(-) delete mode 100644 kraken/lib/lstm.py diff --git a/docs/advanced.rst b/docs/advanced.rst index b0e232b6c..533e1280f 100644 --- a/docs/advanced.rst +++ b/docs/advanced.rst @@ -451,15 +451,15 @@ segmentation and a mapping between scripts and models: .. code-block:: console - $ kraken -i ... ... ocr -m Grek:porson.clstm -m Latn:antiqua.clstm + $ kraken -i ... ... ocr -m Grek:porson.mlmodel -m Latn:antiqua.mlmodel -All polytonic Greek text portions will be recognized using the `porson.clstm` -model while Latin text will be fed into the `antiqua.clstm` model. It is +All polytonic Greek text portions will be recognized using the `porson.mlmodel` +model while Latin text will be fed into the `antiqua.mlmodel` model. It is possible to define a fallback model that other text will be fed to: .. code-block:: console - $ kraken -i ... ... ocr -m ... -m ... -m default:porson.clstm + $ kraken -i ... ... ocr -m ... -m ... -m default:porson.mlmodel It is also possible to disable recognition on a particular script by mapping to the special model keyword `ignore`. Ignored lines will still be serialized but diff --git a/kraken/ketos/segmentation.py b/kraken/ketos/segmentation.py index c144e9fbd..addd5ebbf 100644 --- a/kraken/ketos/segmentation.py +++ b/kraken/ketos/segmentation.py @@ -45,7 +45,7 @@ def _validate_merging(ctx, param, value): """ if not value: return None - merge_dict = {} # type: Dict[str, str] + merge_dict: Dict[str, str] = {} try: for m in value: lexer = shlex.shlex(m, posix=True) diff --git a/kraken/kraken.py b/kraken/kraken.py index 633d7bdfe..ccb98e8a2 100644 --- a/kraken/kraken.py +++ b/kraken/kraken.py @@ -30,7 +30,7 @@ from functools import partial from rich.traceback import install -from typing import Dict, cast, Any, IO, Callable +from typing import Dict, cast, Any, IO, Callable, Union, List import click @@ -541,7 +541,7 @@ def _validate_mm(ctx, param, value): """ Maps model mappings to a dictionary. """ - model_dict = {'ignore': []} # type: Dict[str, Union[str, List[str]]] + model_dict: Dict[str, Union[str, List[str]]] = {'ignore': []} if len(value) == 1: lexer = shlex.shlex(value[0], posix=True) lexer.wordchars += r'\/.+-()=^&;,.' @@ -603,7 +603,7 @@ def ocr(ctx, model, pad, reorder, base_dir, no_segmentation, text_direction): reorder = base_dir # first we try to find the model in the absolute path, then ~/.kraken - nm = {} # type: Dict[str, models.TorchSeqRecognizer] + nm: Dict[str, models.TorchSeqRecognizer] = {} ign_tags = model.pop('ignore') for k, v in model.items(): search = [v, @@ -629,7 +629,7 @@ def ocr(ctx, model, pad, reorder, base_dir, no_segmentation, text_direction): if 'default' in nm: from collections import defaultdict - nn = defaultdict(lambda: nm['default']) # type: Dict[str, models.TorchSeqRecognizer] + nn: Dict[str, models.TorchSeqRecognizer] = defaultdict(lambda: nm['default']) nn.update(nm) nm = nn diff --git a/kraken/lib/codec.py b/kraken/lib/codec.py index a72e90da6..8eb911441 100644 --- a/kraken/lib/codec.py +++ b/kraken/lib/codec.py @@ -64,7 +64,7 @@ def __init__(self, charset: Union[Dict[str, Sequence[int]], Sequence[str], str], raise KrakenCodecException(f'Duplicate entry in codec definition string: {cc}') self.c2l = {k: [v] for v, k in enumerate(sorted(charset), start=1)} self.c_sorted = sorted(self.c2l.keys(), key=len, reverse=True) - self.l2c = {tuple(v): k for k, v in self.c2l.items()} # type: Dict[Tuple[int], str] + self.l2c: Dict[Tuple[int], str] = {tuple(v): k for k, v in self.c2l.items()} self.l2c_single = {k[0]: v for k, v in self.l2c.items() if len(k) == 1} self.strict = strict if not self.is_valid: @@ -116,7 +116,7 @@ def encode(self, s: str) -> IntTensor: KrakenEncodeException: if the a subsequence is not encodable and the codec is set to strict mode. """ - labels = [] # type: List[int] + labels: List[int] = [] idx = 0 while idx < len(s): encodable_suffix = False diff --git a/kraken/lib/dataset/recognition.py b/kraken/lib/dataset/recognition.py index 529149810..16d00fa0e 100644 --- a/kraken/lib/dataset/recognition.py +++ b/kraken/lib/dataset/recognition.py @@ -112,8 +112,8 @@ def __init__(self, `test` only rows with the appropriate flag set in the file will be considered. """ - self.alphabet = Counter() # type: Counter - self.text_transforms = [] # type: List[Callable[[str], str]] + self.alphabet: Counter = Counter() + self.text_transforms: List[Callable[[str], str]] = [] self.failed_samples = set() self.transforms = im_transforms self.aug = None @@ -300,10 +300,10 @@ def __init__(self, suitable for forward passes. augmentation: Enables augmentation. """ - self._images = [] # type: Union[List[Image], List[torch.Tensor]] - self._gt = [] # type: List[str] - self.alphabet = Counter() # type: Counter - self.text_transforms = [] # type: List[Callable[[str], str]] + self._images: Union[List[Image.Image], List[torch.Tensor]] = [] + self._gt: List[str] = [] + self.alphabet: Counter = Counter() + self.text_transforms: List[Callable[[str], str]] = [] self.transforms = im_transforms self.aug = None self.skip_empty_lines = skip_empty_lines @@ -397,7 +397,7 @@ def encode(self, codec: Optional[PytorchCodec] = None) -> None: self.codec = codec else: self.codec = PytorchCodec(''.join(self.alphabet.keys())) - self.training_set = [] # type: List[Tuple[Union[Image, torch.Tensor], torch.Tensor]] + self.training_set: List[Tuple[Union[Image.Image, torch.Tensor], torch.Tensor]] = [] for im, gt in zip(self._images, self._gt): self.training_set.append((im, self.codec.encode(gt))) @@ -405,7 +405,7 @@ def no_encode(self) -> None: """ Creates an unencoded dataset. """ - self.training_set = [] # type: List[Tuple[Union[Image, torch.Tensor], str]] + self.training_set: List[Tuple[Union[Image.Image, torch.Tensor], str]] = [] for im, gt in zip(self._images, self._gt): self.training_set.append((im, gt)) @@ -584,7 +584,7 @@ def encode(self, codec: Optional[PytorchCodec] = None) -> None: self.codec = codec else: self.codec = PytorchCodec(''.join(self.alphabet.keys())) - self.training_set = [] # type: List[Tuple[Union[Image, torch.Tensor], torch.Tensor]] + self.training_set: List[Tuple[Union[Image.Image, torch.Tensor], torch.Tensor]] = [] for im, gt in zip(self._images, self._gt): self.training_set.append((im, self.codec.encode(gt))) @@ -592,7 +592,7 @@ def no_encode(self) -> None: """ Creates an unencoded dataset. """ - self.training_set = [] # type: List[Tuple[Union[Image, torch.Tensor], str]] + self.training_set: List[Tuple[Union[Image.Image, torch.Tensor], str]] = [] for im, gt in zip(self._images, self._gt): self.training_set.append((im, gt)) diff --git a/kraken/lib/dataset/segmentation.py b/kraken/lib/dataset/segmentation.py index 84d697a49..61a77e4eb 100644 --- a/kraken/lib/dataset/segmentation.py +++ b/kraken/lib/dataset/segmentation.py @@ -27,7 +27,7 @@ from torchvision import transforms from collections import defaultdict from torch.utils.data import Dataset -from typing import Dict, Tuple, Sequence, Callable, Any, Union, Literal, Optional, TYPE_CHECKING +from typing import Dict, Tuple, Sequence, Callable, Any, TYPE_CHECKING from skimage.draw import polygon @@ -35,7 +35,6 @@ if TYPE_CHECKING: from kraken.containers import Segmentation - from kraken.lib.xml import XMLPage __all__ = ['BaselineSet'] diff --git a/kraken/lib/dataset/utils.py b/kraken/lib/dataset/utils.py index b690fdabf..98defd9d9 100644 --- a/kraken/lib/dataset/utils.py +++ b/kraken/lib/dataset/utils.py @@ -66,7 +66,7 @@ def __init__(self, """ super().__init__(None) - self._scale = (height, width) # type: Tuple[int, int] + self._scale: Tuple[int, int] = (height, width) self._valid_norm = valid_norm self._force_binarization = force_binarization self._batch = batch diff --git a/kraken/lib/layers.py b/kraken/lib/layers.py index 76f069574..38b25aa1b 100644 --- a/kraken/lib/layers.py +++ b/kraken/lib/layers.py @@ -46,7 +46,7 @@ def forward(self, *inputs, output_shape: Optional[Tuple[int, int]] = None): outputs = [] seq_lens = None for module in self._modules.values(): - if type(inputs) == tuple: + if isinstance(inputs, tuple): output, seq_lens = module(*inputs, output_shape=output_shape) outputs.append(output) else: @@ -135,7 +135,7 @@ def __init__(self, input_size: int, hidden_size: int) -> None: self.input_size = input_size self.hidden_size = hidden_size - self._all_weights = [] # type: List[List[str]] + self._all_weights: List[List[str]] = [] gate_size = 4 * hidden_size for direction in range(2): w_ih = torch.nn.Parameter(torch.Tensor(gate_size, input_size)) diff --git a/kraken/lib/lstm.py b/kraken/lib/lstm.py deleted file mode 100644 index 565803051..000000000 --- a/kraken/lib/lstm.py +++ /dev/null @@ -1,126 +0,0 @@ -# flake8: noqa -from typing import Dict -from scipy.special import expit - -initial_range = 0.1 - - -class Codec(object): - """Translate between integer codes and characters.""" - def init(self, charset): - charset = sorted(list(set(charset))) - self.code2char = {} # type: Dict[int, str] - self.char2code = {} # type: Dict[str, int] - for code,char in enumerate(charset): - self.code2char[code] = char - self.char2code[char] = code - return self - def size(self): - """The total number of codes (use this for the number of output - classes when training a classifier.""" - return len(list(self.code2char.keys())) - def encode(self, s): - "Encode the string `s` into a code sequence." - dflt = self.char2code["~"] - return [self.char2code.get(c,dflt) for c in s] - def decode(self, l): - "Decode a code sequence into a string." - s = [self.code2char.get(c,"~") for c in l] - return s - -class Network: - def predict(self,xs): - """Prediction is the same as forward propagation.""" - return self.forward(xs) - -class Softmax(Network): - """A logistic regression network.""" - def __init__(self,Nh,No,initial_range=0.1,rand=None): - pass - def ninputs(self): - pass - def noutputs(self): - pass - def forward(self,ys): - pass - def backward(self,deltas): - pass - - -class LSTM(Network): - """A standard LSTM network. This is a direct implementation of all the forward - and backward propagation formulas, mainly for speed. (There is another, more - abstract implementation as well, but that's significantly slower in Python - due to function call overhead.)""" - def __init__(self,ni,ns,initial=0.1,maxlen=5000): - pass - - def init_weights(self,initial): - pass - - def allocate(self,n): - pass - - def reset(self,n): - pass - - def forward(self,xs): - pass - -################################################################ -# combination classifiers -################################################################ - -class Stacked(Network): - """Stack two networks on top of each other.""" - def __init__(self,nets): - self.nets = nets - def forward(self,xs): - pass - -class Reversed(Network): - """Run a network on the time-reversed input.""" - def __init__(self,net): - self.net = net - def forward(self,xs): - pass - -class Parallel(Network): - """Run multiple networks in parallel on the same input.""" - def __init__(self,*nets): - self.nets = nets - def forward(self,xs): - pass - -def BIDILSTM(Ni,Ns,No): - """A bidirectional LSTM, constructed from regular and reversed LSTMs.""" - lstm1 = LSTM(Ni,Ns) - lstm2 = Reversed(LSTM(Ni,Ns)) - bidi = Parallel(lstm1,lstm2) - logreg = Softmax(2*Ns,No) - stacked = Stacked([bidi,logreg]) - return stacked - - -class SeqRecognizer(Network): - """Perform sequence recognition using BIDILSTM and alignment.""" - def __init__(self,ninput,nstates,noutput=-1,codec=None,normalize=None): - self.Ni = ninput - if codec: noutput = codec.size() - self.No = noutput - self.lstm = BIDILSTM(ninput,nstates,noutput) - self.codec = codec - def translate_back(self, output): - pass - def translate_back_locations(self, output): - pass - def predictSequence(self,xs): - "Predict an integer sequence of codes." - pass - def l2s(self,l): - "Convert a code sequence into a unicode string after recognition." - l = self.codec.decode(l) - return u"".join(l) - def predictString(self,xs): - "Predict output as a string. This uses codec and normalizer." - pass diff --git a/kraken/lib/vgsl.py b/kraken/lib/vgsl.py index 2bc83d693..4335fe7e4 100644 --- a/kraken/lib/vgsl.py +++ b/kraken/lib/vgsl.py @@ -127,21 +127,21 @@ def __init__(self, spec: str) -> None: dimension. """ self.spec = spec - self.named_spec = [] # type: List[str] + self.named_spec: List[str] = [] self.ops = [self.build_addition, self.build_identity, self.build_rnn, self.build_dropout, self.build_maxpool, self.build_conv, self.build_output, self.build_reshape, self.build_wav2vec2, self.build_groupnorm, self.build_series, self.build_parallel, self.build_ro] - self.codec = None # type: Optional[PytorchCodec] - self.criterion = None # type: Any + self.codec: Optional[PytorchCodec] = None + self.criterion: Any = None self.nn = layers.MultiParamSequential() - self.user_metadata = {'accuracy': [], - 'metrics': [], - 'seg_type': None, - 'one_channel_mode': None, - 'model_type': None, - 'hyper_params': {}} # type: dict[str, Any] + self.user_metadata: Dict[str, Any] = {'accuracy': [], + 'metrics': [], + 'seg_type': None, + 'one_channel_mode': None, + 'model_type': None, + 'hyper_params': {}} self._aux_layers = nn.ModuleDict() self.idx = -1 @@ -304,12 +304,12 @@ def _deserialize_layers(name, layer): if 'codec' in mlmodel.user_defined_metadata: nn.add_codec(PytorchCodec(json.loads(mlmodel.user_defined_metadata['codec']))) - nn.user_metadata = {'accuracy': [], - 'metrics': [], - 'seg_type': 'bbox', - 'one_channel_mode': '1', - 'model_type': None, - 'hyper_params': {}} # type: dict[str, str] + nn.user_metadata: Dict[str, Any] = {'accuracy': [], + 'metrics': [], + 'seg_type': 'bbox', + 'one_channel_mode': '1', + 'model_type': None, + 'hyper_params': {}} if 'kraken_meta' in mlmodel.user_defined_metadata: nn.user_metadata.update(json.loads(mlmodel.user_defined_metadata['kraken_meta'])) diff --git a/kraken/pageseg.py b/kraken/pageseg.py index 6cc94ea31..82ab81c0d 100644 --- a/kraken/pageseg.py +++ b/kraken/pageseg.py @@ -46,9 +46,9 @@ class record(object): """ def __init__(self, **kw): self.__dict__.update(kw) - self.label = 0 # type: int - self.bounds = [] # type: List - self.mask = None # type: np.ndarray + self.label: int = 0 + self.bounds: List = [] + self.mask: np.ndarray = None def find(condition): diff --git a/kraken/serialization.py b/kraken/serialization.py index 94e490191..e1392feb0 100644 --- a/kraken/serialization.py +++ b/kraken/serialization.py @@ -22,7 +22,7 @@ from kraken.lib.util import make_printable -from typing import List, Tuple, Iterable, Optional, Sequence, Literal, TYPE_CHECKING +from typing import List, Tuple, Iterable, Optional, Sequence, Literal, TYPE_CHECKING, Dict, Any if TYPE_CHECKING: from os import PathLike @@ -107,14 +107,14 @@ def serialize(results: 'Segmentation', The rendered template """ logger.info(f'Serialize {len(results.lines)} records from {results.imagename} with template {template}.') - page = {'entities': [], - 'size': image_size, - 'name': results.imagename, - 'writing_mode': writing_mode, - 'scripts': scripts, - 'date': datetime.datetime.now(datetime.timezone.utc).isoformat(), - 'base_dir': [rec.base_dir for rec in results.lines][0] if len(results.lines) else None, - 'seg_type': results.type} # type: dict + page: Dict[str, Any] = {'entities': [], + 'size': image_size, + 'name': results.imagename, + 'writing_mode': writing_mode, + 'scripts': scripts, + 'date': datetime.datetime.now(datetime.timezone.utc).isoformat(), + 'base_dir': [rec.base_dir for rec in results.lines][0] if len(results.lines) else None, + 'seg_type': results.type} metadata = {'processing_steps': processing_steps, 'version': importlib.metadata.version('kraken')} diff --git a/kraken/transcribe.py b/kraken/transcribe.py index 5b39ee2f7..6a6ba0a68 100644 --- a/kraken/transcribe.py +++ b/kraken/transcribe.py @@ -15,16 +15,17 @@ """ Utility functions for ground truth transcription. """ -from kraken.lib.exceptions import KrakenInputException -from kraken.lib.util import get_im_str - -from jinja2 import Environment, PackageLoader from io import BytesIO +from typing import List, Dict, Any +from jinja2 import Environment, PackageLoader import uuid import base64 import logging +from kraken.lib.exceptions import KrakenInputException +from kraken.lib.util import get_im_str + logger = logging.getLogger() @@ -36,7 +37,7 @@ def __init__(self, font=None, font_style=None): env = Environment(loader=PackageLoader('kraken', 'templates'), autoescape=True) logger.debug('Loading transcription template.') self.tmpl = env.get_template('layout.html') - self.pages = [] # type: List[dict] + self.pages: List[Dict[Any, Any]] = [] self.font = {'font': font, 'style': font_style} self.text_direction = 'horizontal-tb' self.page_idx = 1 diff --git a/tests/test_models.py b/tests/test_models.py index cb57b05cd..f928d4fcb 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -7,8 +7,6 @@ from pytest import raises from pathlib import Path -import kraken.lib.lstm - from kraken.lib import models from kraken.lib.exceptions import KrakenInvalidModelException