Skip to content

Commit

Permalink
Move variable types from comments to annotations
Browse files Browse the repository at this point in the history
  • Loading branch information
mittagessen committed Jan 19, 2024
1 parent aea5098 commit 95981e0
Show file tree
Hide file tree
Showing 14 changed files with 58 additions and 186 deletions.
8 changes: 4 additions & 4 deletions docs/advanced.rst
Original file line number Diff line number Diff line change
Expand Up @@ -451,15 +451,15 @@ segmentation and a mapping between scripts and models:

.. code-block:: console
$ kraken -i ... ... ocr -m Grek:porson.clstm -m Latn:antiqua.clstm
$ kraken -i ... ... ocr -m Grek:porson.mlmodel -m Latn:antiqua.mlmodel
All polytonic Greek text portions will be recognized using the `porson.clstm`
model while Latin text will be fed into the `antiqua.clstm` model. It is
All polytonic Greek text portions will be recognized using the `porson.mlmodel`
model while Latin text will be fed into the `antiqua.mlmodel` model. It is
possible to define a fallback model that other text will be fed to:

.. code-block:: console
$ kraken -i ... ... ocr -m ... -m ... -m default:porson.clstm
$ kraken -i ... ... ocr -m ... -m ... -m default:porson.mlmodel
It is also possible to disable recognition on a particular script by mapping to
the special model keyword `ignore`. Ignored lines will still be serialized but
Expand Down
2 changes: 1 addition & 1 deletion kraken/ketos/segmentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def _validate_merging(ctx, param, value):
"""
if not value:
return None
merge_dict = {} # type: Dict[str, str]
merge_dict: Dict[str, str] = {}
try:
for m in value:
lexer = shlex.shlex(m, posix=True)
Expand Down
8 changes: 4 additions & 4 deletions kraken/kraken.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@

from functools import partial
from rich.traceback import install
from typing import Dict, cast, Any, IO, Callable
from typing import Dict, cast, Any, IO, Callable, Union, List

import click

Expand Down Expand Up @@ -541,7 +541,7 @@ def _validate_mm(ctx, param, value):
"""
Maps model mappings to a dictionary.
"""
model_dict = {'ignore': []} # type: Dict[str, Union[str, List[str]]]
model_dict: Dict[str, Union[str, List[str]]] = {'ignore': []}
if len(value) == 1:
lexer = shlex.shlex(value[0], posix=True)
lexer.wordchars += r'\/.+-()=^&;,.'
Expand Down Expand Up @@ -603,7 +603,7 @@ def ocr(ctx, model, pad, reorder, base_dir, no_segmentation, text_direction):
reorder = base_dir

# first we try to find the model in the absolute path, then ~/.kraken
nm = {} # type: Dict[str, models.TorchSeqRecognizer]
nm: Dict[str, models.TorchSeqRecognizer] = {}
ign_tags = model.pop('ignore')
for k, v in model.items():
search = [v,
Expand All @@ -629,7 +629,7 @@ def ocr(ctx, model, pad, reorder, base_dir, no_segmentation, text_direction):
if 'default' in nm:
from collections import defaultdict

nn = defaultdict(lambda: nm['default']) # type: Dict[str, models.TorchSeqRecognizer]
nn: Dict[str, models.TorchSeqRecognizer] = defaultdict(lambda: nm['default'])
nn.update(nm)
nm = nn

Expand Down
4 changes: 2 additions & 2 deletions kraken/lib/codec.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def __init__(self, charset: Union[Dict[str, Sequence[int]], Sequence[str], str],
raise KrakenCodecException(f'Duplicate entry in codec definition string: {cc}')
self.c2l = {k: [v] for v, k in enumerate(sorted(charset), start=1)}
self.c_sorted = sorted(self.c2l.keys(), key=len, reverse=True)
self.l2c = {tuple(v): k for k, v in self.c2l.items()} # type: Dict[Tuple[int], str]
self.l2c: Dict[Tuple[int], str] = {tuple(v): k for k, v in self.c2l.items()}
self.l2c_single = {k[0]: v for k, v in self.l2c.items() if len(k) == 1}
self.strict = strict
if not self.is_valid:
Expand Down Expand Up @@ -116,7 +116,7 @@ def encode(self, s: str) -> IntTensor:
KrakenEncodeException: if a subsequence is not encodable and the
codec is set to strict mode.
"""
labels = [] # type: List[int]
labels: List[int] = []
idx = 0
while idx < len(s):
encodable_suffix = False
Expand Down
20 changes: 10 additions & 10 deletions kraken/lib/dataset/recognition.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,8 @@ def __init__(self,
`test` only rows with the appropriate flag set in the
file will be considered.
"""
self.alphabet = Counter() # type: Counter
self.text_transforms = [] # type: List[Callable[[str], str]]
self.alphabet: Counter = Counter()
self.text_transforms: List[Callable[[str], str]] = []
self.failed_samples = set()
self.transforms = im_transforms
self.aug = None
Expand Down Expand Up @@ -300,10 +300,10 @@ def __init__(self,
suitable for forward passes.
augmentation: Enables augmentation.
"""
self._images = [] # type: Union[List[Image], List[torch.Tensor]]
self._gt = [] # type: List[str]
self.alphabet = Counter() # type: Counter
self.text_transforms = [] # type: List[Callable[[str], str]]
self._images: Union[List[Image.Image], List[torch.Tensor]] = []
self._gt: List[str] = []
self.alphabet: Counter = Counter()
self.text_transforms: List[Callable[[str], str]] = []
self.transforms = im_transforms
self.aug = None
self.skip_empty_lines = skip_empty_lines
Expand Down Expand Up @@ -397,15 +397,15 @@ def encode(self, codec: Optional[PytorchCodec] = None) -> None:
self.codec = codec
else:
self.codec = PytorchCodec(''.join(self.alphabet.keys()))
self.training_set = [] # type: List[Tuple[Union[Image, torch.Tensor], torch.Tensor]]
self.training_set: List[Tuple[Union[Image.Image, torch.Tensor], torch.Tensor]] = []
for im, gt in zip(self._images, self._gt):
self.training_set.append((im, self.codec.encode(gt)))

def no_encode(self) -> None:
"""
Creates an unencoded dataset.
"""
self.training_set = [] # type: List[Tuple[Union[Image, torch.Tensor], str]]
self.training_set: List[Tuple[Union[Image.Image, torch.Tensor], str]] = []
for im, gt in zip(self._images, self._gt):
self.training_set.append((im, gt))

Expand Down Expand Up @@ -584,15 +584,15 @@ def encode(self, codec: Optional[PytorchCodec] = None) -> None:
self.codec = codec
else:
self.codec = PytorchCodec(''.join(self.alphabet.keys()))
self.training_set = [] # type: List[Tuple[Union[Image, torch.Tensor], torch.Tensor]]
self.training_set: List[Tuple[Union[Image.Image, torch.Tensor], torch.Tensor]] = []
for im, gt in zip(self._images, self._gt):
self.training_set.append((im, self.codec.encode(gt)))

def no_encode(self) -> None:
"""
Creates an unencoded dataset.
"""
self.training_set = [] # type: List[Tuple[Union[Image, torch.Tensor], str]]
self.training_set: List[Tuple[Union[Image.Image, torch.Tensor], str]] = []
for im, gt in zip(self._images, self._gt):
self.training_set.append((im, gt))

Expand Down
3 changes: 1 addition & 2 deletions kraken/lib/dataset/segmentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,14 @@
from torchvision import transforms
from collections import defaultdict
from torch.utils.data import Dataset
from typing import Dict, Tuple, Sequence, Callable, Any, Union, Literal, Optional, TYPE_CHECKING
from typing import Dict, Tuple, Sequence, Callable, Any, TYPE_CHECKING

from skimage.draw import polygon

from kraken.lib.segmentation import scale_regions

if TYPE_CHECKING:
from kraken.containers import Segmentation
from kraken.lib.xml import XMLPage


__all__ = ['BaselineSet']
Expand Down
2 changes: 1 addition & 1 deletion kraken/lib/dataset/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def __init__(self,
"""
super().__init__(None)

self._scale = (height, width) # type: Tuple[int, int]
self._scale: Tuple[int, int] = (height, width)
self._valid_norm = valid_norm
self._force_binarization = force_binarization
self._batch = batch
Expand Down
4 changes: 2 additions & 2 deletions kraken/lib/layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def forward(self, *inputs, output_shape: Optional[Tuple[int, int]] = None):
outputs = []
seq_lens = None
for module in self._modules.values():
if type(inputs) == tuple:
if isinstance(inputs, tuple):
output, seq_lens = module(*inputs, output_shape=output_shape)
outputs.append(output)
else:
Expand Down Expand Up @@ -135,7 +135,7 @@ def __init__(self, input_size: int, hidden_size: int) -> None:

self.input_size = input_size
self.hidden_size = hidden_size
self._all_weights = [] # type: List[List[str]]
self._all_weights: List[List[str]] = []
gate_size = 4 * hidden_size
for direction in range(2):
w_ih = torch.nn.Parameter(torch.Tensor(gate_size, input_size))
Expand Down
126 changes: 0 additions & 126 deletions kraken/lib/lstm.py

This file was deleted.

30 changes: 15 additions & 15 deletions kraken/lib/vgsl.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,21 +127,21 @@ def __init__(self, spec: str) -> None:
dimension.
"""
self.spec = spec
self.named_spec = [] # type: List[str]
self.named_spec: List[str] = []
self.ops = [self.build_addition, self.build_identity, self.build_rnn,
self.build_dropout, self.build_maxpool, self.build_conv,
self.build_output, self.build_reshape, self.build_wav2vec2,
self.build_groupnorm, self.build_series,
self.build_parallel, self.build_ro]
self.codec = None # type: Optional[PytorchCodec]
self.criterion = None # type: Any
self.codec: Optional[PytorchCodec] = None
self.criterion: Any = None
self.nn = layers.MultiParamSequential()
self.user_metadata = {'accuracy': [],
'metrics': [],
'seg_type': None,
'one_channel_mode': None,
'model_type': None,
'hyper_params': {}} # type: dict[str, Any]
self.user_metadata: Dict[str, Any] = {'accuracy': [],
'metrics': [],
'seg_type': None,
'one_channel_mode': None,
'model_type': None,
'hyper_params': {}}
self._aux_layers = nn.ModuleDict()

self.idx = -1
Expand Down Expand Up @@ -304,12 +304,12 @@ def _deserialize_layers(name, layer):
if 'codec' in mlmodel.user_defined_metadata:
nn.add_codec(PytorchCodec(json.loads(mlmodel.user_defined_metadata['codec'])))

nn.user_metadata = {'accuracy': [],
'metrics': [],
'seg_type': 'bbox',
'one_channel_mode': '1',
'model_type': None,
'hyper_params': {}} # type: dict[str, str]
nn.user_metadata: Dict[str, Any] = {'accuracy': [],
'metrics': [],
'seg_type': 'bbox',
'one_channel_mode': '1',
'model_type': None,
'hyper_params': {}}

if 'kraken_meta' in mlmodel.user_defined_metadata:
nn.user_metadata.update(json.loads(mlmodel.user_defined_metadata['kraken_meta']))
Expand Down
6 changes: 3 additions & 3 deletions kraken/pageseg.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,9 @@ class record(object):
"""
def __init__(self, **kw):
self.__dict__.update(kw)
self.label = 0 # type: int
self.bounds = [] # type: List
self.mask = None # type: np.ndarray
self.label: int = 0
self.bounds: List = []
self.mask: np.ndarray = None


def find(condition):
Expand Down
Loading

0 comments on commit 95981e0

Please sign in to comment.