From 5af9c8c6c13fd7bf16327ba40ccaa6d156c7e038 Mon Sep 17 00:00:00 2001
From: Benjamin Kiessling
Date: Tue, 26 Mar 2024 11:48:24 +0100
Subject: [PATCH] Cleanup of PR #555 merge

---
 kraken/lib/dataset/recognition.py  | 2 +-
 kraken/lib/dataset/segmentation.py | 1 -
 kraken/lib/pretrain/model.py       | 7 ++++---
 kraken/lib/pretrain/util.py        | 2 +-
 kraken/lib/segmentation.py         | 7 ++++---
 kraken/lib/train.py                | 7 ++++---
 kraken/lib/vgsl.py                 | 6 +++---
 tests/test_newpolygons.py          | 6 +++---
 8 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/kraken/lib/dataset/recognition.py b/kraken/lib/dataset/recognition.py
index fde975cc5..5f8744724 100644
--- a/kraken/lib/dataset/recognition.py
+++ b/kraken/lib/dataset/recognition.py
@@ -292,7 +292,7 @@ def __init__(self,
                  reorder: Union[bool, Literal['L', 'R']] = True,
                  im_transforms: Callable[[Any], torch.Tensor] = transforms.Compose([]),
                  augmentation: bool = False,
-                 legacy_polygons: bool=False) -> None:
+                 legacy_polygons: bool = False) -> None:
         """
         Creates a dataset for a polygonal (baseline) transcription model.

diff --git a/kraken/lib/dataset/segmentation.py b/kraken/lib/dataset/segmentation.py
index b52fe7ddc..a9f962226 100644
--- a/kraken/lib/dataset/segmentation.py
+++ b/kraken/lib/dataset/segmentation.py
@@ -165,7 +165,6 @@ def __getitem__(self, idx):
                 im, target = self.transform(im, target)
                 return {'image': im, 'target': target}
             except Exception:
-                raise
                 self.failed_samples.add(idx)
                 idx = np.random.randint(0, len(self.imgs))
                 logger.debug(traceback.format_exc())
diff --git a/kraken/lib/pretrain/model.py b/kraken/lib/pretrain/model.py
index 68626cf49..d86d2a7c1 100644
--- a/kraken/lib/pretrain/model.py
+++ b/kraken/lib/pretrain/model.py
@@ -212,9 +212,10 @@ def __init__(self,
         if format_type == 'binary':
             legacy_train_status = train_set.legacy_polygons_status
             if val_set and val_set.legacy_polygons_status != legacy_train_status:
-                logger.warning(
-                    f'Train and validation set have different legacy polygon status: {legacy_train_status} and {val_set.legacy_polygons_status}.'
-                    'Train set status prevails.')
+                logger.warning('Train and validation set have different legacy '
+                               f'polygon status: {legacy_train_status} and '
+                               f'{val_set.legacy_polygons_status}. Train set '
+                               'status prevails.')
             if legacy_train_status == "mixed":
                 logger.warning('Mixed legacy polygon status in training dataset. Consider recompilation.')
                 legacy_train_status = False
diff --git a/kraken/lib/pretrain/util.py b/kraken/lib/pretrain/util.py
index 7bf3760a8..ce29c4b00 100644
--- a/kraken/lib/pretrain/util.py
+++ b/kraken/lib/pretrain/util.py
@@ -139,7 +139,7 @@ def arrange(s, e, length, keep_length):
     for length in sorted(lengths, reverse=True):
         lens = np.fromiter(
             (e - s if e - s >= length + mask_min_space else 0 for s, e in parts),
-            np.int,
+            int,
         )
         l_sum = np.sum(lens)
         if l_sum == 0:
diff --git a/kraken/lib/segmentation.py b/kraken/lib/segmentation.py
index 9fad17a60..3ab6008a1 100644
--- a/kraken/lib/segmentation.py
+++ b/kraken/lib/segmentation.py
@@ -25,6 +25,7 @@
 import torch
 import torch.nn.functional as F
 from PIL import Image, ImageDraw
+from PIL.Image import Resampling, Transform
 from scipy.ndimage import (binary_erosion, distance_transform_cdt,
                            gaussian_filter, maximum_filter, affine_transform)
 from scipy.signal import convolve2d
@@ -419,8 +420,8 @@ def _rotate(image: _T_pil_or_np,
     if isinstance(image, Image.Image):
         # PIL is much faster than scipy
         pdata = tform.params.flatten().tolist()[:6]
-        resample = {0: Image.NEAREST, 1: Image.BILINEAR, 2: Image.BICUBIC, 3: Image.BICUBIC}.get(order, Image.NEAREST)
-        return tform, image.transform(output_shape[::-1], Image.AFFINE, data=pdata, resample=resample, fillcolor=cval)
+        resample = {0: Resampling.NEAREST, 1: Resampling.BILINEAR, 2: Resampling.BICUBIC, 3: Resampling.BICUBIC}.get(order, Resampling.NEAREST)
+        return tform, image.transform(output_shape[::-1], Transform.AFFINE, data=pdata, resample=resample, fillcolor=cval)

     # params for scipy
     # swap X and Y axis for scipy
@@ -1332,7 +1333,7 @@ def extract_polygons(im: Image.Image,
                        for i in range(0, len(source_envelope)-3, 2)
                        ]
         # warp
-        resample = {0: Image.NEAREST, 1: Image.BILINEAR, 2: Image.BICUBIC, 3: Image.BICUBIC}.get(order, Image.NEAREST)
+        resample = {0: Resampling.NEAREST, 1: Resampling.BILINEAR, 2: Resampling.BICUBIC, 3: Resampling.BICUBIC}.get(order, Resampling.NEAREST)
         i = patch.transform((output_shape[1], output_shape[0]), Image.MESH, data=deform_mesh, resample=resample)

         yield i.crop(i.getbbox()), line
diff --git a/kraken/lib/train.py b/kraken/lib/train.py
index da1f4ff79..fb54d5791 100644
--- a/kraken/lib/train.py
+++ b/kraken/lib/train.py
@@ -406,9 +406,10 @@ def __init__(self,
         if format_type == 'binary':
             legacy_train_status = train_set.legacy_polygons_status
             if val_set and val_set.legacy_polygons_status != legacy_train_status:
-                logger.warning(
-                    f'Train and validation set have different legacy polygon status: {legacy_train_status} and {val_set.legacy_polygons_status}.'
-                    'Train set status prevails.')
+                logger.warning('Train and validation set have different legacy '
+                               f'polygon status: {legacy_train_status} and '
+                               f'{val_set.legacy_polygons_status}. Train set '
+                               'status prevails.')
             if legacy_train_status == "mixed":
                 logger.warning('Mixed legacy polygon status in training dataset. Consider recompilation.')
                 legacy_train_status = False
diff --git a/kraken/lib/vgsl.py b/kraken/lib/vgsl.py
index 93a7d0cb3..9a4f0759b 100644
--- a/kraken/lib/vgsl.py
+++ b/kraken/lib/vgsl.py
@@ -141,7 +141,7 @@ def __init__(self, spec: str) -> None:
                                               'one_channel_mode': None,
                                               'model_type': None,
                                               'hyper_params': {},
-                                              'legacy_polygons': False} # enable new polygons by default on new models
+                                              'legacy_polygons': False}  # enable new polygons by default on new models
         self._aux_layers = nn.ModuleDict()

         self.idx = -1
@@ -313,7 +313,7 @@ def _deserialize_layers(name, layer):
                             'one_channel_mode': '1',
                             'model_type': None,
                             'hyper_params': {},
-                            'legacy_polygons': True} # disable new polygons by default on load
+                            'legacy_polygons': True}  # disable new polygons by default on load
         if 'kraken_meta' in mlmodel.user_defined_metadata:
             nn.user_metadata.update(json.loads(mlmodel.user_defined_metadata['kraken_meta']))

@@ -368,7 +368,7 @@ def aux_layers(self, val: Dict[str, torch.nn.Module]):
     @property
     def use_legacy_polygons(self):
         return self.user_metadata.get('legacy_polygons', True)
-
+
     @use_legacy_polygons.setter
     def use_legacy_polygons(self, val: bool):
         self.user_metadata['legacy_polygons'] = val
diff --git a/tests/test_newpolygons.py b/tests/test_newpolygons.py
index 0c05256c5..18d7faacd 100644
--- a/tests/test_newpolygons.py
+++ b/tests/test_newpolygons.py
@@ -5,7 +5,7 @@
 import tempfile
 from unittest.mock import Mock, patch
 from pathlib import Path
-from traceback import print_exception
+from traceback import print_exc
 import warnings

 from typing import Optional, List, Union
@@ -89,7 +89,7 @@ def _test_krakencli(self, extractor_mock: Mock, *, args, force_no_legacy: bool=F
         print("kraken", *args)

         if result.exception:
-            print_exception(result.exception)
+            print_exc()

         self.assertEqual(result.exit_code, 0)
         extractor_mock.assert_called()
@@ -106,7 +106,7 @@ def _test_ketoscli(self, *, args, expect_legacy: bool, check_exit_code: Optional
         print("ketos", *args)
         if result.exception:
             print(result.output)
-            print_exception(result.exception)
+            print_exc()

         if check_exit_code is not None:
             if isinstance(check_exit_code, int):
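
Note on the Pillow and NumPy changes above (not part of the patch itself): the kraken/lib/segmentation.py hunks move from the deprecated module-level constants (Image.NEAREST, Image.BILINEAR, Image.BICUBIC, Image.AFFINE) to the PIL.Image.Resampling and PIL.Image.Transform enums introduced in Pillow 9.1, and kraken/lib/pretrain/util.py replaces np.int with the builtin int because that alias was removed in NumPy 1.24. Below is a minimal, self-contained sketch of the enum-based lookup, assuming Pillow >= 9.1; the order_to_resample helper and the identity transform are illustrative only and do not appear in kraken.

# Illustrative sketch, not kraken code: maps a spline interpolation order to a
# Pillow Resampling member the same way the patched dictionaries do.
from PIL import Image
from PIL.Image import Resampling, Transform


def order_to_resample(order: int) -> Resampling:
    # orders 2 and 3 both fall back to bicubic, mirroring the patch
    return {0: Resampling.NEAREST,
            1: Resampling.BILINEAR,
            2: Resampling.BICUBIC,
            3: Resampling.BICUBIC}.get(order, Resampling.NEAREST)


im = Image.new('L', (32, 32), color=255)
# identity affine transform; the patched _rotate passes the first six entries
# of its flattened tform.params here instead
warped = im.transform((32, 32), Transform.AFFINE,
                      data=[1, 0, 0, 0, 1, 0],
                      resample=order_to_resample(1),
                      fillcolor=0)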