Skip to content

Commit

Permalink
A couple of refactors
Browse files Browse the repository at this point in the history
Resolves circular dependencies and fixes a couple of bugs
  • Loading branch information
sadra-barikbin committed Dec 16, 2023
1 parent 1b596ca commit b53b463
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 63 deletions.
13 changes: 6 additions & 7 deletions kraken/lib/arrow_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,9 @@
from typing import Optional, List, Union, Callable, Tuple, Dict
from multiprocessing import Pool
from kraken.containers import Segmentation
from kraken.lib import functional_im_transforms as F_t
from kraken.lib.segmentation import extract_polygons
from kraken.lib.xml import XMLPage
from kraken.lib.util import is_bitonal, make_printable
from kraken.lib import util
from kraken.lib.exceptions import KrakenInputException
from os import PathLike

Expand All @@ -47,7 +46,7 @@ def _extract_line(xml_record, skip_empty_lines: bool = True):
im = Image.open(xml_record.imagename)
except (FileNotFoundError, UnidentifiedImageError):
return lines, None, None
if is_bitonal(im):
if util.is_bitonal(im):
im = im.convert('1')
recs = xml_record.lines.values()
for idx, rec in enumerate(recs):
Expand Down Expand Up @@ -81,7 +80,7 @@ def _extract_path_line(xml_record, skip_empty_lines: bool = True):
return [], None, None
if not xml_record['lines'][0]['text'] and skip_empty_lines:
return [], None, None
if is_bitonal(im):
if util.is_bitonal(im):
im = im.convert('1')
fp = io.BytesIO()
im.save(fp, format='png')
Expand All @@ -91,9 +90,9 @@ def _extract_path_line(xml_record, skip_empty_lines: bool = True):

def parse_path(path: Union[str, PathLike],
suffix: str = '.gt.txt',
split=F_t.default_split,
split=util.default_split,
skip_empty_lines: bool = True):
with open(F_t.suffix_split(path, split=split, suffix=suffix), 'r', encoding='utf-8') as fp:
with open(util.suffix_split(path, split=split, suffix=suffix), 'r', encoding='utf-8') as fp:
gt = fp.read().strip('\n\r')
if not gt and skip_empty_lines:
raise KrakenInputException(f'No text for ground truth line {path}.')
Expand Down Expand Up @@ -197,7 +196,7 @@ def build_binary_dataset(files: Optional[List[Union[str, PathLike, Dict]]] = Non
callback(0, num_lines)

for k, v in sorted(alphabet.items(), key=lambda x: x[1], reverse=True):
char = make_printable(k)
char = util.make_printable(k)
if char == k:
char = '\t' + char
logger.info(f'{char}\t{v}')
Expand Down
32 changes: 15 additions & 17 deletions kraken/lib/dataset/recognition.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,11 @@
from typing import List, Tuple, Callable, Optional, Any, Union, Literal

from kraken.containers import BaselineLine, BBoxLine, Segmentation
from kraken.lib.util import is_bitonal
from kraken.lib import util
from kraken.lib.codec import PytorchCodec
from kraken.lib.segmentation import extract_polygons
from kraken.lib.exceptions import KrakenInputException, KrakenEncodeException

from kraken.lib import functional_im_transforms as F_t

__all__ = ['DefaultAugmenter',
'ArrowIPCRecognitionDataset',
'PolygonGTDataset',
Expand Down Expand Up @@ -124,14 +122,14 @@ def __init__(self,
self.seg_type = None
# built text transformations
if normalization:
self.text_transforms.append(partial(F_t.text_normalize, normalization=normalization))
self.text_transforms.append(partial(util.text_normalize, normalization=normalization))
if whitespace_normalization:
self.text_transforms.append(F_t.text_whitespace_normalize)
self.text_transforms.append(util.text_whitespace_normalize)
if reorder:
if reorder in ('L', 'R'):
self.text_transforms.append(partial(F_t.text_reorder, base_dir=reorder))
self.text_transforms.append(partial(util.text_reorder, base_dir=reorder))
else:
self.text_transforms.append(F_t.text_reorder)
self.text_transforms.append(util.text_reorder)
if augmentation:
self.aug = DefaultAugmenter()

Expand Down Expand Up @@ -310,14 +308,14 @@ def __init__(self,
self.seg_type = 'baselines'
# built text transformations
if normalization:
self.text_transforms.append(partial(F_t.text_normalize, normalization=normalization))
self.text_transforms.append(partial(util.text_normalize, normalization=normalization))
if whitespace_normalization:
self.text_transforms.append(F_t.text_whitespace_normalize)
self.text_transforms.append(util.text_whitespace_normalize)
if reorder:
if reorder in ('L', 'R'):
self.text_transforms.append(partial(F_t.text_reorder, base_dir=reorder))
self.text_transforms.append(partial(util.text_reorder, base_dir=reorder))
else:
self.text_transforms.append(F_t.text_reorder)
self.text_transforms.append(util.text_reorder)
if augmentation:
self.aug = DefaultAugmenter()

Expand Down Expand Up @@ -430,7 +428,7 @@ def __getitem__(self, index: int) -> Tuple[torch.Tensor, torch.Tensor]:
im_mode = 'RGB'
elif im.shape[0] == 1:
im_mode = 'L'
if is_bitonal(im):
if util.is_bitonal(im):
im_mode = '1'

if im_mode > self.im_mode:
Expand Down Expand Up @@ -499,14 +497,14 @@ def __init__(self,
self.seg_type = 'bbox'
# built text transformations
if normalization:
self.text_transforms.append(partial(F_t.text_normalize, normalization=normalization))
self.text_transforms.append(partial(util.text_normalize, normalization=normalization))
if whitespace_normalization:
self.text_transforms.append(F_t.text_whitespace_normalize)
self.text_transforms.append(util.text_whitespace_normalize)
if reorder:
if reorder in ('L', 'R'):
self.text_transforms.append(partial(F_t.text_reorder, base_dir=reorder))
self.text_transforms.append(partial(util.text_reorder, base_dir=reorder))
else:
self.text_transforms.append(F_t.text_reorder)
self.text_transforms.append(util.text_reorder)
if augmentation:
self.aug = DefaultAugmenter()

Expand Down Expand Up @@ -608,7 +606,7 @@ def __getitem__(self, index: int) -> Tuple[torch.Tensor, torch.Tensor]:
im_mode = 'RGB'
elif im.shape[0] == 1:
im_mode = 'L'
if is_bitonal(im):
if util.is_bitonal(im):
im_mode = '1'
if im_mode > self.im_mode:
logger.info(f'Upgrading "im_mode" from {self.im_mode} to {im_mode}')
Expand Down
30 changes: 1 addition & 29 deletions kraken/lib/functional_im_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,11 @@
facilitate pickling.
"""
import torch
import regex
import unicodedata
import bidi.algorithm as bd

from os import PathLike
from pathlib import Path
from PIL import Image
from PIL.Image import Resampling

from typing import Tuple, Optional, Callable, Any, Union
from typing import Tuple, Any

from kraken.binarization import nlbin
from kraken.lib.lineest import dewarp, CenterNormalizer
Expand Down Expand Up @@ -77,26 +72,3 @@ def _fixed_resize(img: Image.Image, size: Tuple[int, int], interpolation: int =
ow = int(w * oh/h)
img = img.resize((ow, oh), interpolation)
return img


def text_normalize(text: str, normalization: str) -> str:
    """Normalize *text* to the given Unicode normalization form.

    `normalization` is a form name accepted by `unicodedata.normalize`
    ('NFC', 'NFKC', 'NFD', or 'NFKD').
    """
    normalized = unicodedata.normalize(normalization, text)
    return normalized


def text_whitespace_normalize(text: str) -> str:
    """Map every whitespace code point in *text* to a plain ASCII space and
    strip leading/trailing whitespace from the result."""
    spaced = regex.sub(r'\s', ' ', text)
    return spaced.strip()


def text_reorder(text: str, base_dir: Optional[str] = None) -> str:
    """Reorder *text* from logical to display order with the Unicode BiDi
    algorithm, optionally forcing the base direction ('L' or 'R')."""
    display_form = bd.get_display(text, base_dir=base_dir)
    return display_form


def default_split(x: Union[PathLike, str]) -> str:
    """Return *x* as a string with every filename extension removed.

    Repeatedly drops the trailing suffix until none remain, e.g.
    ``page_01.gt.txt`` -> ``page_01``.
    """
    stripped = Path(x)
    while stripped.suffixes:
        stripped = stripped.with_suffix('')
    return str(stripped)


def suffix_split(x: Union[PathLike, str], split: Callable[[Union[PathLike, str]], str], suffix: str) -> str:
    """Apply *split* to *x* and append *suffix* to the resulting string."""
    base = split(x)
    return f'{base}{suffix}'
8 changes: 4 additions & 4 deletions kraken/lib/segmentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,14 @@
from skimage.morphology import skeletonize
from skimage.transform import PiecewiseAffineTransform, SimilarityTransform, AffineTransform, warp

from typing import List, Tuple, Union, Dict, Sequence, Optional, Literal

from kraken.containers import Segmentation
from typing import List, Tuple, Union, Dict, Sequence, Optional, Literal, TYPE_CHECKING

from kraken.lib import default_specs
from kraken.lib.vgsl import TorchVGSLModel
from kraken.lib.exceptions import KrakenInputException

if TYPE_CHECKING:
from kraken.containers import Segmentation

logger = logging.getLogger('kraken')

Expand Down Expand Up @@ -1032,7 +1032,7 @@ def compute_polygon_section(baseline: Sequence[Tuple[int, int]],
return tuple(o)


def extract_polygons(im: Image.Image, bounds: Segmentation) -> Image.Image:
def extract_polygons(im: Image.Image, bounds: "Segmentation") -> Image.Image:
"""
Yields the subimages of image im defined in the list of bounding polygons
with baselines preserving order.
Expand Down
34 changes: 30 additions & 4 deletions kraken/lib/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,18 @@
import torch
import unicodedata
import numpy as np
import bidi.algorithm as bd
import regex

from os import PathLike
from pathlib import Path
from PIL import Image
from os import PathLike

from typing import Union, Callable, Optional, Literal

from kraken.lib import functional_im_transforms as F_t
from kraken.containers import BBoxLine
from kraken.exceptions import KrakenInputException
from kraken.lib.exceptions import KrakenInputException

__all__ = ['pil2array', 'array2pil', 'is_bitonal', 'make_printable', 'get_im_str', 'parse_gt_path']

Expand Down Expand Up @@ -62,6 +65,29 @@ def get_im_str(im: Image.Image) -> str:
return im.filename if hasattr(im, 'filename') else str(im)


def text_normalize(text: str, normalization: str) -> str:
    """Return *text* converted to the requested Unicode normalization form.

    `normalization` must be one of the forms understood by
    `unicodedata.normalize`: 'NFC', 'NFKC', 'NFD', or 'NFKD'.
    """
    result = unicodedata.normalize(normalization, text)
    return result


def text_whitespace_normalize(text: str) -> str:
    """Replace each whitespace character in *text* with a single ASCII space,
    then trim whitespace from both ends."""
    replaced = regex.sub(r'\s', ' ', text)
    return replaced.strip()


def text_reorder(text: str, base_dir: Optional[str] = None) -> str:
    """Run the Unicode BiDi algorithm on *text* to produce display order.

    `base_dir` optionally forces the paragraph base direction ('L' or 'R').
    """
    reordered = bd.get_display(text, base_dir=base_dir)
    return reordered


def default_split(x: Union[PathLike, str]) -> str:
    """Strip all extensions from path *x* and return the bare path string.

    Each pass removes the last suffix, so multi-part extensions such as
    ``.gt.txt`` are removed entirely.
    """
    path = Path(x)
    while path.suffixes:
        path = path.with_suffix('')
    return str(path)


def suffix_split(x: Union[PathLike, str], split: Callable[[Union[PathLike, str]], str], suffix: str) -> str:
    """Compute the base name of *x* via *split* and concatenate *suffix*."""
    stem = split(x)
    return ''.join((stem, suffix))


def is_printable(char: str) -> bool:
"""
Determines if a code point is printable/visible when printed.
Expand Down Expand Up @@ -103,7 +129,7 @@ def make_printable(char: str) -> str:

def parse_gt_path(path: Union[str, PathLike],
suffix: str = '.gt.txt',
split: Callable[[Union[PathLike, str]], str] = F_t.default_split,
split: Callable[[Union[PathLike, str]], str] = default_split,
skip_empty_lines: bool = True,
base_dir: Optional[Literal['L', 'R']] = None,
text_direction: Literal['horizontal-lr', 'horizontal-rl', 'vertical-lr', 'vertical-rl'] = 'horizontal-lr') -> BBoxLine:
Expand All @@ -128,7 +154,7 @@ def parse_gt_path(path: Union[str, PathLike],

gt = ''
try:
with open(F_t.suffix_split(path, split=split, suffix=suffix), 'r', encoding='utf-8') as fp:
with open(suffix_split(path, split=split, suffix=suffix), 'r', encoding='utf-8') as fp:
gt = fp.read().strip('\n\r')
except FileNotFoundError:
if not skip_empty_lines:
Expand Down
4 changes: 2 additions & 2 deletions kraken/templates/alto
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,9 @@
<OtherTag DESCRIPTION="region type" ID="TYPE_{{ loop.index }}" TYPE={{ type }} LABEL="{{ label }}"/>
{% endfor %}
</Tags>
{% if len(page.line_orders) > 0 %}
{% if page.line_orders | length > 0 %}
<ReadingOrder>
{% if len(page.line_orders) == 1 %}
{% if page.line_orders | length == 1 %}
<OrderedGroup ID="ro_0">
{% for id in page.line_orders[0] %}
<ElementRef ID="o_{{ loop.index }}" REF="{{ id }}"/>
Expand Down

0 comments on commit b53b463

Please sign in to comment.