Skip to content

Commit

Permalink
Merge branch 'mittagessen:main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
sadra-barikbin authored Dec 12, 2023
2 parents 0cb6244 + 1b596ca commit 36e348a
Show file tree
Hide file tree
Showing 29 changed files with 185 additions and 141 deletions.
4 changes: 2 additions & 2 deletions docs/advanced.rst
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ option type
\--high INTEGER RANGE
============ ====

To binarize a image:
To binarize an image:

.. code-block:: console
Expand Down Expand Up @@ -303,7 +303,7 @@ to the right (inverse for right-to-left scripts like Arabic which start on the
top right-most columns, continuing leftward, and returning to the right-most
column just below when none remain).

In multi-script documents the order of is determined by the primary writing
In multi-script documents the order is determined by the primary writing
system employed in the document, e.g. for a modern book containing both Latin
and Arabic script text it would be set to `lr` when Latin is primary, e.g. when
the binding is on the left side of the book seen from the title cover, and
Expand Down
9 changes: 3 additions & 6 deletions kraken/align.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,21 +24,16 @@
import torch
import logging
import dataclasses
import numpy as np

from PIL import Image
from bidi.algorithm import get_display

from dataclasses import dataclass
from typing import List, Dict, Any, Optional, Literal
from typing import Optional, Literal

from kraken import rpred
from kraken.containers import Segmentation, BaselineOCRRecord
from kraken.lib.codec import PytorchCodec
from kraken.lib.xml import XMLPage
from kraken.lib.models import TorchSeqRecognizer
from kraken.lib.exceptions import KrakenInputException, KrakenEncodeException
from kraken.lib.segmentation import compute_polygon_section

logger = logging.getLogger('kraken')

Expand Down Expand Up @@ -95,6 +90,8 @@ def forced_align(doc: Segmentation, model: TorchSeqRecognizer, base_dir: Optiona
at:
https://github.com/pytorch/audio/blob/main/examples/tutorials/forced_alignment_tutorial.py
"""


@dataclass
class Point:
token_index: int
Expand Down
5 changes: 2 additions & 3 deletions kraken/blla.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
import torch.nn.functional as F
import torchvision.transforms as tf

from typing import Optional, Dict, Callable, Union, List, Any, Tuple, Literal
from typing import Optional, Dict, Callable, Union, List, Any, Literal

from scipy.ndimage import gaussian_filter
from skimage.filters import sobel
Expand Down Expand Up @@ -390,7 +390,6 @@ def segment(im: PIL.Image.Image,

# create objects and assign IDs
blls = []
reg_idx = 0
_shp_regs = {}
for reg_type, rgs in regions.items():
for reg in rgs:
Expand All @@ -415,4 +414,4 @@ def segment(im: PIL.Image.Image,
lines=blls,
regions=regions,
script_detection=script_detection,
line_orders=[order])
line_orders=[order] if order else [])
39 changes: 28 additions & 11 deletions kraken/containers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,24 @@

import PIL.Image
#
# Copyright 2023 Benjamin Kiessling
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing
# permissions and limitations under the License.
"""
kraken.containers
~~~~~~~~~~~~~~~~~
Container classes replacing the old dictionaries returned by kraken's
functional blocks.
"""
import numpy as np
import bidi.algorithm as bd

Expand Down Expand Up @@ -161,7 +180,7 @@ class Segmentation:
script_detection: bool
lines: List[Union[BaselineLine, BBoxLine]]
regions: Dict[str, List[Region]]
line_orders: Optional[List[List[int]]] = None
line_orders: List[List[int]]

def __post_init__(self):
if not self.regions:
Expand All @@ -187,9 +206,9 @@ class ocr_record(ABC):
def __init__(self,
prediction: str,
cuts: List[Union[Tuple[int, int], Tuple[Tuple[int, int],
Tuple[int, int],
Tuple[int, int],
Tuple[int, int]]]],
Tuple[int, int],
Tuple[int, int],
Tuple[int, int]]]],
confidences: List[float],
display_order: bool = True) -> None:
self._prediction = prediction
Expand Down Expand Up @@ -463,9 +482,9 @@ class BBoxOCRRecord(ocr_record, BBoxLine):
def __init__(self,
prediction: str,
cuts: List[Tuple[Tuple[int, int],
Tuple[int, int],
Tuple[int, int],
Tuple[int, int]]],
Tuple[int, int],
Tuple[int, int],
Tuple[int, int]]],
confidences: List[float],
line: Union[BBoxLine, Dict[str, Any]],
base_dir: Optional[Literal['L', 'R']] = None,
Expand Down Expand Up @@ -593,5 +612,3 @@ def _reorder(self, base_dir: Optional[Literal['L', 'R']] = None) -> 'BBoxOCRReco
base_dir=base_dir,
display_order=not self._display_order)
return rec


7 changes: 3 additions & 4 deletions kraken/ketos/recognition.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,7 @@ def test(ctx, batch_size, model, evaluation_files, device, pad, workers,
from torch.utils.data import DataLoader

from kraken.serialization import render_report
from kraken.lib import models
from kraken.lib import models, util
from kraken.lib.xml import XMLPage
from kraken.lib.dataset import (global_align, compute_confusions,
PolygonGTDataset, GroundTruthDataset,
Expand All @@ -419,7 +419,6 @@ def test(ctx, batch_size, model, evaluation_files, device, pad, workers,

test_set = list(test_set)


if evaluation_files:
test_set.extend(evaluation_files)

Expand All @@ -445,7 +444,7 @@ def test(ctx, batch_size, model, evaluation_files, device, pad, workers,
force_binarization = False
if repolygonize:
logger.warning('Repolygonization enabled in `path` mode. Will be ignored.')
test_set = [{'image': img} for img in test_set]
test_set = [{'line': util.parse_gt_path(img)} for img in test_set]
valid_norm = True

if len(test_set) == 0:
Expand Down Expand Up @@ -480,7 +479,7 @@ def test(ctx, batch_size, model, evaluation_files, device, pad, workers,
ds_loader = DataLoader(ds,
batch_size=batch_size,
num_workers=workers,
pin_memory=True,
pin_memory=pin_ds_mem,
collate_fn=collate_sequences)

with KrakenProgressBar() as progress:
Expand Down
11 changes: 5 additions & 6 deletions kraken/ketos/ro.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,7 @@
import logging

from PIL import Image
from typing import Dict

from kraken.lib.exceptions import KrakenInputException
from kraken.lib.default_specs import READING_ORDER_HYPER_PARAMS

from kraken.ketos.util import _validate_manifests, _expand_gt, message, to_ptl_device
Expand All @@ -36,6 +34,7 @@
# raise default max image size to 20k * 20k pixels
Image.MAX_IMAGE_PIXELS = 20000 ** 2


@click.command('rotrain')
@click.pass_context
@click.option('-B', '--batch-size', show_default=True, type=click.INT,
Expand Down Expand Up @@ -156,14 +155,13 @@ def rotrain(ctx, batch_size, output, load, freq, quit, epochs, min_epochs, lag,

from kraken.lib.ro import ROModel
from kraken.lib.train import KrakenTrainer
from kraken.lib.progress import KrakenProgressBar

if not (0 <= freq <= 1) and freq % 1.0 != 0:
raise click.BadOptionUsage('freq', 'freq needs to be either in the interval [0,1.0] or a positive integer.')

if pl_logger == 'tensorboard':
try:
import tensorboard
import tensorboard # NOQA
except ImportError:
raise click.BadOptionUsage('logger', 'tensorboard logger needs the `tensorboard` package installed.')

Expand Down Expand Up @@ -191,7 +189,9 @@ def rotrain(ctx, batch_size, output, load, freq, quit, epochs, min_epochs, lag,
'step_size': step_size,
'rop_patience': sched_patience,
'cos_t_max': cos_max,
'pl_logger': pl_logger,})
'pl_logger': pl_logger,
}
)

# disable automatic partition when given evaluation set explicitly
if evaluation_files:
Expand Down Expand Up @@ -281,7 +281,6 @@ def roadd(ctx, output, ro_model, seg_model):
"""
from kraken.lib import vgsl
from kraken.lib.ro import ROModel
from kraken.lib.train import KrakenTrainer

message(f'Adding {ro_model} reading order model to {seg_model}.')
ro_net = ROModel.load_from_checkpoint(ro_model)
Expand Down
4 changes: 3 additions & 1 deletion kraken/ketos/segmentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@

from PIL import Image

from typing import Dict

from kraken.lib.exceptions import KrakenInputException
from kraken.lib.default_specs import SEGMENTATION_HYPER_PARAMS, SEGMENTATION_SPEC

Expand Down Expand Up @@ -232,7 +234,6 @@ def segtrain(ctx, output, spec, line_width, pad, load, freq, quit, epochs,
from threadpoolctl import threadpool_limits

from kraken.lib.train import SegmentationModel, KrakenTrainer
from kraken.lib.progress import KrakenProgressBar

if resize != 'fail' and not load:
raise click.BadOptionUsage('resize', 'resize option requires loading an existing model')
Expand Down Expand Up @@ -431,6 +432,7 @@ def segtest(ctx, model, evaluation_files, device, workers, threads, threshold,
import torch
import torch.nn.functional as F

from kraken.lib.progress import KrakenProgressBar
from kraken.lib.train import BaselineSet, ImageInputTransforms
from kraken.lib.vgsl import TorchVGSLModel

Expand Down
2 changes: 1 addition & 1 deletion kraken/lib/arrow_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
from kraken.lib.xml import XMLPage
from kraken.lib.util import is_bitonal, make_printable
from kraken.lib.exceptions import KrakenInputException
from os import extsep, PathLike
from os import PathLike

import logging

Expand Down
10 changes: 5 additions & 5 deletions kraken/lib/codec.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,12 +128,12 @@ def encode(self, s: str) -> IntTensor:
idx += len(code)
encodable_suffix = True
break

if not encodable_suffix and s[idx] in self.c2l:
labels.extend(self.c2l[s[idx]])
idx += 1
encodable_suffix = True

if not encodable_suffix:
if self.strict:
raise KrakenEncodeException(f'Non-encodable sequence {s[idx:idx+5]}... encountered.')
Expand Down Expand Up @@ -169,9 +169,9 @@ def decode(self, labels: Sequence[Tuple[int, int, int, float]]) -> List[Tuple[st
if int(labels[idx]) in self.l2c_single:
code = self.l2c_single[int(labels[idx])]
decoded.extend([(c, s, e, u) for c, s, e, u in zip(code,
len(code) * [start[idx]],
len(code) * [end[idx]],
len(code) * [con[idx]])])
len(code) * [start[idx]],
len(code) * [end[idx]],
len(code) * [con[idx]])])
idx += 1
decodable_suffix = True
else:
Expand Down
8 changes: 4 additions & 4 deletions kraken/lib/dataset/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
"""
Top-level module containing datasets for recognition and segmentation training.
"""
from .recognition import ArrowIPCRecognitionDataset, PolygonGTDataset, GroundTruthDataset # NOQA
from .segmentation import BaselineSet # NOQA
from .ro import PairWiseROSet, PageWiseROSet #NOQA
from .utils import ImageInputTransforms, collate_sequences, global_align, compute_confusions # NOQA
from .recognition import ArrowIPCRecognitionDataset, PolygonGTDataset, GroundTruthDataset # NOQA
from .segmentation import BaselineSet # NOQA
from .ro import PairWiseROSet, PageWiseROSet # NOQA
from .utils import ImageInputTransforms, collate_sequences, global_align, compute_confusions # NOQA
23 changes: 17 additions & 6 deletions kraken/lib/dataset/recognition.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import json
import torch
import traceback
import dataclasses
import numpy as np
import pyarrow as pa

Expand All @@ -28,7 +29,7 @@
from torchvision import transforms
from collections import Counter
from torch.utils.data import Dataset
from typing import Dict, List, Tuple, Callable, Optional, Any, Union, Literal
from typing import List, Tuple, Callable, Optional, Any, Union, Literal

from kraken.containers import BaselineLine, BBoxLine, Segmentation
from kraken.lib.util import is_bitonal
Expand Down Expand Up @@ -76,6 +77,7 @@ def __init__(self):
def __call__(self, image):
return self._transforms(image=image)


class ArrowIPCRecognitionDataset(Dataset):
"""
Dataset for training a recognition model from a precompiled dataset in
Expand Down Expand Up @@ -181,7 +183,7 @@ def add(self, file: Union[str, PathLike]) -> None:
mask = np.ones(len(ds_table), dtype=bool)
for index in range(len(ds_table)):
try:
text = self._apply_text_transform(ds_table.column('lines')[index].as_py(),)
self._apply_text_transform(ds_table.column('lines')[index].as_py(),)
except KrakenInputException:
mask[index] = False
continue
Expand Down Expand Up @@ -335,7 +337,7 @@ def add(self,
self.add_line(line)
if page:
self.add_page(page)
if not (line and page):
if not (line or page):
raise ValueError('Neither line nor page data provided in dataset builder')

def add_page(self, page: Segmentation):
Expand Down Expand Up @@ -379,7 +381,7 @@ def add_line(self, line: BaselineLine):
if not line.boundary:
raise ValueError('No boundary given for line')

self._images.append((line.image, line.baseline, line.boundary))
self._images.append((line.imagename, line.baseline, line.boundary))
self._gt.append(text)
self.alphabet.update(text)

Expand Down Expand Up @@ -412,8 +414,17 @@ def __getitem__(self, index: int) -> Tuple[torch.Tensor, torch.Tensor]:
im = item[0][0]
if not isinstance(im, Image.Image):
im = Image.open(im)
im, _ = next(extract_polygons(im, {'type': 'baselines',
'lines': [{'baseline': item[0][1], 'boundary': item[0][2]}]}))
im, _ = next(extract_polygons(im,
Segmentation(type='baselines',
imagename=item[0][0],
text_direction='horizontal-lr',
lines=[BaselineLine('id_0',
baseline=item[0][1],
boundary=item[0][2])],
script_detection=True,
regions={},
line_orders=[])
))
im = self.transforms(im)
if im.shape[0] == 3:
im_mode = 'RGB'
Expand Down
Loading

0 comments on commit 36e348a

Please sign in to comment.