Skip to content

Commit

Permalink
Merge pull request #570 from PonteIneptique/patch-6
Browse files Browse the repository at this point in the history
Add support for --fixed-splits on ketos test
  • Loading branch information
mittagessen authored Jan 30, 2024
2 parents 95981e0 + b59bff3 commit b71cee0
Showing 1 changed file with 13 additions and 2 deletions.
15 changes: 13 additions & 2 deletions kraken/ketos/recognition.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,10 +382,12 @@ def train(ctx, batch_size, pad, output, spec, append, load, freq, quit, epochs,
'sharing a prefix up to the last extension with JSON `.path` files '
'containing the baseline information. In `binary` mode files are '
'collections of pre-extracted text line images.')
@click.option('--fixed-splits/--ignore-fixed-split', show_default=True, default=False,
help='Whether to honor fixed splits in binary datasets.')
@click.argument('test_set', nargs=-1, callback=_expand_gt, type=click.Path(exists=False, dir_okay=False))
def test(ctx, batch_size, model, evaluation_files, device, pad, workers,
threads, reorder, base_dir, normalization, normalize_whitespace,
repolygonize, force_binarization, format_type, test_set):
repolygonize, force_binarization, format_type, fixed_splits, test_set):
"""
Evaluate on a test set.
"""
Expand Down Expand Up @@ -425,6 +427,14 @@ def test(ctx, batch_size, model, evaluation_files, device, pad, workers,
if len(test_set) == 0:
raise click.UsageError('No evaluation data was provided to the test command. Use `-e` or the `test_set` argument.')

dataset_kwargs = {}
if fixed_splits:
if format_type != "binary":
logger.warning("--fixed-splits can only be use with data using binary format")
else:
dataset_kwargs["split_filter"] = "test"


if format_type in ['xml', 'page', 'alto']:
if repolygonize:
message('Repolygonizing data')
Expand Down Expand Up @@ -468,7 +478,8 @@ def test(ctx, batch_size, model, evaluation_files, device, pad, workers,
ds = DatasetClass(normalization=normalization,
whitespace_normalization=normalize_whitespace,
reorder=reorder,
im_transforms=ts)
im_transforms=ts,
**dataset_kwargs)
for line in test_set:
try:
ds.add(**line)
Expand Down

0 comments on commit b71cee0

Please sign in to comment.