Skip to content

Commit

Permalink
Merge branch 'fix/mirrored_glyph_codec'
Browse files (browse the repository at this point in the history)
  • Loading branch information
mittagessen committed Jun 19, 2024
2 parents 803553f + d483227 commit c36af21
Showing 1 changed file with 4 additions and 2 deletions.
6 changes: 4 additions & 2 deletions kraken/lib/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,8 +457,10 @@ def _build_dataset(self,
dataset.add(**sample)
except KrakenInputException as e:
logger.warning(str(e))
- if self.format_type == 'binary' and self.hparams.hyper_params['normalization']:
- logger.debug('Rebuilding dataset using unicode normalization')
+ if self.format_type == 'binary' and (self.hparams.hyper_params['normalization'] or
+ self.hparams.hyper_params['normalize_whitespace'] or
+ self.reorder):
+ logger.debug('Text transformations modifying alphabet selected. Rebuilding alphabet')
dataset.rebuild_alphabet()

return dataset
Expand Down

0 comments on commit c36af21

Please sign in to comment.