From 963fd95e6912175c216f61520627c56859fdf9f5 Mon Sep 17 00:00:00 2001 From: Rohit Jain Date: Wed, 14 Jul 2021 00:28:42 +0530 Subject: [PATCH 1/2] Enable toggling Case Encoding flag from C++ Train API --- src/sentencepiece_trainer.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/sentencepiece_trainer.cc b/src/sentencepiece_trainer.cc index 80a86708..19166de4 100644 --- a/src/sentencepiece_trainer.cc +++ b/src/sentencepiece_trainer.cc @@ -153,6 +153,12 @@ util::Status SentencePieceTrainer::MergeSpecsFromArgs( CHECK_OR_RETURN(absl::SimpleAtoi(value, &v)); absl::SetFlag(&FLAGS_minloglevel, v); continue; + } else if(key == "encode_unicode_case") { + bool b; + std::istringstream("true") >> std::boolalpha >> b; + normalizer_spec->set_encode_case(b); + denormalizer_spec->set_decode_case(b); + continue; } const auto status_train = SetProtoField(key, value, trainer_spec); From f0da7ec75cd743f5072b0b85f5f6002af81fa305 Mon Sep 17 00:00:00 2001 From: Rohit Jain Date: Wed, 14 Jul 2021 12:51:27 +0530 Subject: [PATCH 2/2] Fix hard-coded truth value of the encode_unicode_case flag --- src/sentencepiece_trainer.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/sentencepiece_trainer.cc b/src/sentencepiece_trainer.cc index 19166de4..25bf92fe 100644 --- a/src/sentencepiece_trainer.cc +++ b/src/sentencepiece_trainer.cc @@ -154,10 +154,10 @@ util::Status SentencePieceTrainer::MergeSpecsFromArgs( absl::SetFlag(&FLAGS_minloglevel, v); continue; } else if(key == "encode_unicode_case") { - bool b; - std::istringstream("true") >> std::boolalpha >> b; - normalizer_spec->set_encode_case(b); - denormalizer_spec->set_decode_case(b); + bool encode_unicode_case; + std::istringstream(value) >> std::boolalpha >> encode_unicode_case; + normalizer_spec->set_encode_case(encode_unicode_case); + denormalizer_spec->set_decode_case(encode_unicode_case); continue; }