From 7c2f7c36e339a1be6e318998fc9f2a214c8a378b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thibault=20Cl=C3=A9rice?= Date: Sat, 25 May 2019 07:59:54 +0200 Subject: [PATCH] Annotation is better --- annotate_text.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/annotate_text.py b/annotate_text.py index c1569d9..24c2cdb 100644 --- a/annotate_text.py +++ b/annotate_text.py @@ -19,7 +19,7 @@ qui estoit senescaus de la tiere ,%.%. Robiers ses freres ,%.%. Gautiers de voignori ,%.%. Gautiers de Mombelyart ,%.%. Eustasces d'escouflans ,%.%. Guis dou plaissie %,%. et ses freres ,%% Henris D'ardillieres ,%.%. Ogiers de saint chienon ,%.%.""".replace( - "%", "").replace("\n", " ").replace(" ", "") + "%", "").replace("\n", " ") print(input_text) @@ -30,4 +30,4 @@ logger.setLevel(logging.DEBUG) tokenizer = Seq2SeqTokenizer.load("/home/thibault/dev/boudams/models/linear-conv2019-05-24--14:08:58-0.0001.tar", device="cpu") -print("".join(tokenizer.annotate_text(input_text))) \ No newline at end of file +print(" ".join(tokenizer.annotate_text(input_text.replace(" ", "")))) \ No newline at end of file