From 54596f455016d22fa34b4e7dba8d3cfea053f3d9 Mon Sep 17 00:00:00 2001 From: Pieter De Schepper Date: Thu, 16 Jan 2020 12:53:54 +0100 Subject: [PATCH 1/3] make sure a custom separator can also be used instead of wrapping in tags --- budou/budou.py | 14 +++++++++++--- budou/chunk.py | 6 ++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/budou/budou.py b/budou/budou.py index f90caaa..1ea14a5 100644 --- a/budou/budou.py +++ b/budou/budou.py @@ -17,7 +17,7 @@ """Budou: an automatic organizer tool for beautiful line breaking in CJK Usage: - budou [--segmenter=] [--language=] [--classname=] [--inlinestyle] [--wbr] [] + budou [--segmenter=] [--language=] [--seperator=] [--classname=] [--inlinestyle] [--wbr] [] budou -h | --help budou -v | --version @@ -31,6 +31,9 @@ --language= Language the source in. + --seperator= Custom seperator instead of SPAN tags, when used + classname and inlinestyle are ignored + --classname= Class name for output SPAN tags. Use comma-separated value to specify multiple classes. @@ -75,10 +78,15 @@ def main(): inlinestyle=args['--inlinestyle'], wbr=args['--wbr'], ) - print(result['html_code'].encode('utf-8')) + + if args['--seperator']: + print(result['chunks'].seperator_serialize(args['--seperator']).encode('utf-8')) + else: + print(result['html_code'].encode('utf-8')) + sys.exit() -def parse(source, segmenter='nlapi', language=None, max_length=None, +def parse(source, segmenter='nlapi', language=None, max_length=None, seperator=None, classname=None, attributes=None, inlinestyle=False, wbr=False, **kwargs): """Parses input source. 
diff --git a/budou/chunk.py b/budou/chunk.py index cf1f909..215ba1d 100644 --- a/budou/chunk.py +++ b/budou/chunk.py @@ -373,3 +373,9 @@ def wbr_serialize(self): )) return result + def seperator_serialize(self, seperator): + result =[] + for chunk in self: + result.append(chunk.word) + return seperator.join(result) + From 18da49c262fb8c7a8d69fc9d4fd98a2a8c6dbb3a Mon Sep 17 00:00:00 2001 From: Pieter De Schepper Date: Thu, 16 Jan 2020 12:53:54 +0100 Subject: [PATCH 2/3] make sure a custom separator can also be used instead of wrapping in tags --- budou/budou.py | 14 +++++++++++--- budou/chunk.py | 6 ++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/budou/budou.py b/budou/budou.py index f90caaa..1ea14a5 100644 --- a/budou/budou.py +++ b/budou/budou.py @@ -17,7 +17,7 @@ """Budou: an automatic organizer tool for beautiful line breaking in CJK Usage: - budou [--segmenter=] [--language=] [--classname=] [--inlinestyle] [--wbr] [] + budou [--segmenter=] [--language=] [--seperator=] [--classname=] [--inlinestyle] [--wbr] [] budou -h | --help budou -v | --version @@ -31,6 +31,9 @@ --language= Language the source in. + --seperator= Custom seperator instead of SPAN tags, when used + classname and inlinestyle are ignored + --classname= Class name for output SPAN tags. Use comma-separated value to specify multiple classes. @@ -75,10 +78,15 @@ def main(): inlinestyle=args['--inlinestyle'], wbr=args['--wbr'], ) - print(result['html_code'].encode('utf-8')) + + if args['--seperator']: + print(result['chunks'].seperator_serialize(args['--seperator']).encode('utf-8')) + else: + print(result['html_code'].encode('utf-8')) + sys.exit() -def parse(source, segmenter='nlapi', language=None, max_length=None, +def parse(source, segmenter='nlapi', language=None, max_length=None, seperator=None, classname=None, attributes=None, inlinestyle=False, wbr=False, **kwargs): """Parses input source. 
diff --git a/budou/chunk.py b/budou/chunk.py index cf1f909..215ba1d 100644 --- a/budou/chunk.py +++ b/budou/chunk.py @@ -373,3 +373,9 @@ def wbr_serialize(self): )) return result + def seperator_serialize(self, seperator): + result =[] + for chunk in self: + result.append(chunk.word) + return seperator.join(result) + From 1f46714f58e7a81cca67b646de96e0b514521ae9 Mon Sep 17 00:00:00 2001 From: Pieter De Schepper Date: Mon, 20 Jan 2020 14:20:42 +0100 Subject: [PATCH 3/3] Update budou.py remove obsolete separator argument --- budou/budou.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/budou/budou.py b/budou/budou.py index 03738a7..3780e7a 100644 --- a/budou/budou.py +++ b/budou/budou.py @@ -90,7 +90,7 @@ def main(): sys.exit() -def parse(source, segmenter='nlapi', language=None, max_length=None, separator=None, +def parse(source, segmenter='nlapi', language=None, max_length=None, classname=None, attributes=None, inlinestyle=False, wbr=False, **kwargs): """Parses input source.