From 34dc25e3cfc63ef94113ef440991e0129569983e Mon Sep 17 00:00:00 2001 From: eggplants Date: Sun, 3 Apr 2022 12:39:46 +0900 Subject: [PATCH 01/18] add: option for selecting of model language to cli --- budoux/main.py | 56 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 49 insertions(+), 7 deletions(-) diff --git a/budoux/main.py b/budoux/main.py index 0321364d..c24f90f3 100644 --- a/budoux/main.py +++ b/budoux/main.py @@ -20,6 +20,7 @@ import sys import textwrap import typing +import glob import pkg_resources @@ -34,21 +35,52 @@ class BudouxHelpFormatter(argparse.ArgumentDefaultsHelpFormatter, def check_file(path: str) -> str: - """Check if filepath is exist or not. + """Check if a given filepath exists or not. Args: path (str): Model path Raises: - FileNotFoundError: Raise if given path is not exist. + FileNotFoundError: Raise if given path does not exist. Returns: - str: Model path confirmed its existance. + str: A model path. """ if os.path.isfile(path): return path else: - raise FileNotFoundError("'{}' is not found.".format(path)) + raise argparse.ArgumentTypeError(f"'{path}' is not found.") + + +def get_model_langs() -> typing.Dict[str, str]: + """Get a dictionary of model languages and its paths. + + Returns: + typing.Dict[str, str]: A dictionary of model languages and its paths. + """ + models = glob.glob( + pkg_resources.resource_filename(__name__, "models") + "/*-*.json") + return {model.split("/")[-1][:2]: model for model in models} + + +def check_lang(lang: str) -> str: + """Check if given language exists or not. + + Args: + lang (str): language code (e.g.: 'ja') + + Raises: + argparse.ArgumentTypeError: Raise if no model for given language exists. + + Returns: + str: A model path. + """ + langs = get_model_langs() + if lang in langs: + return langs[lang] + else: + raise argparse.ArgumentTypeError( + f"'{lang}' does not exist in builtin models.") def parse_args(test: ArgList = None) -> argparse.Namespace: @@ -72,7 +104,9 @@ def parse_args(test: ArgList = None) -> argparse.Namespace: description=textwrap.dedent("""\ BudouX is the successor to Budou, the machine learning powered line break organizer tool."""), - ) + epilog="\n- ".join( + ["supported languages of `-l`, `--lang`:", + *get_model_langs().keys()])) parser.add_argument("text", metavar="TXT", nargs="?", type=str, help="text") parser.add_argument( @@ -81,7 +115,8 @@ def parse_args(test: ArgList = None) -> argparse.Namespace: action="store_true", help="HTML mode", ) - parser.add_argument( + model_select_group = parser.add_mutually_exclusive_group() + model_select_group.add_argument( "-m", "--model", metavar="JSON", @@ -89,6 +124,13 @@ def parse_args(test: ArgList = None) -> argparse.Namespace: default=pkg_resources.resource_filename(__name__, "models/ja-knbc.json"), help="custom model file path", ) + model_select_group.add_argument( + "-l", + "--lang", + metavar="LANG", + type=check_lang, + help="language of custom model", + ) parser.add_argument( "-d", "--delim", @@ -118,7 +160,7 @@ def parse_args(test: ArgList = None) -> argparse.Namespace: def _main(test: ArgList = None) -> str: args = parse_args(test=test) - with open(args.model, "r") as f: + with open(args.lang or args.model, "r") as f: model = json.load(f) parser = budoux.Parser(model) From 7cb4886b25975f053015b9ca90c472b53da8e84f Mon Sep 17 00:00:00 2001 From: eggplants Date: Sun, 3 Apr 2022 12:46:15 +0900 Subject: [PATCH 02/18] fix: remove checks of Python<3.7 in test --- tests/test_feature_extractor.py | 4 ++-- tests/test_main.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_feature_extractor.py b/tests/test_feature_extractor.py index 991c8f29..0d502965 100644 --- a/tests/test_feature_extractor.py +++ b/tests/test_feature_extractor.py @@ -26,10 +26,10 @@ from budoux import feature_extractor, utils # noqa (module hack) -if isinstance(sys.stdin, io.TextIOWrapper) and sys.version_info >= (3, 7): +if isinstance(sys.stdin, io.TextIOWrapper): sys.stdin.reconfigure(encoding='utf-8') -if isinstance(sys.stdout, io.TextIOWrapper) and sys.version_info >= (3, 7): +if isinstance(sys.stdout, io.TextIOWrapper): sys.stdout.reconfigure(encoding='utf-8') SOURCE_FILE_PATH = os.path.abspath( diff --git a/tests/test_main.py b/tests/test_main.py index cd2881dd..2e89f1ef 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -24,10 +24,10 @@ from budoux import main # noqa (module hack) -if isinstance(sys.stdin, io.TextIOWrapper) and sys.version_info >= (3, 7): +if isinstance(sys.stdin, io.TextIOWrapper): sys.stdin.reconfigure(encoding='utf-8') -if isinstance(sys.stdout, io.TextIOWrapper) and sys.version_info >= (3, 7): +if isinstance(sys.stdout, io.TextIOWrapper): sys.stdout.reconfigure(encoding='utf-8') From 60e10b728b04b81a701a6b487aa5d3c87852f7d5 Mon Sep 17 00:00:00 2001 From: eggplants Date: Sun, 3 Apr 2022 12:50:14 +0900 Subject: [PATCH 03/18] add: explanatory var for model path --- budoux/main.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/budoux/main.py b/budoux/main.py index c24f90f3..b5cac4e9 100644 --- a/budoux/main.py +++ b/budoux/main.py @@ -160,7 +160,8 @@ def parse_args(test: ArgList = None) -> argparse.Namespace: def _main(test: ArgList = None) -> str: args = parse_args(test=test) - with open(args.lang or args.model, "r") as f: + model_path = args.lang or args.model + with open(model_path, "r") as f: model = json.load(f) parser = budoux.Parser(model) From e790d039704cc4b4d63629474690bf8e5ba8b233 Mon Sep 17 00:00:00 2001 From: eggplants Date: Sun, 3 Apr 2022 12:58:07 +0900 Subject: [PATCH 04/18] add: coverage files to ignored files --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 69e15341..1939ad90 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,8 @@ __pycache__ *.pyc *.log *.egg-info +*,cover +*.coverage # Python related files build/ From df71c1f76de07d8c81c0abadfbfb1af761cc366a Mon Sep 17 00:00:00 2001 From: eggplants Date: Sun, 3 Apr 2022 13:27:18 +0900 Subject: [PATCH 05/18] add: coverage files to ignored files --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 1939ad90..fc407a73 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ __pycache__ *.egg-info *,cover *.coverage +xov.xml # Python related files build/ From 4683e17eabc93aedb294cfc7cb5ea4e021a4f365 Mon Sep 17 00:00:00 2001 From: eggplants Date: Sun, 3 Apr 2022 13:27:42 +0900 Subject: [PATCH 06/18] add: coverage files to ignored files --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index fc407a73..0b4e2913 100644 --- a/.gitignore +++ b/.gitignore @@ -8,7 +8,7 @@ __pycache__ *.egg-info *,cover *.coverage -xov.xml +cov.xml # Python related files build/ From 1dbac39a54dbd843dc4ac89d7b36cedbe1428256 Mon Sep 17 00:00:00 2001 From: eggplants Date: Sun, 3 Apr 2022 13:44:34 +0900 Subject: [PATCH 07/18] add: tests for language option --- tests/test_main.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/test_main.py b/tests/test_main.py index 2e89f1ef..7bf6ec20 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -55,6 +55,29 @@ def test_cmdargs_version(self) -> None: self.assertEqual(cm.exception.code, 0) +class TestModelOption(unittest.TestCase): + + def test_cmdargs_invalid_json(self) -> None: + cmdargs = ['-m', '404.json'] + with self.assertRaises(SystemExit) as cm: + main.parse_args(cmdargs) + + self.assertEqual(cm.exception.code, 2) + + def test_cmdargs_invalid_lang(self) -> None: + cmdargs = ['-l', 'aa'] + with self.assertRaises(SystemExit) as cm: + main.parse_args(cmdargs) + + self.assertEqual(cm.exception.code, 2) + + def test_cmdargs_lang_ja(self) -> None: + cmdargs = ['-l', 'ja', '今日はいい天気ですね。'] + output = main._main(cmdargs) + + self.assertEqual(output, '今日は\nいい\n天気ですね。') + + class TestTextArguments(unittest.TestCase): def test_cmdargs_single_text(self) -> None: From ce7edff335fa8f97ac88933d7cfcbc37a561c539 Mon Sep 17 00:00:00 2001 From: eggplants Date: Sun, 3 Apr 2022 13:45:15 +0900 Subject: [PATCH 08/18] add: tests for methods to load default parser --- tests/test_parser.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/test_parser.py b/tests/test_parser.py index 3d2fcd15..d2345516 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -143,5 +143,16 @@ def test_translate_html_string(self) -> None: 'Should work with emojis.') +class TestDefaultParser(unittest.TestCase): + + def test_load_default_japanese_parser(self): + p_ja = parser.load_default_japanese_parser() + self.assertTrue("UW4:私" in p_ja.model) + + def test_load_default_simplified_chinese_parser(self): + p_ch = parser.load_default_simplified_chinese_parser() + self.assertTrue("UW4:力" in p_ch.model) + + if __name__ == '__main__': unittest.main() From b19a48536584abe920bddb9501865e7862cad23c Mon Sep 17 00:00:00 2001 From: eggplants Date: Sun, 3 Apr 2022 13:46:22 +0900 Subject: [PATCH 09/18] fix: improve error message --- budoux/main.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/budoux/main.py b/budoux/main.py index b5cac4e9..ca0f9d16 100644 --- a/budoux/main.py +++ b/budoux/main.py @@ -80,7 +80,8 @@ def check_lang(lang: str) -> str: return langs[lang] else: raise argparse.ArgumentTypeError( - f"'{lang}' does not exist in builtin models.") + f"'{lang}' does not exist in builtin models. (supported languages: {langs.keys()})" + ) def parse_args(test: ArgList = None) -> argparse.Namespace: From 4dc4e4b701c813d23a9755ed40355cc8292144da Mon Sep 17 00:00:00 2001 From: eggplants Date: Sun, 3 Apr 2022 13:51:45 +0900 Subject: [PATCH 10/18] update: README --- README.md | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 45ed05de..1bbfdff4 100644 --- a/README.md +++ b/README.md @@ -91,9 +91,13 @@ For more details of the JavaScript model, please refer to [JavaScript module REA You can also format inputs on your terminal with `budoux` command. ```shellsession -$ budoux 本日は晴天です。 +$ budoux 本日は晴天です。 # default: japanese 本日は 晴天です。 + +$ budoux -l zh 今天天气晴朗。 +今天天气 +晴朗。 ``` ```shellsession @@ -114,7 +118,7 @@ If you want to see help, run `budoux -h`. ```shellsession $ budoux -h -usage: budoux [-h] [-H] [-m JSON] [-d STR] [-t THRES] [-V] [TXT] +usage: budoux [-h] [-H] [-m JSON | -l LANG] [-d STR] [-t THRES] [-V] [TXT] BudouX is the successor to Budou, the machine learning powered line break organizer tool. @@ -122,13 +126,18 @@ the machine learning powered line break organizer tool. positional arguments: TXT text (default: None) -optional arguments: +options: -h, --help show this help message and exit -H, --html HTML mode (default: False) - -m JSON, --model JSON custom model file path (default: /path/to/models/ja-knbc.json) + -m JSON, --model JSON custom model file path (default: /home/eggplants/prog/budoux/budoux/models/ja-knbc.json) + -l LANG, --lang LANG language of custom model (default: None) -d STR, --delim STR output delimiter in TEXT mode (default: ---) -t THRES, --thres THRES threshold value to separate chunks (default: 1000) -V, --version show program's version number and exit + +supported languages of `-l`, `--lang`: +- zh +- ja ``` ## Caveat From 36ea83c631cd2eb8027ed79901a7851e377c17fb Mon Sep 17 00:00:00 2001 From: eggplants Date: Sun, 3 Apr 2022 13:54:43 +0900 Subject: [PATCH 11/18] fix: error message --- budoux/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/budoux/main.py b/budoux/main.py index ca0f9d16..17fd11c4 100644 --- a/budoux/main.py +++ b/budoux/main.py @@ -80,7 +80,7 @@ def check_lang(lang: str) -> str: return langs[lang] else: raise argparse.ArgumentTypeError( - f"'{lang}' does not exist in builtin models. (supported languages: {langs.keys()})" + f"'{lang}' does not exist in builtin models. (supported languages: {list(langs.keys())})" ) From b52d387317ff6031b2747321ea7540d32d435c4b Mon Sep 17 00:00:00 2001 From: eggplants Date: Sun, 3 Apr 2022 13:58:54 +0900 Subject: [PATCH 12/18] fix: mypy error --- tests/test_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_parser.py b/tests/test_parser.py index d2345516..e7932d41 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -145,11 +145,11 @@ def test_translate_html_string(self) -> None: class TestDefaultParser(unittest.TestCase): - def test_load_default_japanese_parser(self): + def test_load_default_japanese_parser(self) -> None: p_ja = parser.load_default_japanese_parser() self.assertTrue("UW4:私" in p_ja.model) - def test_load_default_simplified_chinese_parser(self): + def test_load_default_simplified_chinese_parser(self) -> None: p_ch = parser.load_default_simplified_chinese_parser() self.assertTrue("UW4:力" in p_ch.model) From a605a7be71515267ad281e106b5d59e4bb7d15cc Mon Sep 17 00:00:00 2001 From: eggplants Date: Sun, 3 Apr 2022 14:22:47 +0900 Subject: [PATCH 13/18] fix: /->os.sep --- budoux/feature_extractor.py | 2 +- budoux/main.py | 4 ++-- budoux/parser.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/budoux/feature_extractor.py b/budoux/feature_extractor.py index 5d411e24..8db0afed 100644 --- a/budoux/feature_extractor.py +++ b/budoux/feature_extractor.py @@ -20,7 +20,7 @@ import sys import typing -from .utils import SEP, INVALID, Result +from .utils import INVALID, SEP, Result with open(os.path.join(os.path.dirname(__file__), 'unicode_blocks.json')) as f: block_starts: typing.List[int] = json.load(f) diff --git a/budoux/main.py b/budoux/main.py index 17fd11c4..ecef7da2 100644 --- a/budoux/main.py +++ b/budoux/main.py @@ -14,13 +14,13 @@ # limitations under the License. """BudouX Script to provide CLI for user.""" import argparse +import glob import json import os import shutil import sys import textwrap import typing -import glob import pkg_resources @@ -60,7 +60,7 @@ def get_model_langs() -> typing.Dict[str, str]: """ models = glob.glob( pkg_resources.resource_filename(__name__, "models") + "/*-*.json") - return {model.split("/")[-1][:2]: model for model in models} + return {model.split(os.sep)[-1][:2]: model for model in models} def check_lang(lang: str) -> str: diff --git a/budoux/parser.py b/budoux/parser.py index 45c52735..0613df19 100644 --- a/budoux/parser.py +++ b/budoux/parser.py @@ -19,7 +19,7 @@ from html.parser import HTMLParser from .feature_extractor import get_feature -from .utils import SEP, INVALID, Result +from .utils import INVALID, SEP, Result MODEL_DIR = os.path.join(os.path.dirname(__file__), 'models') PARENT_CSS_STYLE = 'word-break: keep-all; overflow-wrap: break-word;' From c36174e9b489a02d7d4e25c2f0c7d146617b7e17 Mon Sep 17 00:00:00 2001 From: eggplants Date: Sun, 3 Apr 2022 14:28:55 +0900 Subject: [PATCH 14/18] update: README in js --- javascript/README.md | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/javascript/README.md b/javascript/README.md index 8e3d8d82..ac4c7a74 100644 --- a/javascript/README.md +++ b/javascript/README.md @@ -133,19 +133,20 @@ If you want to see help, run `budoux -h`. ```shellsession $ budoux -h -Usage: budoux [-h] [-H] [-m JSON] [-d STR] [-V] [TXT] +Usage: budoux [-h] [-H] [-d STR] [-t THRES] [-m JSON] [-V] [TXT] BudouX is the successor to Budou, the machine learning powered line break organizer tool. Arguments: - txt text + txt text Options: - -H, --html HTML mode - -d, --delim output delimiter in TEXT mode (default: "---") - -m, --model custom model file path - -V, --version output the version number - -h, --help display help for command + -H, --html HTML mode (default: false) + -d, --delim output delimiter in TEXT mode (default: "---") + -t, --thres threshold value to separate chunks (default: "1000") + -m, --model custom model file path + -V, --version output the version number + -h, --help display help for command ``` ### Attributes From c508d0ece631236855fb8517414f48d3a7cd66fd Mon Sep 17 00:00:00 2001 From: eggplants Date: Mon, 4 Apr 2022 09:41:31 +0900 Subject: [PATCH 15/18] fix:remove useless ignored file pattern --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index 0b4e2913..f1cac983 100644 --- a/.gitignore +++ b/.gitignore @@ -6,7 +6,6 @@ __pycache__ *.pyc *.log *.egg-info -*,cover *.coverage cov.xml From 1c03f6e0f0590580cd552cfba31a92f8937e73c8 Mon Sep 17 00:00:00 2001 From: eggplants Date: Mon, 4 Apr 2022 09:48:38 +0900 Subject: [PATCH 16/18] fix: `--lang [lang-code]` -> `--lang [lang-code]-[data-source]` --- README.md | 8 ++++---- budoux/main.py | 4 ++-- tests/test_main.py | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 1bbfdff4..694bde7f 100644 --- a/README.md +++ b/README.md @@ -95,7 +95,7 @@ $ budoux 本日は晴天です。 # default: japanese 本日は 晴天です。 -$ budoux -l zh 今天天气晴朗。 +$ budoux -l zh-hans 今天天气晴朗。 今天天气 晴朗。 ``` @@ -129,15 +129,15 @@ positional arguments: options: -h, --help show this help message and exit -H, --html HTML mode (default: False) - -m JSON, --model JSON custom model file path (default: /home/eggplants/prog/budoux/budoux/models/ja-knbc.json) + -m JSON, --model JSON custom model file path (default: /path/to/models/ja-knbc.json) -l LANG, --lang LANG language of custom model (default: None) -d STR, --delim STR output delimiter in TEXT mode (default: ---) -t THRES, --thres THRES threshold value to separate chunks (default: 1000) -V, --version show program's version number and exit supported languages of `-l`, `--lang`: -- zh -- ja +- zh-hans +- ja-knbc ``` ## Caveat diff --git a/budoux/main.py b/budoux/main.py index ecef7da2..96bf617b 100644 --- a/budoux/main.py +++ b/budoux/main.py @@ -60,14 +60,14 @@ def get_model_langs() -> typing.Dict[str, str]: """ models = glob.glob( pkg_resources.resource_filename(__name__, "models") + "/*-*.json") - return {model.split(os.sep)[-1][:2]: model for model in models} + return {model.split(os.sep)[-1][:-5]: model for model in models} def check_lang(lang: str) -> str: """Check if given language exists or not. Args: - lang (str): language code (e.g.: 'ja') + lang (str): language code (e.g.: 'ja-knbc') Raises: argparse.ArgumentTypeError: Raise if no model for given language exists. diff --git a/tests/test_main.py b/tests/test_main.py index 7bf6ec20..cc242a4e 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -72,7 +72,7 @@ def test_cmdargs_invalid_lang(self) -> None: self.assertEqual(cm.exception.code, 2) def test_cmdargs_lang_ja(self) -> None: - cmdargs = ['-l', 'ja', '今日はいい天気ですね。'] + cmdargs = ['-l', 'ja-knbc', '今日はいい天気ですね。'] output = main._main(cmdargs) self.assertEqual(output, '今日は\nいい\n天気ですね。') From a52c8abe33a7d99db8d1d02bfe58800b85dd38dc Mon Sep 17 00:00:00 2001 From: eggplants Date: Mon, 4 Apr 2022 09:59:20 +0900 Subject: [PATCH 17/18] fix: lang zh-hans.json -> zh-hans, ja-knbc.json -> ja --- README.md | 8 ++++++-- budoux/main.py | 9 +++++++-- tests/test_main.py | 17 +++++++++++++++-- 3 files changed, 28 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 694bde7f..4163cb40 100644 --- a/README.md +++ b/README.md @@ -91,7 +91,11 @@ For more details of the JavaScript model, please refer to [JavaScript module REA You can also format inputs on your terminal with `budoux` command. ```shellsession -$ budoux 本日は晴天です。 # default: japanese +$ budoux 本日は晴天です。 # default: japanese +本日は +晴天です。 + +$ budoux -l ja 本日は晴天です。 本日は 晴天です。 @@ -137,7 +141,7 @@ options: supported languages of `-l`, `--lang`: - zh-hans -- ja-knbc +- ja ``` ## Caveat diff --git a/budoux/main.py b/budoux/main.py index 96bf617b..5c70ef72 100644 --- a/budoux/main.py +++ b/budoux/main.py @@ -60,14 +60,19 @@ def get_model_langs() -> typing.Dict[str, str]: """ models = glob.glob( pkg_resources.resource_filename(__name__, "models") + "/*-*.json") - return {model.split(os.sep)[-1][:-5]: model for model in models} + langs = {} + for model in models: + model_name = model.split(os.sep)[-1][:-5] + langs[model_name if model_name.startswith('zh-') else model_name[:2]] = model + else: + return langs def check_lang(lang: str) -> str: """Check if given language exists or not. Args: - lang (str): language code (e.g.: 'ja-knbc') + lang (str): language code (e.g.: 'ja') Raises: argparse.ArgumentTypeError: Raise if no model for given language exists. diff --git a/tests/test_main.py b/tests/test_main.py index cc242a4e..9574c61c 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -64,19 +64,32 @@ def test_cmdargs_invalid_json(self) -> None: self.assertEqual(cm.exception.code, 2) - def test_cmdargs_invalid_lang(self) -> None: + def test_cmdargs_invalid_lang_1(self) -> None: cmdargs = ['-l', 'aa'] with self.assertRaises(SystemExit) as cm: main.parse_args(cmdargs) self.assertEqual(cm.exception.code, 2) + def test_cmdargs_invalid_lang_2(self) -> None: + cmdargs = ['-l', 'ja-knbc'] + with self.assertRaises(SystemExit) as cm: + main.parse_args(cmdargs) + + self.assertEqual(cm.exception.code, 2) + def test_cmdargs_lang_ja(self) -> None: - cmdargs = ['-l', 'ja-knbc', '今日はいい天気ですね。'] + cmdargs = ['-l', 'ja', '今日はいい天気ですね。'] output = main._main(cmdargs) self.assertEqual(output, '今日は\nいい\n天気ですね。') + def test_cmdargs_lang_ja(self) -> None: + cmdargs = ['-l', 'zh-hans', '今天天气晴朗。'] + output = main._main(cmdargs) + + self.assertEqual(output, '今天天气\n晴朗。') + class TestTextArguments(unittest.TestCase): From 371a0bae00c34d64746f7331777d9966529ead6b Mon Sep 17 00:00:00 2001 From: eggplants Date: Mon, 4 Apr 2022 10:05:54 +0900 Subject: [PATCH 18/18] fix: style errors --- budoux/main.py | 8 +++++--- tests/test_main.py | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/budoux/main.py b/budoux/main.py index 5c70ef72..3718c705 100644 --- a/budoux/main.py +++ b/budoux/main.py @@ -63,9 +63,11 @@ def get_model_langs() -> typing.Dict[str, str]: langs = {} for model in models: model_name = model.split(os.sep)[-1][:-5] - langs[model_name if model_name.startswith('zh-') else model_name[:2]] = model - else: - return langs + if model_name.startswith('zh-'): + langs[model_name] = model + else: + langs[model_name[:2]] = model + return langs def check_lang(lang: str) -> str: diff --git a/tests/test_main.py b/tests/test_main.py index 9574c61c..7b461e07 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -84,7 +84,7 @@ def test_cmdargs_lang_ja(self) -> None: self.assertEqual(output, '今日は\nいい\n天気ですね。') - def test_cmdargs_lang_ja(self) -> None: + def test_cmdargs_lang_zh_hans(self) -> None: cmdargs = ['-l', 'zh-hans', '今天天气晴朗。'] output = main._main(cmdargs)