From ba2a7c1ae705dbb98f5a87ec4651154ae2fc3b1d Mon Sep 17 00:00:00 2001 From: mozillazg Date: Sat, 9 Mar 2024 15:08:05 +0000 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E6=96=B0=E7=9A=84=E5=91=BD?= =?UTF-8?q?=E4=BB=A4=E8=A1=8C=E5=B7=A5=E5=85=B7=EF=BC=9A=20python=20-m=20p?= =?UTF-8?q?ypinyin.tools.toneconvert?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 提供一个用于转换拼音风格的命令行辅助工具 $ python -m pypinyin.tools.toneconvert to-tone 'zhong4 xin1' zhòng xīn --- README.rst | 4 +- docs/usage.rst | 32 ++++++++++++ pypinyin/tools/__init__.py | 0 pypinyin/tools/toneconvert.py | 93 ++++++++++++++++++++++++++++++++++ pypinyin/tools/toneconvert.pyi | 14 +++++ tests/test_cmd.py | 45 ++++++++++++---- 6 files changed, 178 insertions(+), 10 deletions(-) create mode 100644 pypinyin/tools/__init__.py create mode 100644 pypinyin/tools/toneconvert.py create mode 100644 pypinyin/tools/toneconvert.pyi diff --git a/README.rst b/README.rst index a85d544d..4eb4a5b0 100644 --- a/README.rst +++ b/README.rst @@ -80,7 +80,9 @@ Python 3(Python 2 下把 ``'中心'`` 替换为 ``u'中心'`` 即可): $ pypinyin 音乐 yīn yuè - $ pypinyin -h + + $ python -m pypinyin.tools.toneconvert to-tone 'zhong4 xin1' + zhòng xīn 文档 diff --git a/docs/usage.rst b/docs/usage.rst index 68931224..f63bd592 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -213,6 +213,10 @@ 命令行工具 ------------ + +pypinyin +~~~~~~~~~~~ + 程序内置了一个命令行工具 ``pypinyin`` : .. code-block:: console @@ -283,4 +287,32 @@ CYRILLIC_FIRST :py:attr:`~pypinyin.Style.CYRILLIC_FIRST` ================== ========================================= +toneconvert +~~~~~~~~~~~~~ + +通过 ``python -m pypinyin.tools.toneconvert`` 命令可以运行一个辅助转换拼音风格的工具:: + + + $ python -m pypinyin.tools.toneconvert to-tone 'zhong4 xin1' + zhòng xīn + +**注意**: 当输入包含多个拼音时,必须使用空格或英文逗号分隔,该工具不支持多个拼音连在一起的输入。 + +该工具支持的子命令如下:: + + $ python -m pypinyin.tools.toneconvert -h + + usage: toneconvert.py [-h] {to-normal,to-tone,to-tone2,to-tone3} ... 
+ + options: + -h, --help show this help message and exit + + subcommands: + {to-normal,to-tone,to-tone2,to-tone3} + to-normal call pypinyin.contrib.tone_convert.to_normal() with inputs + to-tone call pypinyin.contrib.tone_convert.to_tone() with inputs + to-tone2 call pypinyin.contrib.tone_convert.to_tone2() with inputs + to-tone3 call pypinyin.contrib.tone_convert.to_tone3() with inputs + + .. _《汉语拼音方案》: http://www.moe.gov.cn/s78/A19/yxs_left/moe_810/s230/195802/t19580201_186000.html diff --git a/pypinyin/tools/__init__.py b/pypinyin/tools/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pypinyin/tools/toneconvert.py b/pypinyin/tools/toneconvert.py new file mode 100644 index 00000000..7f04109c --- /dev/null +++ b/pypinyin/tools/toneconvert.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from __future__ import unicode_literals +from argparse import ArgumentParser +from functools import partial +import re +import sys + +from pypinyin.compat import PY2 +from pypinyin.style._constants import PHONETIC_SYMBOL_DICT +from pypinyin.contrib.tone_convert import ( + to_normal, + to_tone, + to_tone2, + to_tone3, + # to_initials, + # to_finals, + # to_finals_tone, + # to_finals_tone2, + # to_finals_tone3, +) + +re_pinyin = re.compile( + r'(?m)(^|\s|,)([1-5a-zêü{0}]+)'.format( + re.escape( + ''.join(x for x in PHONETIC_SYMBOL_DICT if len(x) == 1) + ) + ) +) +ACTIONS = { + 'to_normal': to_normal, + 'to_tone': to_tone, + 'to_tone2': to_tone2, + 'to_tone3': to_tone3, + # 'to_initials': to_initials, + # 'to_finals': to_finals, + # 'to_finals_tone': to_finals_tone, + # 'to_finals_tone2': to_finals_tone2, + # 'to_finals_tone3': to_finals_tone3, +} + + +def re_sub(action, match_obj): + func = ACTIONS[action] + converted = func(match_obj.group(2)) + return '{0}{1}'.format(match_obj.group(1), converted) + + +def convert(action, args): + inputs = args.inputs + for item in inputs: + result = re_pinyin.sub(lambda m: re_sub(action, m), item) + 
print(result) + + +def get_parser(): + parser = ArgumentParser() + + if PY2 or sys.version_info < (3, 7): + subparser = parser.add_subparsers() + else: + subparser = parser.add_subparsers(required=True, title='subcommands') + + for key in ACTIONS.keys(): + name = key.replace('_', '-') + func = partial(convert, key) + p = subparser.add_parser( + name, + help='call pypinyin.contrib.tone_convert.{}() with inputs'.format(key)) + p.set_defaults(func=func) + p.add_argument('inputs', nargs='+') + + return parser + + +def main(argv): + argv = argv[:] + + if not sys.stdin.isatty(): + pipe_data = sys.stdin.read().strip() + else: + pipe_data = '' + if pipe_data: + argv.append(pipe_data) + + parser = get_parser() + args = parser.parse_args(argv) + args.func(args) + + +if __name__ == '__main__': + main(sys.argv[1:]) diff --git a/pypinyin/tools/toneconvert.pyi b/pypinyin/tools/toneconvert.pyi new file mode 100644 index 00000000..627fd8ce --- /dev/null +++ b/pypinyin/tools/toneconvert.pyi @@ -0,0 +1,14 @@ +from argparse import ArgumentParser, Namespace +import re +from typing import Union, Text, ByteString, Dict, Any, List + +re_pinyin = ... # type: Any +ACTIONS = ... # type: Dict[Text, Any] + +def re_sub(action: Text, match_obj: re.Match[Text]) -> Text: ... + +def convert(action: Text, args: Namespace) -> None: ... + +def get_parser() -> ArgumentParser: ... + +def main(argv: List[Text]) -> None: ... 
diff --git a/tests/test_cmd.py b/tests/test_cmd.py index 195715c9..0e93e135 100644 --- a/tests/test_cmd.py +++ b/tests/test_cmd.py @@ -3,11 +3,22 @@ from __future__ import unicode_literals -from pypinyin.runner import get_parser +import sys +from pypinyin import runner +from pypinyin.tools import toneconvert +from pytest import mark -def test_default(): - options = get_parser().parse_args(['你好']) +class Buffer(object): + def __init__(self): + self._data = [] + + def write(self, data): + self._data.append(data) + + +def test_runner_default(): + options = runner.get_parser().parse_args(['你好']) assert options.func == 'pinyin' assert options.style == 'zh4ao' assert options.separator == '-' @@ -16,12 +27,10 @@ def test_default(): assert options.errors == 'default' -def test_custom(): - options = get_parser().parse_args(['--func', 'slug', - '--style', 'zhao', - '--separator', ' ', - '--errors', 'ignore', - '--heteronym', '你好啊']) +def test_runner_custom(): + options = runner.get_parser().parse_args([ + '--func', 'slug', '--style', 'zhao', '--separator', ' ', + '--errors', 'ignore', '--heteronym', '你好啊']) assert options.func == 'slug' assert options.style == 'zhao' assert options.separator == ' ' @@ -30,6 +39,24 @@ def test_custom(): assert options.hans == ['你好啊'] +@mark.parametrize('args,output', [ + [['to-normal', 'yí,yì'], ['yi,yi', '\n']], + [['to-tone', 'yi2,yi4'], ['yí,yì', '\n']], + [['to-tone', 'hao3'], ['hǎo', '\n']], + [['to-tone', 'zhong4 xin1'], ['zhòng xīn', '\n']], + [['to-tone2', 'hǎo'], ['ha3o', '\n']], + [['to-tone3', 'hǎo'], ['hao3', '\n']], +]) +def test_toneconvert_default(args, output): + buf = Buffer() + sys.stdout = sys.stderr = buf + toneconvert.main(args) + sys.stdout = sys.__stdout__ + sys.stderr = sys.__stderr__ + + assert buf._data == output + + if __name__ == '__main__': import pytest pytest.cmdline.main()