Skip to content

Commit

Permalink
增加新的命令行工具: python -m pypinyin.tools.toneconvert
Browse files Browse the repository at this point in the history
提供一个用于转换拼音风格的命令行辅助工具

$ python -m pypinyin.tools.toneconvert to-tone 'zhong4 xin1'
zhòng xīn
  • Loading branch information
mozillazg committed Mar 10, 2024
1 parent 2fba056 commit ba2a7c1
Show file tree
Hide file tree
Showing 6 changed files with 178 additions and 10 deletions.
4 changes: 3 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,9 @@ Python 3(Python 2 下把 ``'中心'`` 替换为 ``u'中心'`` 即可):
$ pypinyin 音乐
yīn yuè
$ pypinyin -h
$ python -m pypinyin.tools.toneconvert to-tone 'zhong4 xin1'
zhòng xīn
文档
Expand Down
32 changes: 32 additions & 0 deletions docs/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,10 @@
命令行工具
------------


pypinyin
~~~~~~~~~~~

程序内置了一个命令行工具 ``pypinyin`` :

.. code-block:: console
Expand Down Expand Up @@ -283,4 +287,32 @@ CYRILLIC_FIRST :py:attr:`~pypinyin.Style.CYRILLIC_FIRST`
================== =========================================


toneconvert
~~~~~~~~~~~~~

通过 ``python -m pypinyin.tools.toneconvert`` 命令可以运行一个辅助转换拼音风格的工具::


$ python -m pypinyin.tools.toneconvert to-tone 'zhong4 xin1'
zhòng xīn

**注意**: 当输入包含多个拼音时,必须使用空格或英文逗号分隔,该工具不支持多个拼音连在一起的输入。

该工具支持的子命令如下::

$ python -m pypinyin.tools.toneconvert -h

usage: toneconvert.py [-h] {to-normal,to-tone,to-tone2,to-tone3} ...

options:
-h, --help show this help message and exit

subcommands:
{to-normal,to-tone,to-tone2,to-tone3}
to-normal call pypinyin.contrib.tone_convert.to_normal() with inputs
to-tone call pypinyin.contrib.tone_convert.to_tone() with inputs
to-tone2 call pypinyin.contrib.tone_convert.to_tone2() with inputs
to-tone3 call pypinyin.contrib.tone_convert.to_tone3() with inputs


.. _《汉语拼音方案》: http://www.moe.gov.cn/s78/A19/yxs_left/moe_810/s230/195802/t19580201_186000.html
Empty file added pypinyin/tools/__init__.py
Empty file.
93 changes: 93 additions & 0 deletions pypinyin/tools/toneconvert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import unicode_literals
from argparse import ArgumentParser
from functools import partial
import re
import sys

from pypinyin.compat import PY2
from pypinyin.style._constants import PHONETIC_SYMBOL_DICT
from pypinyin.contrib.tone_convert import (
to_normal,
to_tone,
to_tone2,
to_tone3,
# to_initials,
# to_finals,
# to_finals_tone,
# to_finals_tone2,
# to_finals_tone3,
)

# Matches one pinyin token together with the delimiter in front of it.
# Group 1 is the delimiter: the start of a line (``(?m)`` lets ``^`` match
# at every line start), a whitespace character or an ASCII comma.
# Group 2 is the token: a run made of the digits 1-5, the letters a-z,
# ``ê``, ``ü`` and every single-character entry obtained by iterating
# PHONETIC_SYMBOL_DICT (escaped so they are safe inside the character class).
re_pinyin = re.compile(
    r'(?m)(^|\s|,)([1-5a-zêü{0}]+)'.format(
        re.escape(
            ''.join(x for x in PHONETIC_SYMBOL_DICT if len(x) == 1)
        )
    )
)
# Maps an action name to the pypinyin.contrib.tone_convert function that
# performs it. get_parser() registers one subcommand per key (with ``_``
# replaced by ``-``) and re_sub() looks the function up by key.
# The commented-out entries match the commented-out imports above and are
# therefore not available as actions.
ACTIONS = {
    'to_normal': to_normal,
    'to_tone': to_tone,
    'to_tone2': to_tone2,
    'to_tone3': to_tone3,
    # 'to_initials': to_initials,
    # 'to_finals': to_finals,
    # 'to_finals_tone': to_finals_tone,
    # 'to_finals_tone2': to_finals_tone2,
    # 'to_finals_tone3': to_finals_tone3,
}


def re_sub(action, match_obj):
    """Build the replacement text for one ``re_pinyin`` match.

    :param action: key of ``ACTIONS`` selecting the conversion function
    :param match_obj: match object whose group 1 is the delimiter found in
                      front of the pinyin and whose group 2 is the pinyin
    :return: the delimiter, unchanged, followed by the converted pinyin
    :raises KeyError: if ``action`` is not a key of ``ACTIONS``
    """
    convert_one = ACTIONS[action]
    delimiter, pinyin = match_obj.group(1, 2)
    return '{0}{1}'.format(delimiter, convert_one(pinyin))


def convert(action, args):
    """Convert every input string and print each result on its own line.

    :param action: key of ``ACTIONS`` naming the conversion to apply
    :param args: parsed arguments; only ``args.inputs`` (an iterable of
                 strings) is read
    """
    # Bind the action once so the same callback serves every substitution.
    replace_match = partial(re_sub, action)
    for text in args.inputs:
        print(re_pinyin.sub(replace_match, text))


def get_parser():
    """Build the command-line parser of the tone conversion tool.

    One subcommand is registered per key of ``ACTIONS``; its name is the
    key with ``_`` replaced by ``-`` (``to_tone`` becomes ``to-tone``).
    Every subcommand takes one or more positional ``inputs`` and stores,
    as ``func``, a callable that runs ``convert`` for that action.

    A subcommand is mandatory on every supported interpreter, and its name
    is stored on the namespace as ``subcommand``.

    :return: the configured parser
    :rtype: argparse.ArgumentParser
    """
    parser = ArgumentParser()

    # ``dest`` gives the subparsers action a name. Without one, a missing
    # subcommand makes some Python 3 releases fail with a TypeError while
    # building the error message instead of reporting a usage error
    # (bpo-29298).
    if PY2 or sys.version_info < (3, 7):
        # add_subparsers() only accepts ``required`` since Python 3.7, so
        # set the attribute on the returned action instead. On Python
        # 3.3-3.6 the subcommand would otherwise be optional and ``func``
        # would be missing from the namespace.
        subparser = parser.add_subparsers(dest='subcommand')
        subparser.required = True
    else:
        subparser = parser.add_subparsers(
            required=True, title='subcommands', dest='subcommand')

    # sorted() keeps the order of the subcommands in the help output stable
    # on interpreters whose dicts do not preserve insertion order.
    for key in sorted(ACTIONS):
        name = key.replace('_', '-')
        func = partial(convert, key)
        p = subparser.add_parser(
            name,
            help='call pypinyin.contrib.tone_convert.{}() with inputs'.format(
                key))
        p.set_defaults(func=func)
        p.add_argument('inputs', nargs='+')

    return parser


def main(argv):
    """Run the tool with the given command-line arguments.

    When standard input is not a terminal, its whole content (stripped of
    surrounding whitespace) is appended to the arguments as one extra
    input, so that data can be piped into the tool.

    :param argv: command-line arguments without the program name; the
                 sequence itself is not modified
    :raises SystemExit: raised by argparse on invalid arguments, and when
                        no subcommand was selected
    """
    # Work on a copy so the caller's sequence is left untouched.
    argv = list(argv)

    # sys.stdin may be None (for example when no console is attached);
    # treat that like "nothing piped in".
    stdin = sys.stdin
    if stdin is not None and not stdin.isatty():
        pipe_data = stdin.read().strip()
    else:
        pipe_data = ''
    if pipe_data:
        argv.append(pipe_data)

    parser = get_parser()
    args = parser.parse_args(argv)

    # If the parser let a missing subcommand through, ``func`` is absent;
    # report a usage error instead of failing with AttributeError.
    func = getattr(args, 'func', None)
    if func is None:
        parser.error('a subcommand is required')
    func(args)


# Script entry point: pass the command-line arguments, without the program
# name, to main().
if __name__ == '__main__':
    main(sys.argv[1:])
14 changes: 14 additions & 0 deletions pypinyin/tools/toneconvert.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from argparse import ArgumentParser, Namespace
import re
from typing import Union, Text, ByteString, Dict, Any, List

# Module-level pattern object; declared loosely as Any.
re_pinyin = ... # type: Any
# Lookup table keyed by action name; the values are declared loosely as Any.
ACTIONS = ... # type: Dict[Text, Any]

# Takes an action name and a text match object, returns text.
def re_sub(action: Text, match_obj: re.Match[Text]) -> Text: ...

# Takes an action name and the parsed arguments; returns nothing.
def convert(action: Text, args: Namespace) -> None: ...

# Returns the argument parser.
def get_parser() -> ArgumentParser: ...

# Takes the list of command-line arguments; returns nothing.
def main(argv: List[Text]) -> None: ...
45 changes: 36 additions & 9 deletions tests/test_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,22 @@

from __future__ import unicode_literals

from pypinyin.runner import get_parser
import sys
from pypinyin import runner
from pypinyin.tools import toneconvert
from pytest import mark


def test_default():
options = get_parser().parse_args(['你好'])
class Buffer(object):
    """Minimal write-only stand-in for a text stream.

    Every chunk passed to :meth:`write` is kept, in call order, in the
    ``_data`` list so a test can compare it with the expected output.
    """

    def __init__(self):
        # Chunks in the order they were written.
        self._data = list()

    def write(self, data):
        """Record one written chunk."""
        self._data += [data]


def test_runner_default():
options = runner.get_parser().parse_args(['你好'])
assert options.func == 'pinyin'
assert options.style == 'zh4ao'
assert options.separator == '-'
Expand All @@ -16,12 +27,10 @@ def test_default():
assert options.errors == 'default'


def test_custom():
options = get_parser().parse_args(['--func', 'slug',
'--style', 'zhao',
'--separator', ' ',
'--errors', 'ignore',
'--heteronym', '你好啊'])
def test_runner_custom():
options = runner.get_parser().parse_args([
'--func', 'slug', '--style', 'zhao', '--separator', ' ',
'--errors', 'ignore', '--heteronym', '你好啊'])
assert options.func == 'slug'
assert options.style == 'zhao'
assert options.separator == ' '
Expand All @@ -30,6 +39,24 @@ def test_custom():
assert options.hans == ['你好啊']


@mark.parametrize('args,output', [
    [['to-normal', 'yí,yì'], ['yi,yi', '\n']],
    [['to-tone', 'yi2,yi4'], ['yí,yì', '\n']],
    [['to-tone', 'hao3'], ['hǎo', '\n']],
    [['to-tone', 'zhong4 xin1'], ['zhòng xīn', '\n']],
    [['to-tone2', 'hǎo'], ['ha3o', '\n']],
    [['to-tone3', 'hǎo'], ['hao3', '\n']],
])
def test_toneconvert_default(args, output):
    """Run ``toneconvert.main`` and compare everything it writes.

    ``args`` is the argument list handed to ``main``; ``output`` is the
    expected sequence of chunks written to stdout/stderr.
    """
    buf = Buffer()
    # Remember the streams that are active right now (the test runner may
    # have installed its own) so exactly those are put back afterwards.
    saved_stdout, saved_stderr = sys.stdout, sys.stderr
    sys.stdout = sys.stderr = buf
    try:
        toneconvert.main(args)
    finally:
        # Restore even when main() raises (argparse exits via SystemExit),
        # otherwise later tests would keep writing into ``buf``.
        sys.stdout, sys.stderr = saved_stdout, saved_stderr

    assert buf._data == output


if __name__ == '__main__':
import pytest
pytest.cmdline.main()

0 comments on commit ba2a7c1

Please sign in to comment.