From 3a382bc3c87b22054c74cbac381817eb8c4b820e Mon Sep 17 00:00:00 2001 From: goddamnVincent <1262780247@qq.com> Date: Tue, 3 Dec 2024 20:02:43 +0800 Subject: [PATCH] Add --modeling-unit and --bpe-vocab options to streaming_server.py --- python-api-examples/streaming_server.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/python-api-examples/streaming_server.py b/python-api-examples/streaming_server.py index 5691f67c3..ef6bac8e3 100755 --- a/python-api-examples/streaming_server.py +++ b/python-api-examples/streaming_server.py @@ -229,6 +229,28 @@ def add_hotwords_args(parser: argparse.ArgumentParser): --hotwords-file is given. """, ) + parser.add_argument( + "--modeling-unit", + type=str, + default='cjkchar', + help=""" + The modeling unit of the used model. Currently supported units are: + - cjkchar (for Chinese) + - bpe (for English-like languages) + - cjkchar+bpe (for multilingual models) + """, + ) + parser.add_argument( + "--bpe-vocab", + type=str, + default='', + help=""" + The bpe vocabulary generated by sentencepiece toolkit. + It is only used when modeling-unit is bpe or cjkchar+bpe. + If you can't find bpe.vocab in the model directory, please run: + python script/export_bpe_vocab.py --bpe-model exp/bpe.model + """, + ) def add_modified_beam_search_args(parser: argparse.ArgumentParser): @@ -409,6 +431,8 @@ def create_recognizer(args) -> sherpa_onnx.OnlineRecognizer: rule2_min_trailing_silence=args.rule2_min_trailing_silence, rule3_min_utterance_length=args.rule3_min_utterance_length, provider=args.provider, + modeling_unit=args.modeling_unit, + bpe_vocab=args.bpe_vocab ) elif args.paraformer_encoder: recognizer = sherpa_onnx.OnlineRecognizer.from_paraformer(