From 4b76a01c85f54596a26001a1b6dedcec72784238 Mon Sep 17 00:00:00 2001 From: xiongxinlei Date: Thu, 21 Apr 2022 16:19:01 +0800 Subject: [PATCH] update en readme.md, test=doc --- demos/streaming_asr_server/README.md | 407 +++++++++++------- .../server/bin/paddlespeech_client.py | 2 +- .../server/tests/asr/online/README.md | 35 ++ .../server/tests/asr/online/README_cn.md | 42 ++ .../tests/asr/online/microphone_client.py | 161 +++++++ 5 files changed, 497 insertions(+), 150 deletions(-) create mode 100644 paddlespeech/server/tests/asr/online/README.md create mode 100644 paddlespeech/server/tests/asr/online/README_cn.md create mode 100644 paddlespeech/server/tests/asr/online/microphone_client.py diff --git a/demos/streaming_asr_server/README.md b/demos/streaming_asr_server/README.md index 0323d3983ab..68c3b0453bf 100644 --- a/demos/streaming_asr_server/README.md +++ b/demos/streaming_asr_server/README.md @@ -3,7 +3,7 @@ # Speech Server ## Introduction -This demo is an implementation of starting the voice service and accessing the service. It can be achieved with a single command using `paddlespeech_server` and `paddlespeech_client` or a few lines of code in python. +This demo is an implementation of starting the streaming speech service and accessing the service. It can be achieved with a single command using `paddlespeech_server` and `paddlespeech_client` or a few lines of code in python. ## Usage @@ -14,17 +14,16 @@ It is recommended to use **paddlepaddle 2.2.1** or above. You can choose one way from meduim and hard to install paddlespeech. ### 2. Prepare config File -The configuration file can be found in `conf/application.yaml` . -Among them, `engine_list` indicates the speech engine that will be included in the service to be started, in the format of `_`. -At present, the speech tasks integrated by the service include: asr (speech recognition), tts (text to sppech) and cls (audio classification). 
-Currently the engine type supports two forms: python and inference (Paddle Inference) +The configuration file can be found in `conf/ws_application.yaml` and `conf/ws_conformer_application.yaml`. + +At present, the speech models integrated by the service include: DeepSpeech2 and conformer. The input of ASR client demo should be a WAV file(`.wav`), and the sample rate must be the same as the model. Here are sample files for thisASR client demo that can be downloaded: ```bash -wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav +wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav ``` ### 3. Server Usage @@ -32,7 +31,7 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee ```bash # start the service - paddlespeech_server start --config_file ./conf/application.yaml + paddlespeech_server start --config_file ./conf/ws_conformer_application.yaml ``` Usage: @@ -41,19 +40,72 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee paddlespeech_server start --help ``` Arguments: - - `config_file`: yaml file of the app, defalut: ./conf/application.yaml + - `config_file`: yaml file of the app, default: ./conf/ws_conformer_application.yaml - `log_file`: log file. Default: ./log/paddlespeech.log Output: ```bash - [2022-02-23 11:17:32] [INFO] [server.py:64] Started server process [6384] - INFO: Waiting for application startup. - [2022-02-23 11:17:32] [INFO] [on.py:26] Waiting for application startup. - INFO: Application startup complete. - [2022-02-23 11:17:32] [INFO] [on.py:38] Application startup complete.
- INFO: Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit) - [2022-02-23 11:17:32] [INFO] [server.py:204] Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit) - + [2022-04-21 15:52:18,126] [ INFO] - create the online asr engine instance + [2022-04-21 15:52:18,127] [ INFO] - paddlespeech_server set the device: cpu + [2022-04-21 15:52:18,128] [ INFO] - Load the pretrained model, tag = conformer_online_multicn-zh-16k + [2022-04-21 15:52:18,128] [ INFO] - File /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/asr1_chunk_conformer_multi_cn_ckpt_0.2.3.model.tar.gz md5 checking... + [2022-04-21 15:52:18,727] [ INFO] - Use pretrained model stored in: /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k + [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k + [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/model.yaml + [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/exp/chunk_conformer/checkpoints/multi_cn.pdparams + [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/exp/chunk_conformer/checkpoints/multi_cn.pdparams + [2022-04-21 15:52:19,446] [ INFO] - start to create the stream conformer asr engine + [2022-04-21 15:52:19,473] [ INFO] - model name: conformer_online + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set 
kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + [2022-04-21 15:52:21,731] [ INFO] - create the transformer like model success + [2022-04-21 15:52:21,733] [ INFO] - Initialize ASR server engine successfully. + INFO: Started server process [11173] + [2022-04-21 15:52:21] [INFO] [server.py:75] Started server process [11173] + INFO: Waiting for application startup. + [2022-04-21 15:52:21] [INFO] [on.py:45] Waiting for application startup. + INFO: Application startup complete. + [2022-04-21 15:52:21] [INFO] [on.py:59] Application startup complete. + /home/users/xiongxinlei/.conda/envs/paddlespeech/lib/python3.9/asyncio/base_events.py:1460: DeprecationWarning: The loop argument is deprecated since Python 3.8, and scheduled for removal in Python 3.10. + infos = await tasks.gather(*fs, loop=self) + /home/users/xiongxinlei/.conda/envs/paddlespeech/lib/python3.9/asyncio/base_events.py:1518: DeprecationWarning: The loop argument is deprecated since Python 3.8, and scheduled for removal in Python 3.10. + await tasks.sleep(0, loop=self) + INFO: Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit) + [2022-04-21 15:52:21] [INFO] [server.py:206] Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit) ``` - Python API @@ -62,21 +114,73 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee server_executor = ServerExecutor() server_executor( - config_file="./conf/application.yaml", + config_file="./conf/ws_conformer_application.yaml", log_file="./log/paddlespeech.log") ``` Output: ```bash - INFO: Started server process [529] - [2022-02-23 14:57:56] [INFO] [server.py:64] Started server process [529] - INFO: Waiting for application startup. 
- [2022-02-23 14:57:56] [INFO] [on.py:26] Waiting for application startup. - INFO: Application startup complete. - [2022-02-23 14:57:56] [INFO] [on.py:38] Application startup complete. - INFO: Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit) - [2022-02-23 14:57:56] [INFO] [server.py:204] Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit) - + [2022-04-21 15:52:18,126] [ INFO] - create the online asr engine instance + [2022-04-21 15:52:18,127] [ INFO] - paddlespeech_server set the device: cpu + [2022-04-21 15:52:18,128] [ INFO] - Load the pretrained model, tag = conformer_online_multicn-zh-16k + [2022-04-21 15:52:18,128] [ INFO] - File /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/asr1_chunk_conformer_multi_cn_ckpt_0.2.3.model.tar.gz md5 checking... + [2022-04-21 15:52:18,727] [ INFO] - Use pretrained model stored in: /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k + [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k + [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/model.yaml + [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/exp/chunk_conformer/checkpoints/multi_cn.pdparams + [2022-04-21 15:52:18,727] [ INFO] - /home/users/xiongxinlei/.paddlespeech/models/conformer_online_multicn-zh-16k/exp/chunk_conformer/checkpoints/multi_cn.pdparams + [2022-04-21 15:52:19,446] [ INFO] - start to create the stream conformer asr engine + [2022-04-21 15:52:19,473] [ INFO] - model name: conformer_online + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set 
kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + set kaiming_uniform + [2022-04-21 15:52:21,731] [ INFO] - create the transformer like model success + [2022-04-21 15:52:21,733] [ INFO] - Initialize ASR server engine successfully. + INFO: Started server process [11173] + [2022-04-21 15:52:21] [INFO] [server.py:75] Started server process [11173] + INFO: Waiting for application startup. + [2022-04-21 15:52:21] [INFO] [on.py:45] Waiting for application startup. + INFO: Application startup complete. + [2022-04-21 15:52:21] [INFO] [on.py:59] Application startup complete. + /home/users/xiongxinlei/.conda/envs/paddlespeech/lib/python3.9/asyncio/base_events.py:1460: DeprecationWarning: The loop argument is deprecated since Python 3.8, and scheduled for removal in Python 3.10. + infos = await tasks.gather(*fs, loop=self) + /home/users/xiongxinlei/.conda/envs/paddlespeech/lib/python3.9/asyncio/base_events.py:1518: DeprecationWarning: The loop argument is deprecated since Python 3.8, and scheduled for removal in Python 3.10. 
+ await tasks.sleep(0, loop=self) + INFO: Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit) + [2022-04-21 15:52:21] [INFO] [server.py:206] Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit) ``` @@ -84,13 +188,13 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee **Note:** The response time will be slightly longer when using the client for the first time - Command Line (Recommended) ``` - paddlespeech_client asr --server_ip 127.0.0.1 --port 8090 --input ./zh.wav + paddlespeech_client asr_online --server_ip 127.0.0.1 --port 8090 --input ./zh.wav ``` Usage: ```bash - paddlespeech_client asr --help + paddlespeech_client asr_online --help ``` Arguments: - `server_ip`: server ip. Default: 127.0.0.1 @@ -102,8 +206,69 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee Output: ```bash - [2022-02-23 18:11:22,819] [ INFO] - {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'transcription': '我认为跑步最重要的就是给我带来了身体健康'}} - [2022-02-23 18:11:22,820] [ INFO] - time cost 0.689145 s. 
+ [2022-04-21 15:59:03,904] [ INFO] - receive msg={"status": "ok", "signal": "server_ready"} + [2022-04-21 15:59:03,960] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:03,973] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:03,987] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,000] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,012] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,024] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,036] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,047] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,607] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,620] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,633] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,645] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,657] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,669] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,680] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:05,176] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,185] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,192] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,200] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,208] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,216] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,224] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,232] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,724] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,732] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,740] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,747] 
[ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,755] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,763] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,770] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:06,271] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,279] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,287] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,294] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,302] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,310] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,318] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,326] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,833] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,842] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,850] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,858] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,866] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,874] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,882] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:07,400] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,408] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,416] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,424] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,432] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,440] [ INFO] - receive 
msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,447] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,455] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,984] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:07,992] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:08,001] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:08,008] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:08,016] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:08,024] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:12,883] [ INFO] - final receive msg={'status': 'ok', 'signal': 'finished', 'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:12,884] [ INFO] - 我认为跑步最重要的就是给我带来了身体健康 + [2022-04-21 15:59:12,884] [ INFO] - Response time 9.051567 s. ``` @@ -125,122 +290,66 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee Output: ```bash - {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'transcription': '我认为跑步最重要的就是给我带来了身体健康'}} - ``` - -### 5. TTS Client Usage -**Note:** The response time will be slightly longer when using the client for the first time -- Command Line (Recommended) - ```bash - paddlespeech_client tts --server_ip 127.0.0.1 --port 8090 --input "您好,欢迎使用百度飞桨语音合成服务。" --output output.wav - ``` - Usage: - - ```bash - paddlespeech_client tts --help - ``` - Arguments: - - `server_ip`: server ip. Default: 127.0.0.1 - - `port`: server port. Default: 8090 - - `input`(required): Input text to generate. - - `spk_id`: Speaker id for multi-speaker text to speech. Default: 0 - - `speed`: Audio speed, the value should be set between 0 and 3. Default: 1.0 - - `volume`: Audio volume, the value should be set between 0 and 3. 
Default: 1.0 - - `sample_rate`: Sampling rate, choice: [0, 8000, 16000], the default is the same as the model. Default: 0 - - `output`: Output wave filepath. Default: None, which means not to save the audio to the local. - - Output: - ```bash - [2022-02-23 15:20:37,875] [ INFO] - {'description': 'success.'} - [2022-02-23 15:20:37,875] [ INFO] - Save synthesized audio successfully on output.wav. - [2022-02-23 15:20:37,875] [ INFO] - Audio duration: 3.612500 s. - [2022-02-23 15:20:37,875] [ INFO] - Response time: 0.348050 s. - - ``` - -- Python API - ```python - from paddlespeech.server.bin.paddlespeech_client import TTSClientExecutor - import json - - ttsclient_executor = TTSClientExecutor() - res = ttsclient_executor( - input="您好,欢迎使用百度飞桨语音合成服务。", - server_ip="127.0.0.1", - port=8090, - spk_id=0, - speed=1.0, - volume=1.0, - sample_rate=0, - output="./output.wav") - - response_dict = res.json() - print(response_dict["message"]) - print("Save synthesized audio successfully on %s." % (response_dict['result']['save_path'])) - print("Audio duration: %f s." %(response_dict['result']['duration'])) - ``` - - Output: - ```bash - {'description': 'success.'} - Save synthesized audio successfully on ./output.wav. - Audio duration: 3.612500 s. - - ``` - -### 6. CLS Client Usage -**Note:** The response time will be slightly longer when using the client for the first time -- Command Line (Recommended) - ``` - paddlespeech_client cls --server_ip 127.0.0.1 --port 8090 --input ./zh.wav - ``` - - Usage: - - ```bash - paddlespeech_client cls --help - ``` - Arguments: - - `server_ip`: server ip. Default: 127.0.0.1 - - `port`: server port. Default: 8090 - - `input`(required): Audio file to be classified. - - `topk`: topk scores of classification result. 
- - Output: - ```bash - [2022-03-09 20:44:39,974] [ INFO] - {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'topk': 1, 'results': [{'class_name': 'Speech', 'prob': 0.9027184844017029}]}} - [2022-03-09 20:44:39,975] [ INFO] - Response time 0.104360 s. - - - ``` - -- Python API - ```python - from paddlespeech.server.bin.paddlespeech_client import CLSClientExecutor - import json - - clsclient_executor = CLSClientExecutor() - res = clsclient_executor( - input="./zh.wav", - server_ip="127.0.0.1", - port=8090, - topk=1) - print(res.json()) - ``` - - Output: - ```bash - {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'topk': 1, 'results': [{'class_name': 'Speech', 'prob': 0.9027184844017029}]}} - - ``` - - -## Models supported by the service -### ASR model -Get all models supported by the ASR service via `paddlespeech_server stats --task asr`, where static models can be used for paddle inference inference. - -### TTS model -Get all models supported by the TTS service via `paddlespeech_server stats --task tts`, where static models can be used for paddle inference inference. - -### CLS model -Get all models supported by the CLS service via `paddlespeech_server stats --task cls`, where static models can be used for paddle inference inference. 
+ [2022-04-21 15:59:03,904] [ INFO] - receive msg={"status": "ok", "signal": "server_ready"} + [2022-04-21 15:59:03,960] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:03,973] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:03,987] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,000] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,012] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,024] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,036] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,047] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,607] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,620] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,633] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,645] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,657] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,669] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:04,680] [ INFO] - receive msg={'asr_results': ''} + [2022-04-21 15:59:05,176] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,185] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,192] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,200] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,208] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,216] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,224] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,232] [ INFO] - receive msg={'asr_results': '我认为跑'} + [2022-04-21 15:59:05,724] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,732] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,740] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,747] 
[ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,755] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,763] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:05,770] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的'} + [2022-04-21 15:59:06,271] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,279] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,287] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,294] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,302] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,310] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,318] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,326] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是'} + [2022-04-21 15:59:06,833] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,842] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,850] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,858] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,866] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,874] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:06,882] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给'} + [2022-04-21 15:59:07,400] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,408] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,416] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,424] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,432] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,440] [ INFO] - receive 
msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,447] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,455] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了'} + [2022-04-21 15:59:07,984] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:07,992] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:08,001] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:08,008] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:08,016] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:08,024] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:12,883] [ INFO] - final receive msg={'status': 'ok', 'signal': 'finished', 'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} + [2022-04-21 15:59:12,884] [ INFO] - 我认为跑步最重要的就是给我带来了身体健康 + ``` \ No newline at end of file diff --git a/paddlespeech/server/bin/paddlespeech_client.py b/paddlespeech/server/bin/paddlespeech_client.py index 522c7863ecb..d7858be6a5e 100644 --- a/paddlespeech/server/bin/paddlespeech_client.py +++ b/paddlespeech/server/bin/paddlespeech_client.py @@ -35,7 +35,7 @@ from paddlespeech.server.utils.util import wav2base64 __all__ = [ - 'TTSClientExecutor', 'ASRClientExecutor', 'ASRClientExecutor', + 'TTSClientExecutor', 'ASRClientExecutor', 'ASROnlineClientExecutor', 'CLSClientExecutor' ] diff --git a/paddlespeech/server/tests/asr/online/README.md b/paddlespeech/server/tests/asr/online/README.md new file mode 100644 index 00000000000..e1e4d9506bb --- /dev/null +++ b/paddlespeech/server/tests/asr/online/README.md @@ -0,0 +1,35 @@ +([简体中文](./README_cn.md)|English) + +# Speech Service + +## Introduction + +This document introduces a client for streaming asr service: microphone + + +## Usage +### 1. Install +Refer [Install](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md). 
+ + **paddlepaddle 2.2.1** 或以上版本。 +It is recommended to use **paddlepaddle 2.2.1** or above. +You can choose one way from medium and hard to install paddlespeech. + + +### 2. Prepare Test File + + +The input of ASR client demo should be a WAV file(`.wav`), and the sample rate must be the same as the model. + +Here are sample files for this ASR client demo that can be downloaded: +```bash +wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav +``` + +### 3. Streaming ASR Client Usage + +- microphone + ``` + python microphone_client.py + + ``` diff --git a/paddlespeech/server/tests/asr/online/README_cn.md b/paddlespeech/server/tests/asr/online/README_cn.md new file mode 100644 index 00000000000..46dff250eaa --- /dev/null +++ b/paddlespeech/server/tests/asr/online/README_cn.md @@ -0,0 +1,42 @@ +([English](./README.md)|中文) + +# 语音服务 + +## 介绍 +本文档介绍如何使用流式ASR的一种不同客户端:麦克风。 + + +## 使用方法 +### 1. 安装 +请看 [安装文档](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md). + +推荐使用 **paddlepaddle 2.2.1** 或以上版本。 +你可以从 medium,hard 两种方式中选择一种方式安装 PaddleSpeech。 + + +### 2. 准备测试文件 + +这个 ASR client 的输入应该是一个 WAV 文件(`.wav`),并且采样率必须与模型的采样率相同。 + +可以下载此 ASR client的示例音频: +```bash +wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav +``` + +### 3. 流式 ASR 客户端使用方法 + +- Python模拟流式服务命令行 + ``` + + # 流式ASR + paddlespeech_client asr_online --server_ip 127.0.0.1 --port 8091 --input ./zh.wav + + ``` + + +- 麦克风 + ``` + # 直接调用麦克风设备 + python microphone_client.py + + ``` diff --git a/paddlespeech/server/tests/asr/online/microphone_client.py b/paddlespeech/server/tests/asr/online/microphone_client.py new file mode 100644 index 00000000000..2ceaf6d03a0 --- /dev/null +++ b/paddlespeech/server/tests/asr/online/microphone_client.py @@ -0,0 +1,161 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
record wave from the mic
"""
import asyncio
import json
import logging
import sys
import threading
import wave
from collections import deque
from signal import SIGINT
from signal import SIGTERM

import pyaudio
import websockets


class ASRAudioHandler(threading.Thread):
    """Record microphone audio and stream it to a websocket ASR endpoint.

    A background thread reads fixed-size chunks from the default input
    device and queues them; the ``run`` coroutine drains that queue over a
    websocket connection and logs every reply it receives.

    Note that ``run`` is a coroutine, so it has to be driven by an asyncio
    event loop (see ``main``); ``Thread.start()`` would only create an
    un-awaited coroutine object.

    Args:
        url: Host name or IP address of the ASR server.
        port: TCP port of the ASR server.
    """

    # Seconds to yield to the event loop when no captured chunk is pending.
    # Without this pause the send loop would spin and starve the loop, which
    # also delays delivery of the SIGINT/SIGTERM cancellation.
    _POLL_INTERVAL = 0.01

    def __init__(self, url="127.0.0.1", port=8091):
        threading.Thread.__init__(self)
        self.port = port
        # Full websocket endpoint, e.g. ws://127.0.0.1:8091/ws/asr
        self.url = "ws://" + url + ":" + str(port) + "/ws/asr"
        self.fileName = "./output.wav"
        self.chunk = 5120  # frames read from the device per call
        self.format = pyaudio.paInt16
        self.channels = 1
        self.rate = 16000
        self._running = True
        # Chunks captured but not yet sent; appended by the recorder thread
        # and popped from the left by ``run``.
        self._frames = deque()
        # Every captured chunk, kept so ``save`` can write the whole session.
        self.data_backup = []

    def startrecord(self):
        """
        start a new thread to record wave
        """
        # daemon=True so a still-running recorder never blocks interpreter
        # exit, matching the behaviour of the low-level thread start that
        # this public API replaces.
        threading.Thread(target=self.recording, daemon=True).start()

    def recording(self):
        """
        recording wave

        Reads chunks from the input device until ``stoprecord`` is called,
        then releases the stream and the PyAudio instance (also when reading
        raises).
        """
        self._running = True
        self._frames = deque()
        p = pyaudio.PyAudio()
        try:
            stream = p.open(
                format=self.format,
                channels=self.channels,
                rate=self.rate,
                input=True,
                frames_per_buffer=self.chunk)
            try:
                while self._running:
                    data = stream.read(self.chunk)
                    self._frames.append(data)
                    self.data_backup.append(data)
            finally:
                stream.stop_stream()
                stream.close()
        finally:
            p.terminate()

    def save(self):
        """
        save wave data

        Writes every chunk captured so far to ``self.fileName`` as a WAV
        file using the configured channel count, sample format and rate.
        """
        p = pyaudio.PyAudio()
        try:
            sample_width = p.get_sample_size(self.format)
        finally:
            p.terminate()

        with wave.open(self.fileName, 'wb') as wf:
            wf.setnchannels(self.channels)
            wf.setsampwidth(sample_width)
            wf.setframerate(self.rate)
            wf.writeframes(b''.join(self.data_backup))

    def stoprecord(self):
        """
        stop recording
        """
        self._running = False

    @staticmethod
    def _signal_message(signal_name):
        """Return the JSON control message carrying the given signal name."""
        return json.dumps(
            {
                "name": "test.wav",
                "signal": signal_name,
                "nbest": 5
            },
            sort_keys=True,
            indent=4,
            separators=(',', ': '))

    async def run(self):
        """Prompt the user, then stream microphone audio until cancelled.

        Answering anything other than ``y`` exits the process. Otherwise the
        recorder thread is started, a "start" message is sent, and every
        captured chunk is forwarded to the server. When the task is
        cancelled an "end" message is sent, recording stops and the captured
        audio is written to ``self.fileName``.
        """
        answer = input("是否开始录音? (y/n)").strip()
        if answer == "n":
            sys.exit()
        if answer != "y":
            print("无效输入!")
            sys.exit()

        self.startrecord()
        logging.info("*" * 10 + "开始录音,请输入语音")

        try:
            async with websockets.connect(self.url) as ws:
                # Send the start command.
                await ws.send(self._signal_message("start"))
                msg = await ws.recv()
                logging.info("receive msg={}".format(msg))

                # send bytes data
                logging.info("结束录音请: Ctrl + c。继续请按回车。")
                try:
                    while True:
                        if not self._frames:
                            # Nothing captured yet: yield instead of spinning.
                            await asyncio.sleep(self._POLL_INTERVAL)
                            continue
                        await ws.send(self._frames.popleft())
                        msg = await ws.recv()
                        logging.info("receive msg={}".format(msg))
                except asyncio.CancelledError:
                    # Cancelled by the signal handler: tell the server the
                    # stream is finished and read its reply.
                    await ws.send(self._signal_message("end"))
                    msg = await ws.recv()
                    logging.info("receive msg={}".format(msg))
        finally:
            # Stop the recorder even when the websocket session fails.
            self.stoprecord()

        logging.info("*" * 10 + "录音结束")
        self.save()


def main():
    """Run the microphone client against 127.0.0.1:8091 until interrupted."""
    logging.basicConfig(level=logging.INFO)
    logging.info("asr websocket client start")

    handler = ASRAudioHandler("127.0.0.1", 8091)
    # Create the loop explicitly: asyncio.get_event_loop() is deprecated when
    # no loop is running.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    main_task = loop.create_task(handler.run())
    # SIGINT/SIGTERM cancel the task so run() can finish the session cleanly.
    for sig in (SIGINT, SIGTERM):
        loop.add_signal_handler(sig, main_task.cancel)
    try:
        loop.run_until_complete(main_task)
    finally:
        loop.close()

    logging.info("asr websocket client finished")


if __name__ == "__main__":
    main()