Merge pull request #1737 from Honei/server

[asr][websocket]add streaming asr demo
PaddlePaddle · Apr 21, 2022 · 08e0cf2 · 08e0cf2
2 parents fb69086 + 56751a1
commit 08e0cf2
Show file tree

Hide file tree

Showing 37 changed files with 931 additions and 168 deletions.
diff --git a/demos/streaming_asr_server/README.md b/demos/streaming_asr_server/README.md
diff --git a/demos/streaming_asr_server/README_cn.md b/demos/streaming_asr_server/README_cn.md
diff --git a/demos/streaming_asr_server/conf/ws_application.yaml b/demos/streaming_asr_server/conf/ws_application.yaml
@@ -0,0 +1,47 @@
+# This is the parameter configuration file for PaddleSpeech Serving.
+
+#################################################################################
+#                             SERVER SETTING                                    #
+#################################################################################
+host: 0.0.0.0
+port: 8090
+
+# The task format in the engine_list is: <speech task>_<engine type>
+# task choices = ['asr_online', 'tts_online']
+# protocol = ['websocket', 'http'] (only one can be selected).
+# websocket only supports the online engine type.
+protocol: 'websocket'
+engine_list: ['asr_online']
+
+
+#################################################################################
+#                                ENGINE CONFIG                                  #
+#################################################################################
+
+################################### ASR #########################################
+################### speech task: asr; engine_type: online #######################
+asr_online:
+    model_type: 'deepspeech2online_aishell'
+    am_model: # the pdmodel file of am static model [optional]
+    am_params:  # the pdiparams file of am static model [optional]
+    lang: 'zh'
+    sample_rate: 16000
+    cfg_path: 
+    decode_method: 
+    force_yes: True
+
+    am_predictor_conf:
+        device:  # set 'gpu:id' or 'cpu'
+        switch_ir_optim: True
+        glog_info: False  # True -> print glog
+        summary: True  # False -> do not show predictor config
+
+    chunk_buffer_conf:
+        frame_duration_ms: 80
+        shift_ms: 40  # NOTE(review): duplicate key - `shift_ms` is set again below (10); many YAML loaders keep the last value, confirm which is intended
+        sample_rate: 16000
+        sample_width: 2
+        window_n: 7     # frame
+        shift_n: 4      # frame
+        window_ms: 20   # ms
+        shift_ms: 10    # ms; NOTE(review): second definition of `shift_ms` in this mapping
diff --git a/demos/streaming_asr_server/conf/ws_conformer_application.yaml b/demos/streaming_asr_server/conf/ws_conformer_application.yaml
@@ -0,0 +1,45 @@
+# This is the parameter configuration file for PaddleSpeech Serving.
+
+#################################################################################
+#                             SERVER SETTING                                    #
+#################################################################################
+host: 0.0.0.0
+port: 8090
+
+# The task format in the engine_list is: <speech task>_<engine type>
+# task choices = ['asr_online', 'tts_online']
+# protocol = ['websocket', 'http'] (only one can be selected).
+# websocket only supports the online engine type.
+protocol: 'websocket'
+engine_list: ['asr_online']
+
+
+#################################################################################
+#                                ENGINE CONFIG                                  #
+#################################################################################
+
+################################### ASR #########################################
+################### speech task: asr; engine_type: online #######################
+asr_online:
+    model_type: 'conformer_online_multicn'
+    am_model: # the pdmodel file of am static model [optional]
+    am_params:  # the pdiparams file of am static model [optional]
+    lang: 'zh'
+    sample_rate: 16000
+    cfg_path: 
+    decode_method: 
+    force_yes: True
+    device: # cpu or gpu:id
+    am_predictor_conf:
+        device:  # set 'gpu:id' or 'cpu'
+        switch_ir_optim: True
+        glog_info: False  # True -> print glog
+        summary: True  # False -> do not show predictor config
+
+    chunk_buffer_conf:
+        window_n: 7     # frame
+        shift_n: 4      # frame
+        window_ms: 25   # ms
+        shift_ms: 10    # ms
+        sample_rate: 16000
+        sample_width: 2
diff --git a/demos/streaming_asr_server/run.sh b/demos/streaming_asr_server/run.sh
@@ -0,0 +1,2 @@
+# start the streaming asr service
+paddlespeech_server start --config_file ./conf/ws_conformer_application.yaml
diff --git a/demos/streaming_asr_server/test.sh b/demos/streaming_asr_server/test.sh
@@ -0,0 +1,5 @@
+# download the test wav
+wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav 
+
+# read the wav and pass it to service
+python3 websocket_client.py --wavfile ./zh.wav
diff --git a/...speech/server/tests/asr/online/web/app.py → demos/streaming_asr_server/web/app.py b/...speech/server/tests/asr/online/web/app.py → demos/streaming_asr_server/web/app.py
diff --git a/.../tests/asr/online/web/paddle_web_demo.png → ...eaming_asr_server/web/paddle_web_demo.png b/.../tests/asr/online/web/paddle_web_demo.png → ...eaming_asr_server/web/paddle_web_demo.png
diff --git a/...ech/server/tests/asr/online/web/readme.md → demos/streaming_asr_server/web/readme.md b/...ech/server/tests/asr/online/web/readme.md → demos/streaming_asr_server/web/readme.md
diff --git a/...nline/web/static/css/font-awesome.min.css → ...erver/web/static/css/font-awesome.min.css b/...nline/web/static/css/font-awesome.min.css → ...erver/web/static/css/font-awesome.min.css
diff --git a/...tests/asr/online/web/static/css/style.css → ...aming_asr_server/web/static/css/style.css b/...tests/asr/online/web/static/css/style.css → ...aming_asr_server/web/static/css/style.css
diff --git a/...r/online/web/static/fonts/FontAwesome.otf → ...r_server/web/static/fonts/FontAwesome.otf b/...r/online/web/static/fonts/FontAwesome.otf → ...r_server/web/static/fonts/FontAwesome.otf
diff --git a/.../web/static/fonts/fontawesome-webfont.eot → .../web/static/fonts/fontawesome-webfont.eot b/.../web/static/fonts/fontawesome-webfont.eot → .../web/static/fonts/fontawesome-webfont.eot
diff --git a/.../web/static/fonts/fontawesome-webfont.svg → .../web/static/fonts/fontawesome-webfont.svg b/.../web/static/fonts/fontawesome-webfont.svg → .../web/static/fonts/fontawesome-webfont.svg
diff --git a/.../web/static/fonts/fontawesome-webfont.ttf → .../web/static/fonts/fontawesome-webfont.ttf b/.../web/static/fonts/fontawesome-webfont.ttf → .../web/static/fonts/fontawesome-webfont.ttf
diff --git a/...web/static/fonts/fontawesome-webfont.woff → ...web/static/fonts/fontawesome-webfont.woff b/...web/static/fonts/fontawesome-webfont.woff → ...web/static/fonts/fontawesome-webfont.woff
diff --git a/...eb/static/fonts/fontawesome-webfont.woff2 → ...eb/static/fonts/fontawesome-webfont.woff2 b/...eb/static/fonts/fontawesome-webfont.woff2 → ...eb/static/fonts/fontawesome-webfont.woff2
diff --git a/...ne/web/static/image/PaddleSpeech_logo.png → ...er/web/static/image/PaddleSpeech_logo.png b/...ne/web/static/image/PaddleSpeech_logo.png → ...er/web/static/image/PaddleSpeech_logo.png
diff --git a/...line/web/static/image/voice-dictation.svg → ...rver/web/static/image/voice-dictation.svg b/...line/web/static/image/voice-dictation.svg → ...rver/web/static/image/voice-dictation.svg
diff --git a/...r/online/web/static/js/SoundRecognizer.js → ...r_server/web/static/js/SoundRecognizer.js b/...r/online/web/static/js/SoundRecognizer.js → ...r_server/web/static/js/SoundRecognizer.js
diff --git a/.../online/web/static/js/jquery-3.2.1.min.js → ..._server/web/static/js/jquery-3.2.1.min.js b/.../online/web/static/js/jquery-3.2.1.min.js → ..._server/web/static/js/jquery-3.2.1.min.js
diff --git a/...line/web/static/js/recorder/engine/mp3.js → ...rver/web/static/js/recorder/engine/mp3.js b/...line/web/static/js/recorder/engine/mp3.js → ...rver/web/static/js/recorder/engine/mp3.js
diff --git a/...line/web/static/js/recorder/engine/pcm.js → ...rver/web/static/js/recorder/engine/pcm.js b/...line/web/static/js/recorder/engine/pcm.js → ...rver/web/static/js/recorder/engine/pcm.js
diff --git a/...line/web/static/js/recorder/engine/wav.js → ...rver/web/static/js/recorder/engine/wav.js b/...line/web/static/js/recorder/engine/wav.js → ...rver/web/static/js/recorder/engine/wav.js
diff --git a/...er/extensions/frequency.histogram.view.js → ...er/extensions/frequency.histogram.view.js b/...er/extensions/frequency.histogram.view.js → ...er/extensions/frequency.histogram.view.js
diff --git a/.../static/js/recorder/extensions/lib.fft.js → .../static/js/recorder/extensions/lib.fft.js b/.../static/js/recorder/extensions/lib.fft.js → .../static/js/recorder/extensions/lib.fft.js
diff --git a/...e/web/static/js/recorder/recorder-core.js → ...r/web/static/js/recorder/recorder-core.js b/...e/web/static/js/recorder/recorder-core.js → ...r/web/static/js/recorder/recorder-core.js
diff --git a/...er/tests/asr/online/web/static/paddle.ico → ...treaming_asr_server/web/static/paddle.ico b/...er/tests/asr/online/web/static/paddle.ico → ...treaming_asr_server/web/static/paddle.ico
diff --git a/...tests/asr/online/web/templates/index.html → ...aming_asr_server/web/templates/index.html b/...tests/asr/online/web/templates/index.html → ...aming_asr_server/web/templates/index.html
diff --git a/demos/streaming_asr_server/websocket_client.py b/demos/streaming_asr_server/websocket_client.py
@@ -0,0 +1,62 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#!/usr/bin/python
+# -*- coding: UTF-8 -*-
+import argparse
+import asyncio
+import codecs
+import logging
+import os
+
+from paddlespeech.cli.log import logger
+from paddlespeech.server.utils.audio_handler import ASRAudioHandler
+
+
+def main(args):
+    logger.info("asr websocket client start")
+    handler = ASRAudioHandler("127.0.0.1", 8090)
+    loop = asyncio.get_event_loop()
+
+    # support to process single audio file
+    if args.wavfile and os.path.exists(args.wavfile):
+        logger.info(f"start to process the wavscp: {args.wavfile}")
+        result = loop.run_until_complete(handler.run(args.wavfile))
+        result = result["asr_results"]
+        logger.info(f"asr websocket client finished : {result}")
+
+    # support to process batch audios from wav.scp 
+    if args.wavscp and os.path.exists(args.wavscp):
+        logging.info(f"start to process the wavscp: {args.wavscp}")
+        with codecs.open(args.wavscp, 'r', encoding='utf-8') as f,\
+             codecs.open("result.txt", 'w', encoding='utf-8') as w:
+            for line in f:
+                utt_name, utt_path = line.strip().split()
+                result = loop.run_until_complete(handler.run(utt_path))
+                result = result["asr_results"]
+                w.write(f"{utt_name} {result}\n")
+
+
+if __name__ == "__main__":  # script entry point: parse the CLI options and run the client
+    logger.info("Start to do streaming asr client")
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--wavfile",
+        action="store",
+        help="wav file path ",
+        default="./16_audio.wav")  # main() only processes this path when the file exists
+    parser.add_argument(
+        "--wavscp", type=str, default=None, help="The batch audios dict text")  # optional batch list; main() reads one "<name> <path>" pair per line
+    args = parser.parse_args()
+
+    main(args)
diff --git a/paddlespeech/server/bin/paddlespeech_client.py b/paddlespeech/server/bin/paddlespeech_client.py
@@ -30,11 +30,14 @@
 from ..util import cli_client_register
 from ..util import stats_wrapper
 from paddlespeech.cli.log import logger
-from paddlespeech.server.tests.asr.online.websocket_client import ASRAudioHandler
+from paddlespeech.server.utils.audio_handler import ASRAudioHandler
 from paddlespeech.server.utils.audio_process import wav2pcm
 from paddlespeech.server.utils.util import wav2base64
 
-__all__ = ['TTSClientExecutor', 'ASRClientExecutor', 'CLSClientExecutor']
+__all__ = [
+    'TTSClientExecutor', 'ASRClientExecutor', 'ASROnlineClientExecutor',
+    'CLSClientExecutor'
+]
 
 
 @cli_client_register(
@@ -236,11 +239,11 @@ def __call__(self,
 @cli_client_register(
     name='paddlespeech_client.asr_online',
     description='visit asr online service')
-class ASRClientExecutor(BaseExecutor):
+class ASROnlineClientExecutor(BaseExecutor):
     def __init__(self):
-        super(ASRClientExecutor, self).__init__()
+        super(ASROnlineClientExecutor, self).__init__()
         self.parser = argparse.ArgumentParser(
-            prog='paddlespeech_client.asr', add_help=True)
+            prog='paddlespeech_client.asr_online', add_help=True)
         self.parser.add_argument(
             '--server_ip', type=str, default='127.0.0.1', help='server ip')
         self.parser.add_argument(
@@ -305,6 +308,7 @@ def __call__(self,
 
         return res['asr_results']
 
+
 @cli_client_register(
     name='paddlespeech_client.cls', description='visit cls service')
 class CLSClientExecutor(BaseExecutor):

diff --git a/paddlespeech/server/conf/ws_conformer_application.yaml b/paddlespeech/server/conf/ws_conformer_application.yaml
@@ -29,7 +29,7 @@ asr_online:
     cfg_path: 
     decode_method: 
     force_yes: True
-
+    device:  # cpu or gpu:id
     am_predictor_conf:
         device:  # set 'gpu:id' or 'cpu'
         switch_ir_optim: True

diff --git a/paddlespeech/server/engine/asr/online/asr_engine.py b/paddlespeech/server/engine/asr/online/asr_engine.py
@@ -1028,6 +1028,17 @@ def init(self, config: dict) -> bool:
         self.output = ""
         self.executor = ASRServerExecutor()
         self.config = config
+        try:
+            if self.config.get("device", None):
+                self.device = self.config.device
+            else:
+                self.device = paddle.get_device()
+            logger.info(f"paddlespeech_server set the device: {self.device}")
+            paddle.set_device(self.device)
+        except BaseException:
+            logger.error(
+                "Set device failed, please check if device is already used and the parameter 'device' in the yaml file"
+            )
 
         self.executor._init_from_path(
             model_type=self.config.model_type,

diff --git a/paddlespeech/server/tests/asr/online/README.md b/paddlespeech/server/tests/asr/online/README.md
@@ -0,0 +1,35 @@
+([简体中文](./README_cn.md)|English)
+
+# Speech Service
+
+## Introduction
+
+This document introduces a client for streaming asr service: microphone
+
+
+## Usage
+### 1. Install
+Refer to [Install](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).
+
+It is recommended to use **paddlepaddle 2.2.1** or above.
+You can choose either the medium or the hard way to install paddlespeech.
+
+
+### 2. Prepare config File
+
+
+The input of the ASR client demo should be a WAV file (`.wav`), and its sample rate must be the same as the model's.
+
+Here is a sample file for this ASR client demo that can be downloaded:
+```bash
+wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav
+```
+
+### 3. Streaming ASR Client Usage
+
+- microphone
+   ```
+   python microphone_client.py
+
+   ```
diff --git a/paddlespeech/server/tests/asr/online/README_cn.md b/paddlespeech/server/tests/asr/online/README_cn.md
@@ -1,9 +1,9 @@
-([简体中文](./README_cn.md)|English)
+([English](./README.md)|中文)
 
 # 语音服务
 
 ## 介绍
-本文档介绍如何使用流式ASR的三种不同客户端:网页、麦克风、Python模拟流式服务。 
+本文档介绍如何使用流式ASR的一种不同客户端:麦克风。 
 
 
 ## 使用方法
@@ -20,7 +20,7 @@
 
 可以下载此 ASR client的示例音频：
 ```bash
-wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav
+wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav
 ```
 
 ### 2. 流式 ASR 客户端使用方法
@@ -40,10 +40,3 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee
    python microphone_client.py
 
    ```
-
-
-- 网页
-   ```
-   # 进入web目录后参考相关readme.md
-
-   ```
diff --git a/paddlespeech/server/tests/asr/online/__init__.py b/paddlespeech/server/tests/asr/online/__init__.py
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		# start the streaming asr service
		paddlespeech_server start --config_file ./conf/ws_conformer_application.yaml