Skip to content

Commit

Permalink
Merge pull request #248 from 2DIPW/dev-vits
Browse files Browse the repository at this point in the history
feat: 增加VITS文本转语音的支持
  • Loading branch information
wzpan authored Apr 24, 2023
2 parents 57a7518 + ca3566f commit e18bc67
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 1 deletion.
33 changes: 32 additions & 1 deletion robot/TTS.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from pypinyin import lazy_pinyin
from pydub import AudioSegment
from abc import ABCMeta, abstractmethod
from .sdk import TencentSpeech, AliSpeech, XunfeiSpeech, atc
from .sdk import TencentSpeech, AliSpeech, XunfeiSpeech, atc, VITSClient
import requests
from xml.etree import ElementTree

Expand Down Expand Up @@ -408,6 +408,37 @@ def get_speech(self, phrase):
else:
logger.critical(f"{self.SLUG} 合成失败!", stack_info=True)

class VITS(AbstractTTS):
    """
    VITS speech synthesis engine.

    Requires a self-hosted vits-simple-api server:
    https://github.com/Artrajz/vits-simple-api

    Constructor arguments (normally supplied from the "VITS" config section):
        server_url: Server URL, e.g. http://127.0.0.1:23456
        api_key: API key, if the server is configured with one
        speaker_id: Speaker ID, determined by the model in use
        length: Adjusts the audio length, i.e. the speaking rate; the larger
            the value, the slower the speech
        noise: Noise
        noisew: Noise deviation
        max: Segmentation threshold. Text is split at punctuation marks and
            pieces are accumulated into one segment until they exceed max.
            max <= 0 disables segmentation.
        timeout: Response timeout; choose a value that suits the performance
            of the vits-simple-api server
    """

    SLUG = "VITS"

    def __init__(self, server_url, api_key, speaker_id, length, noise, noisew, max, timeout, **args):
        # Zero-argument super() instead of super(self.__class__, self):
        # the latter resolves to the subclass when VITS is inherited from,
        # which makes this __init__ call itself forever.
        super().__init__()
        self.server_url = server_url
        self.api_key = api_key
        self.speaker_id = speaker_id
        self.length = length
        self.noise = noise
        self.noisew = noisew
        self.max = max
        self.timeout = timeout

    @classmethod
    def get_config(cls):
        """Return the "VITS" section of the config, or an empty dict if absent."""
        return config.get("VITS", {})

    def get_speech(self, phrase):
        """
        Synthesize `phrase` through the VITS server and store it as a .wav file.

        Errors raised by VITSClient.tts are not caught here and propagate to
        the caller.

        :param phrase: text to synthesize
        :returns: the value returned by utils.write_temp_file (logged below as
            the path of the synthesized file)
        """
        result = VITSClient.tts(
            phrase,
            self.server_url,
            self.api_key,
            self.speaker_id,
            self.length,
            self.noise,
            self.noisew,
            self.max,
            self.timeout,
        )
        tmpfile = utils.write_temp_file(result, ".wav")
        logger.info(f"{self.SLUG} 语音合成成功,合成路径:{tmpfile}")
        return tmpfile

def get_engine_by_slug(slug=None):
"""
Expand Down
24 changes: 24 additions & 0 deletions robot/sdk/VITSClient.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# coding: utf-8
# !/usr/bin/env python3

"""VITS TTS API"""

import requests


def tts(text, server_url, api_key, speaker_id, length, noise, noisew, max, timeout):
    """
    Request speech synthesis from a vits-simple-api server.

    Sends a form-encoded POST to ``{server_url}/voice`` asking for WAV output
    with automatic language detection.

    :param text: text to synthesize
    :param server_url: base URL of the server, e.g. http://127.0.0.1:23456;
        trailing slashes are tolerated
    :param api_key: value for the X-API-KEY header; the header is omitted
        when this is empty or None
    :param speaker_id: speaker ID, sent as the "id" field
    :param length: sent as the "length" field
    :param noise: sent as the "noise" field
    :param noisew: sent as the "noisew" field
    :param max: sent as the "max" field
    :param timeout: timeout passed to requests.post
    :returns: raw response body as bytes
    :raises requests.HTTPError: if the server answers with an error status
    """
    data = {
        "text": text,
        "id": speaker_id,
        "format": "wav",
        "lang": "auto",
        "length": length,
        "noise": noise,
        "noisew": noisew,
        "max": max,
    }
    # Only send the key header when a key is actually configured.
    headers = {"X-API-KEY": api_key} if api_key else {}
    # Strip trailing slashes so "http://host:port/" does not yield "//voice".
    base_url = server_url.rstrip("/")
    url = f"{base_url}/voice"
    res = requests.post(url=url, data=data, headers=headers, timeout=timeout)
    res.raise_for_status()
    return res.content
21 changes: 21 additions & 0 deletions static/default.yml
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ lru_cache:
# azure-tts - 微软语音合成
# mac-tts - macOS 系统自带TTS(mac 系统推荐)
# edge-tts - 基于 Edge 的 TTS(推荐)
# VITS - 基于 VITS 的AI语音合成
tts_engine: edge-tts

# 语音识别服务配置
Expand Down Expand Up @@ -179,6 +180,26 @@ edge-tts:
# 中文推荐 `zh` 开头的音色
voice: zh-CN-XiaoxiaoNeural

# VITS-based AI speech synthesis
VITS:
# Requires a self-hosted vits-simple-api server: https://github.com/Artrajz/vits-simple-api
# server_url: server URL (format http://{IP address}:{port}, without a trailing slash), e.g. http://127.0.0.1:23456
# api_key: fill in here if the server is configured with an API key
# speaker_id: speaker ID, determined by the model in use
# length: adjusts the audio length, i.e. the speaking rate; the larger the value, the slower the speech
# noise: noise
# noisew: noise deviation
# max: segmentation threshold; text is split at punctuation marks and accumulated into one segment once it exceeds max. max<=0 disables segmentation.
# timeout: response timeout in seconds; choose a value that suits the performance of the vits-simple-api server
server_url: "http://127.0.0.1:23456"
api_key: "api_key"
speaker_id: 0
length: 1.0
noise: 0.667
noisew: 0.8
max: 50
timeout: 60

# NLU 引擎
# 可选值:
# unit - 百度 UNIT
Expand Down

0 comments on commit e18bc67

Please sign in to comment.