-
Notifications
You must be signed in to change notification settings - Fork 0
/
customTextToSpeech.py
229 lines (151 loc) · 7.05 KB
/
customTextToSpeech.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
import os
from dimits.utils import download, logger
from dimits.ttsmodel import TextToSpeechModel as ttsm
import requests
import shutil
from huggingface_hub import hf_hub_url,snapshot_download,hf_hub_download
import platform
import requests
from pathlib import Path
from huggingface_hub import HfFileSystem
class Dimits():
    """Text-to-speech helper that downloads a Piper voice and synthesizes audio.

    Voice files (``<voice>.onnx`` and ``<voice>.onnx.json``) are fetched from
    the Hugging Face repository named by ``_repo_id`` and stored in a local
    directory. Only Windows and Linux are treated as supported platforms; on
    any other platform the public methods return ``None`` without doing work.
    """

    # Hugging Face repository holding the voice files. Kept at class level so
    # that the static ``list_voice`` can read it without an instance.
    _repo_id = "rhasspy/piper-voices"

    def __init__(self, voice: str, verbose: bool = True, modelDirectory: str = None):
        """
        Initialize a new instance of Dimits with the provided voice and verbosity.

        Args:
            voice (str): The voice to use for text-to-speech, e.g. "en_US-amy-low".
            verbose (bool): Whether to print verbose output.
            modelDirectory (str): Local directory in which the voice files are
                stored (and downloaded to when missing). If not provided
                (i.e., None), a ``piper`` folder in the user's home directory
                is used.

        Returns:
            None

        Example:
            >>> from Dimits import Dimits
            >>> dt = Dimits("en_US-amy-low")
        """
        self.verbose = verbose
        arch = str(self._get_os())
        if arch == 'unsupported_machine':
            # The instance stays uninitialized; every public method checks the
            # platform again and returns early.
            logger(err=f'{arch} Detected', verbose=verbose)
            return

        home = Path.home()
        self.voice = voice
        self.folder = 'piper'
        self.parent_destn = modelDirectory if modelDirectory is not None else os.path.join(home, self.folder)
        # makedirs: tolerate nested paths and a directory that already exists.
        os.makedirs(self.parent_destn, exist_ok=True)

        # Download the voice if it does not exist
        self._download_voice(voice, verbose)

        # Set the path to the ONNX voice file
        self.voice_onnx = os.path.join(self.parent_destn, str(voice) + '.onnx')
        logger('Using ' + str(self.voice_onnx), verbose=verbose)

    @staticmethod
    def _get_os() -> str:
        """Return ``platform.system()`` for Windows/Linux, else "unsupported_machine"."""
        system = platform.system()
        if system in ('Windows', 'Linux'):
            return system
        return "unsupported_machine"

    @staticmethod
    def list_voice() -> list[str]:
        """
        List the voices available in the Hugging Face voice repository.

        Every ``.onnx`` file found in the repository is reported by its file
        name without the ``.onnx`` suffix.

        Args:
            None

        Returns:
            list[str] : Supported voice
        """
        fs = HfFileSystem()
        # "**" as its own path segment is the portable recursive form.
        pattern = f"{Dimits._repo_id}/**/*.onnx"
        return [
            path.rsplit("/", 1)[-1].removesuffix(".onnx")
            for path in fs.glob(pattern, detail=False)
        ]

    def _download_voice(self, name, verbose: bool = True):
        """
        Download the model and config file of a voice unless already present.

        Args:
            name (str): Voice name of the form "<lang>_<COUNTRY>-<speaker>-<quality>",
                e.g. "en_US-amy-low".
            verbose (bool): Passed through to the download helper.

        Raises:
            ValueError: If ``name`` does not follow the expected pattern.
        """
        try:
            locale, speaker, quality = name.split('-')
            lang, _country = locale.split('_')
        except ValueError as exc:
            raise ValueError(
                f"Invalid voice name {name!r}; expected something like 'en_US-amy-low'"
            ) from exc

        remote_dir = f"{lang}/{locale}/{speaker}/{quality}"
        for filename in (f"{name}.onnx", f"{name}.onnx.json"):
            filepath = os.path.join(self.parent_destn, filename)
            if os.path.exists(filepath):
                continue  # only fetch what is actually missing
            url = hf_hub_url(repo_id=self._repo_id, filename=f"{remote_dir}/{filename}")
            download(url, filepath, filename, verbose)

    def text_2_audio_file(self, text: str, filename: str, directory: str, format: str = 'wav', **kwargs):
        r"""
        Synthesize the provided text and save the audio to disk.

        Args:
            text (str): The text to be converted to audio.
            filename (str): The name of the output audio file, without the file extension.
            directory (str): The directory in which to save the output audio file.
                It is created when missing.
            format (str): Extension used for the output file name. No format
                conversion is performed here.
            **kwargs: Extra keyword arguments for the synthesizer. They override
                the defaults ``length_scale=1.0``, ``noise_scale=1.0`` and
                ``noise_w=1.0``.

        Returns:
            The value returned by the synthesizer (presumably the encoded audio
            as bytes -- confirm against ``TextToSpeechModel.synthesize``), or
            None on an unsupported platform. The return value is kept as-is for
            existing callers; the audio is additionally written to
            ``<directory>/<filename>.<format>`` when it is bytes-like.

        Example:
            >>> from Dimits import Dimits
            >>> dt = ... #Initialize here
            >>> dt.text_2_audio_file("Hello World", "hello_world", "/path/to/output/directory/", "wav")
            >>> # Outputs an audio file named "hello_world.wav" in the directory "/path/to/output/directory/".
        """
        if self._get_os() == 'unsupported_machine':
            return

        os.makedirs(directory, exist_ok=True)
        filepath = os.path.join(directory, f'{filename}.{format}')

        options = {'length_scale': 1.0, 'noise_scale': 1.0, 'noise_w': 1.0, **kwargs}
        out_bin = ttsm(self.voice_onnx).synthesize(text, **options)

        # Persist the audio as documented, but only when the synthesizer
        # produced something that can be written in binary mode.
        if isinstance(out_bin, (bytes, bytearray, memoryview)):
            with open(filepath, 'wb') as audio_file:
                audio_file.write(out_bin)
        return out_bin

    def text_file_2_audio_file(self, text_file_path: str, audio_filename: str, directory: str, audio_format: str='wav', encoding='utf8', **kwargs):
        """
        Convert the full content of a text file to an audio file.

        Args:
            text_file_path (str): Path of the file to be read
            audio_filename (str): The name of the audio file to be saved
            directory (str): The directory to which audio file is extracted
            audio_format (str): The format of audio file
            encoding (str): encoding of the text file to be read utf8, utf16
            **kwargs: Forwarded to ``text_2_audio_file``.

        Returns:
            Whatever ``text_2_audio_file`` returns, or None on an unsupported
            platform.

        Example:
            >>> from dimits import Dimits
            >>> dd = ...
            >>> dd.text_file_2_audio_file(file_path,'hello', dir,)
        """
        if self._get_os() == 'unsupported_machine':
            return
        with open(text_file_path, encoding=encoding) as f:
            # read() rather than readline(): the whole file is converted, not
            # just its first line.
            content = f.read()
        return self.text_2_audio_file(content, audio_filename, directory, audio_format, **kwargs)

    def text_2_speech(self, text: str, engine: str = None, **kwargs: str) -> None:
        r"""
        Convert the provided text to audio and play it immediately.

        The audio is synthesized into ``<voice directory>/cache/main.wav``
        (the cache directory is recreated on every call) and handed to the
        platform player.

        Args:
            text (str): The text to be converted to audio.
            engine (str): The player used for the audio file. Defaults to
                'aplay' on Linux and 'System.Media.SoundPlayer' on Windows;
                other values are not played.
            **kwargs: Forwarded to ``text_2_audio_file``.

        Returns:
            None

        Example:
            >>> from dimits import Dimits
            >>> dt = ... #Initialize here
            >>> dt.text_2_speech("Hello World", "aplay")
            >>> # plays the audio to the default audio output device using 'aplay'.
        """
        # Local import: keeps this method self-contained with respect to the
        # file-level import list.
        import subprocess

        if self._get_os() == 'unsupported_machine':
            return

        cache_dir = os.path.join(self.parent_destn, 'cache')
        if os.path.isdir(cache_dir):
            shutil.rmtree(cache_dir)
        os.makedirs(cache_dir)

        self.text_2_audio_file(text, 'main', directory=cache_dir, **kwargs)
        # text_2_audio_file returns the synthesizer output, not a path, so the
        # location of the file it wrote is rebuilt here.
        filepath = os.path.join(cache_dir, 'main.wav')
        if not os.path.isfile(filepath):
            logger(err=f'No audio file was produced at {filepath}', verbose=self.verbose)
            return

        system = platform.system()
        if system == 'Linux':
            if engine is None:
                engine = 'aplay'
            if engine != 'aplay':
                logger(err=f'Unsupported engine {engine}', verbose=self.verbose)
                return
            try:
                # Best effort: stop playback still running from an earlier call.
                subprocess.run(['killall', engine], check=False)
            except FileNotFoundError:
                pass  # killall is not installed; nothing to stop
            try:
                subprocess.run([engine, filepath], check=False)
            except FileNotFoundError:
                logger(err=f'{engine} is not installed', verbose=self.verbose)
        elif system == 'Windows':
            if engine is None:
                engine = 'System.Media.SoundPlayer'
            if engine != 'System.Media.SoundPlayer':
                logger(err=f'Unsupported engine {engine}', verbose=self.verbose)
                return
            # Single-quote the path for PowerShell (embedded quotes doubled) so
            # that paths containing spaces are passed as one argument.
            quoted_path = filepath.replace("'", "''")
            script = f"(New-Object {engine} '{quoted_path}').PlaySync()"
            try:
                subprocess.run(['powershell', '-Command', script], check=False)
            except FileNotFoundError:
                logger(err='powershell is not available', verbose=self.verbose)