-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsynthesis.py
executable file
·79 lines (60 loc) · 2.14 KB
/
synthesis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import torch
import torch.nn as nn
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import time
import os
from fastspeech import FastSpeech
from text import text_to_sequence
import hparams as hp
import utils
import audio as Audio
import glow
import squeezewave.inference as sq_infer
import time
import sys
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
def get_FastSpeech(num):
    """Build a FastSpeech model and restore its weights from a checkpoint.

    The file ``checkpoint_<num>.pth.tar`` is read from the directory
    ``hp.checkpoint_path`` and its ``'model'`` entry is loaded as the state
    dict of the ``nn.DataParallel``-wrapped network.

    Args:
        num: Value inserted into the checkpoint file name.

    Returns:
        The ``nn.DataParallel``-wrapped model, placed on ``device`` and
        switched to evaluation mode.
    """
    checkpoint_file = "".join(("checkpoint_", str(num), ".pth.tar"))

    network = nn.DataParallel(FastSpeech()).to(device)

    # map_location remaps the stored tensors onto the device used here.
    checkpoint = torch.load(
        os.path.join(hp.checkpoint_path, checkpoint_file),
        map_location=device)
    network.load_state_dict(checkpoint['model'])

    network.eval()
    return network
def synthesis(model, text, alpha=1.0):
    """Run FastSpeech on ``text`` and return the predicted mel outputs.

    Args:
        model: ``nn.DataParallel``-wrapped model; its ``.module`` is called
            directly.
        text: Input string, converted to an ID sequence by
            ``text_to_sequence`` with ``hp.text_cleaners``.
        alpha: Forwarded unchanged to the model's ``forward`` as ``alpha``
            (presumably a duration/speed factor -- confirm against the
            model definition).

    Returns:
        A 4-tuple ``(mel, mel_postnet, mel_batch, mel_postnet_batch)``:

        * ``mel`` / ``mel_postnet``: first batch item of each model output,
          moved to the CPU, with its two axes swapped.
        * ``mel_batch`` / ``mel_postnet_batch``: the full batched outputs
          with axes 1 and 2 swapped, left on the model's device.
    """
    # Batch of one: shape (1, sequence_length).
    text = np.array(text_to_sequence(text, hp.text_cleaners))
    text = np.stack([text])

    # 1-based position index for every input token, also as a batch of one.
    src_pos = np.stack([np.arange(1, text.shape[1] + 1)])

    # The inputs must live on the same device as the model weights, otherwise
    # the forward pass fails whenever the model was placed on a GPU.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        target_device = first_param.device
    else:
        target_device = torch.device('cpu')

    with torch.no_grad():
        sequence = torch.from_numpy(text).long().to(target_device)
        src_pos = torch.from_numpy(src_pos).long().to(target_device)

        # Use the wrapped module directly rather than the DataParallel
        # wrapper.
        mel, mel_postnet = model.module.forward(
            sequence, src_pos, alpha=alpha)

        # script for generating torch script
        # traced_script_module = torch.jit.trace(model, (sequence, src_pos))
        # traced_script_module.save("traced_fastspeech_model.pt")

        return mel[0].cpu().transpose(0, 1), \
            mel_postnet[0].cpu().transpose(0, 1), \
            mel.transpose(1, 2), \
            mel_postnet.transpose(1, 2)
if __name__ == "__main__":
    # Test: synthesize the text given as the first command-line argument and
    # write the resulting waveform under ./results.

    # Fail fast with a usage message instead of an IndexError raised only
    # after both models have been loaded.
    if len(sys.argv) < 2:
        sys.exit("Usage: {} TEXT".format(sys.argv[0]))
    words = sys.argv[1]

    num = 112000
    alpha = 1.0
    model = get_FastSpeech(num)
    squeeze_wave = utils.get_squeezewave()

    start = time.time()

    # Only the batched post-net output is passed on to the vocoder.
    _, _, _, mel_postnet_torch = synthesis(model, words, alpha=alpha)

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs("results", exist_ok=True)

    sq_infer.inference(mel_postnet_torch, squeeze_wave, os.path.join(
        "results", words + "_" + str(num) + "_squeezewave.wav"))

    end = time.time()
    # Elapsed wall-clock time covers FastSpeech synthesis plus the
    # sq_infer.inference call.
    print("Speech synthesis time: ")
    print(end - start)