Skip to content

Commit

Permalink
Merge pull request #30 from santi-pdp/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
JianyuanZhong authored Aug 29, 2019
2 parents 40ca27e + 523cfe5 commit 71396db
Show file tree
Hide file tree
Showing 9 changed files with 456 additions and 91 deletions.
110 changes: 110 additions & 0 deletions cfg/workers5reg_r3.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
{
"regr":[
{"num_outputs":1,
"dropout":0,
"hidden_layers": 1,
"name":"cchunk",
"type":"decoder",
"hidden_size": 64,
"fmaps":[512,256,128],
"strides":[4,4,10],
"kwidths":[30,30,30],
"loss":"L1Loss"
},
{"num_outputs":1025,
"dropout":0,
"hidden_size": 256,
"hidden_layers": 1,
"name":"lps",
"r":3,
"loss":"MSELoss",
"skip":false
},
{"num_outputs":40,
"dropout":0,
"hidden_size": 256,
"hidden_layers": 1,
"name":"fbank",
"r":3,
"loss":"MSELoss",
"skip":false
},
{"num_outputs":40,
"dropout":0,
"hidden_size": 256,
"hidden_layers": 1,
"name":"gtn",
"r":3,
"loss":"MSELoss",
"skip":false
},
{"num_outputs":20,
"dropout":0,
"hidden_size": 256,
"hidden_layers": 1,
"name":"mfcc",
"r":3,
"loss":"MSELoss",
"skip":false
},
{"num_outputs":4,
"dropout":0,
"hidden_size": 256,
"hidden_layers": 1,
"name":"prosody",
"r":3,
"loss":"MSELoss",
"skip":false
}
],
"cls":[
{"num_outputs":1,
"dropout":0,
"hidden_size": 256,
"hidden_layers": 1,
"name":"spc",
"type":"spc",
"loss":"BCEWithLogitsLoss",
"skip":false
},
{"num_outputs":1,
"dropout":0,
"hidden_size": 256,
"hidden_layers": 1,
"name":"mi",
"loss":"BCEWithLogitsLoss",
"skip":false,
"keys":["chunk",
"chunk_ctxt",
"chunk_rand"]
},
{"num_outputs":1,
"dropout":0,
"hidden_size": 256,
"hidden_layers": 1,
"name":"cmi",
"loss":"BCEWithLogitsLoss",
"skip":false,
"keys":["chunk",
"chunk_ctxt",
"chunk_rand"]
},
{"num_outputs":1,
"dropout":0,
"hidden_size": 256,
"hidden_layers": 1,
"name":"overlap",
"loss":"BCEWithLogitsLoss",
"skip":false
},
{"num_outputs":1,
"dropout":0,
"hidden_size": 256,
"hidden_layers": 1,
"name":"gap",
"type":"gap",
"loss":"BCEWithLogitsLoss",
"skip":false
}
]
}
13 changes: 11 additions & 2 deletions make_trainset_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,13 @@ def build_dataset_providers(opts):
#KaldiMFCC(kaldi_root=opts.kaldi_root, hop=opts.hop_size, win=opts.win_size),
#KaldiPLP(kaldi_root=opts.kaldi_root, hop=opts.hop_size, win=opts.win_size),
#Prosody(hop=opts.hop_size)
LPS(hop=opts.LPS_hop,win=opts.LPS_win),
LPS(hop=opts.LPS_hop,win=opts.LPS_win,der_order=opts.LPS_der_order),
Gammatone(hop=opts.gammatone_hop,win=opts.gammatone_win,der_order=opts.gammatone_der_order),
#LPC(hop=opts.LPC_hop),
FBanks(hop=opts.fbanks_hop,win=opts.fbanks_win,der_order=opts.fbanks_der_order),
MFCC(hop=opts.mfccs_hop,win=opts.mfccs_win,order=opts.mfccs_order,der_order=opts.mfccs_der_order),
KaldiMFCC(kaldi_root=opts.kaldi_root, hop=opts.kaldimfccs_hop, win=opts.kaldimfccs_win,num_mel_bins=opts.kaldimfccs_num_mel_bins,num_ceps=opts.kaldimfccs_num_ceps,der_order=opts.kaldimfccs_der_order),
MFCC_librosa(hop=opts.mfccs_librosa_hop,win=opts.mfccs_librosa_win,order=opts.mfccs_librosa_order,der_order=opts.mfccs_librosa_der_order,n_mels=opts.mfccs_librosa_n_mels,htk=opts.mfccs_librosa_htk),
#KaldiMFCC(kaldi_root=opts.kaldi_root, hop=opts.kaldimfccs_hop, win=opts.kaldimfccs_win,num_mel_bins=opts.kaldimfccs_num_mel_bins,num_ceps=opts.kaldimfccs_num_ceps,der_order=opts.kaldimfccs_der_order),
#KaldiPLP(kaldi_root=opts.kaldi_root, hop=opts.kaldiplp_hop, win=opts.kaldiplp_win),
Prosody(hop=opts.prosody_hop, win=opts.prosody_win)
])
Expand Down Expand Up @@ -112,6 +113,7 @@ def extract_stats(opts):
# setting hop/wlen for each features
parser.add_argument('--LPS_hop', type=int, default=160)
parser.add_argument('--LPS_win', type=int, default=400)
parser.add_argument('--LPS_der_order', type=int, default=0)
parser.add_argument('--gammatone_hop', type=int, default=160)
parser.add_argument('--gammatone_win', type=int, default=400)
parser.add_argument('--gammatone_der_order', type=int, default=0)
Expand All @@ -133,6 +135,13 @@ def extract_stats(opts):
parser.add_argument('--kaldimfccs_num_ceps', type=int, default=20)
parser.add_argument('--kaldiplp_hop', type=int, default=160)
parser.add_argument('--kaldiplp_win', type=int, default=400)

parser.add_argument('--mfccs_librosa_hop', type=int, default=160)
parser.add_argument('--mfccs_librosa_win', type=int, default=400)
parser.add_argument('--mfccs_librosa_order', type=int, default=20)
parser.add_argument('--mfccs_librosa_der_order', type=int, default=0)
parser.add_argument('--mfccs_librosa_n_mels', type=int, default=40)
parser.add_argument('--mfccs_librosa_htk', type=int, default=True)


parser.add_argument('--ihm2sdm', type=str, default=None,
Expand Down
1 change: 1 addition & 0 deletions pase/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ def __init__(self, batching_keys=['cchunk',
'gtn',
'fbank',
'mfcc',
'mfcc_librosa',
'prosody',
'kaldimfcc',
'kaldiplp'],
Expand Down
41 changes: 37 additions & 4 deletions pase/losses.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,39 @@
import torch
import torch.nn as nn


class RegressionLoss(object):
import torch.nn.functional as F


class ContextualizedLoss(object):
    """Loss wrapper that can compare predictions against a composition
    of ``r`` consecutive ground-truth frames.

    When ``r`` is an int, every ground-truth time step is replaced by the
    channel-wise concatenation of the ``r``-frame window centered on it
    (zero-padded at the time-axis edges), so a worker that predicts ``r``
    frames at once is scored against the matching stacked targets. With
    ``r=None`` this behaves exactly like the plain criterion.
    """

    def __init__(self, criterion, r=None):
        # criterion: any callable loss (e.g. nn.MSELoss())
        # r: number of consecutive frames composed per step (None = off)
        self.criterion = criterion
        self.r = r

    def contextualize_r(self, tensor):
        """Return ``tensor`` with each time step replaced by its r-frame
        context window stacked on the channel axis; pass-through when
        ``self.r`` is None.
        """
        if self.r is None:
            return tensor
        assert isinstance(self.r, int), type(self.r)
        # ensure it is a 3-D tensor; assumes (batch, channels, time)
        # layout given the dim-2 windowing below
        assert len(tensor.shape) == 3, tensor.shape
        # pad the time axis with zeros so an odd r keeps T steps
        pad_ = F.pad(tensor, (self.r // 2, self.r // 2))
        # Santi:
        # TODO: improve this with some proper transposition and stuff
        # rather than looping, at the expense of more memory I guess
        pt = []
        for t in range(pad_.size(2) - (self.r - 1)):
            chunk = pad_[:, :, t:t + self.r].contiguous().view(pad_.size(0),
                                                               -1).unsqueeze(2)
            pt.append(chunk)
        pt = torch.cat(pt, dim=2)
        return pt

    def __call__(self, pred, gtruth):
        # Fix: drop the redundant criterion call on the raw gtruth whose
        # result was immediately overwritten; only the contextualized
        # target is ever used for the returned loss.
        gtruth_r = self.contextualize_r(gtruth)
        loss = self.criterion(pred, gtruth_r)
        return loss


Expand Down Expand Up @@ -184,3 +212,8 @@ def forward(self, iteration, x_fake, x_real,
else:
return {'g_loss':g_real_loss}

if __name__ == '__main__':
    # Smoke test: with r=3 the (1, 1, 5) target is contextualized to
    # (1, 3, 5), matching the prediction shape fed to MSELoss.
    criterion = ContextualizedLoss(nn.MSELoss(), r=3)
    prediction = torch.randn(1, 3, 5)
    target = torch.randn(1, 1, 5)
    criterion(prediction, target)
8 changes: 6 additions & 2 deletions pase/models/Minions/minions.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,7 @@ def __init__(self, num_inputs,
loss=None,
loss_weight=1.,
keys=None,
r=1,
name='MLPMinion'):
super().__init__(name=name)
# Implemented with Conv1d layers to not
Expand All @@ -446,7 +447,6 @@ def __init__(self, num_inputs,
assert context % 2 != 0, context
self.context = context
self.tie_context_weights = tie_context_weights
self.num_outputs = num_outputs
self.dropout = dropout
self.skip = skip
self.hidden_size = hidden_size
Expand All @@ -456,6 +456,10 @@ def __init__(self, num_inputs,
self.keys = keys
if keys is None:
keys = [name]
# r frames predicted at once in the output
self.r = r
# multiplies number of output dims
self.num_outputs = num_outputs * r
self.blocks = nn.ModuleList()
ninp = num_inputs
for hi in range(hidden_layers):
Expand All @@ -468,7 +472,7 @@ def __init__(self, num_inputs,
# in case context has been assigned,
# it is overwritten to 1
context = 1
self.W = nn.Conv1d(ninp, num_outputs, context,
self.W = nn.Conv1d(ninp, self.num_outputs, context,
padding=context//2)
self.sg = ScaleGrad()

Expand Down
8 changes: 5 additions & 3 deletions pase/models/modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,13 @@ def format_frontend_output(y, data_fmt, mode):

def build_rnn_block(in_size, rnn_size, rnn_layers, rnn_type,
bidirectional=True,
dropout=0):
dropout=0, use_cuda=True):
if (rnn_type.lower() == 'qrnn') and QRNN is not None:
if bidirectional:
print('WARNING: QRNN ignores bidirectional flag')
rnn_size = 2 * rnn_size
rnn = QRNN(in_size, rnn_size, rnn_layers, dropout=dropout, window=2)
rnn = QRNN(in_size, rnn_size, rnn_layers, dropout=dropout, window=2,
use_cuda=use_cuda)
elif rnn_type.lower() == 'lstm' or rnn_type.lower() == 'gru':
rnn = getattr(nn, rnn_type.upper())(in_size, rnn_size, rnn_layers,
dropout=dropout,
Expand Down Expand Up @@ -442,7 +443,8 @@ def __init__(self, ninp, fmaps,

def forward(self, x):
h = self.deconv(x)
if self.stride % 2 != 0 and self.kwidth % 2 == 0: # and self.stride > self.kwidth:
if (self.stride % 2 != 0 and self.kwidth % 2 == 0) or \
(self.stride % 2 == 0 and self.kwidth % 2 != 0): # and self.stride > self.kwidth:
h = h[:, :, :-1]
h = forward_norm(h, self.norm)
h = forward_activation(self.act, h)
Expand Down
Loading

0 comments on commit 71396db

Please sign in to comment.