-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[TTS]add starganv2 vc trainer (#3143)
* add starganv2 vc trainer * fix StarGANv2VCUpdater and losses * fix StarGANv2VCEvaluator * add some typehint
- Loading branch information
1 parent
54ef90f
commit 72aa19c
Showing
6 changed files
with
911 additions
and
97 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,22 +1,123 @@ | ||
generator_params: | ||
########################################################### | ||
# FEATURE EXTRACTION SETTING # | ||
########################################################### | ||
# Not actually used; 16000 is what is actually used | ||
sr: 24000 | ||
n_fft: 2048 | ||
win_length: 1200 | ||
hop_length: 300 | ||
n_mels: 80 | ||
########################################################### | ||
# MODEL SETTING # | ||
########################################################### | ||
generator_params: | ||
dim_in: 64 | ||
style_dim: 64 | ||
max_conv_dim: 512 | ||
w_hpf: 0 | ||
F0_channel: 256 | ||
mapping_network_params: | ||
mapping_network_params: | ||
num_domains: 20 # num of speakers in StarGANv2 | ||
latent_dim: 16 | ||
style_dim: 64 # same as style_dim in generator_params | ||
hidden_dim: 512 # same as max_conv_dim in generator_params | ||
style_encoder_params: | ||
style_encoder_params: | ||
dim_in: 64 # same as dim_in in generator_params | ||
style_dim: 64 # same as style_dim in generator_params | ||
num_domains: 20 # same as num_domains in mapping_network_params | ||
max_conv_dim: 512 # same as max_conv_dim in generator_params | ||
discriminator_params: | ||
discriminator_params: | ||
dim_in: 64 # same as dim_in in generator_params | ||
num_domains: 20 # same as num_domains in mapping_network_params | ||
max_conv_dim: 512 # same as max_conv_dim in generator_params | ||
n_repeat: 4 | ||
|
||
asr_params: | ||
input_dim: 80 | ||
hidden_dim: 256 | ||
n_token: 80 | ||
token_embedding_dim: 256 | ||
|
||
########################################################### | ||
# ADVERSARIAL LOSS SETTING # | ||
########################################################### | ||
loss_params: | ||
g_loss: | ||
lambda_sty: 1. | ||
lambda_cyc: 5. | ||
lambda_ds: 1. | ||
lambda_norm: 1. | ||
lambda_asr: 10. | ||
lambda_f0: 5. | ||
lambda_f0_sty: 0.1 | ||
lambda_adv: 2. | ||
lambda_adv_cls: 0.5 | ||
norm_bias: 0.5 | ||
d_loss: | ||
lambda_reg: 1. | ||
lambda_adv_cls: 0.1 | ||
lambda_con_reg: 10. | ||
|
||
adv_cls_epoch: 50 | ||
con_reg_epoch: 30 | ||
|
||
|
||
########################################################### | ||
# DATA LOADER SETTING # | ||
########################################################### | ||
batch_size: 5 # Batch size. | ||
num_workers: 2 # Number of workers in DataLoader. | ||
|
||
########################################################### | ||
# OPTIMIZER & SCHEDULER SETTING # | ||
########################################################### | ||
generator_optimizer_params: | ||
beta1: 0.0 | ||
beta2: 0.99 | ||
weight_decay: 1e-4 | ||
epsilon: 1e-9 | ||
generator_scheduler_params: | ||
max_learning_rate: 2e-4 | ||
phase_pct: 0.0 | ||
divide_factor: 1 | ||
total_steps: 200000 # train_max_steps | ||
end_learning_rate: 2e-4 | ||
style_encoder_optimizer_params: | ||
beta1: 0.0 | ||
beta2: 0.99 | ||
weight_decay: 1e-4 | ||
epsilon: 1e-9 | ||
style_encoder_scheduler_params: | ||
max_learning_rate: 2e-4 | ||
phase_pct: 0.0 | ||
divide_factor: 1 | ||
total_steps: 200000 # train_max_steps | ||
end_learning_rate: 2e-4 | ||
mapping_network_optimizer_params: | ||
beta1: 0.0 | ||
beta2: 0.99 | ||
weight_decay: 1e-4 | ||
epsilon: 1e-9 | ||
mapping_network_scheduler_params: | ||
max_learning_rate: 2e-6 | ||
phase_pct: 0.0 | ||
divide_factor: 1 | ||
total_steps: 200000 # train_max_steps | ||
end_learning_rate: 2e-6 | ||
discriminator_optimizer_params: | ||
beta1: 0.0 | ||
beta2: 0.99 | ||
weight_decay: 1e-4 | ||
epsilon: 1e-9 | ||
discriminator_scheduler_params: | ||
max_learning_rate: 2e-4 | ||
phase_pct: 0.0 | ||
divide_factor: 1 | ||
total_steps: 200000 # train_max_steps | ||
end_learning_rate: 2e-4 | ||
|
||
########################################################### | ||
# TRAINING SETTING # | ||
########################################################### | ||
max_epoch: 150 | ||
num_snapshots: 5 | ||
seed: 1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.