diff --git a/config/config-rnn-summarization.yml b/config/config-rnn-summarization.yml
new file mode 100644
index 0000000000..3abbbcf51b
--- /dev/null
+++ b/config/config-rnn-summarization.yml
@@ -0,0 +1,42 @@
+---
+# RNN summarization run; data and checkpoint prefixes point at CNNDM paths.
+# NOTE(review): switch-like options are quoted strings ('true'), not YAML
+# booleans; kept verbatim -- confirm the consumer expects strings.
+
+# Paths, checkpointing, seed, and step counts
+data: data/cnndm/CNNDM
+save_model: models/cnndm
+save_checkpoint_steps: 10000
+keep_checkpoint: 10
+seed: 3435
+train_steps: 100000
+valid_steps: 10000
+report_every: 100
+
+# Encoder type and layer sizes
+encoder_type: brnn
+word_vec_size: 128
+rnn_size: 512
+layers: 1
+
+# Optimizer settings
+optim: adagrad
+learning_rate: 0.15
+adagrad_accumulator_init: 0.1
+max_grad_norm: 2
+
+# Batching and dropout
+batch_size: 16
+dropout: 0.0
+
+# Attention, copy, and bridge options
+copy_attn: 'true'
+global_attention: mlp
+reuse_copy_attn: 'true'
+bridge: 'true'
+
+# Devices: world_size equals the number of gpu_ranks entries (2)
+world_size: 2
+gpu_ranks:
+  - 0
+  - 1
diff --git a/config/config-transformer-base-1GPU.yml b/config/config-transformer-base-1GPU.yml
new file mode 100644
index 0000000000..90e36b06dd
--- /dev/null
+++ b/config/config-transformer-base-1GPU.yml
@@ -0,0 +1,52 @@
+---
+# Transformer run on the exp/*.de-en paths with a single rank (world_size: 1).
+# NOTE(review): switch-like options are quoted strings ('true'), not YAML
+# booleans; kept verbatim -- confirm the consumer expects strings.
+
+# Paths, checkpointing, seed, and step counts
+data: exp/dataset.de-en
+save_model: exp/model.de-en
+save_checkpoint_steps: 10000
+keep_checkpoint: 10
+seed: 3435
+train_steps: 500000
+valid_steps: 10000
+warmup_steps: 8000
+report_every: 100
+
+# Encoder/decoder type and sizes
+decoder_type: transformer
+encoder_type: transformer
+word_vec_size: 512
+rnn_size: 512
+layers: 6
+transformer_ff: 2048
+heads: 8
+
+# Gradient accumulation, optimizer, and learning-rate decay
+accum_count: 8
+optim: adam
+adam_beta1: 0.9
+adam_beta2: 0.998
+decay_method: noam
+learning_rate: 2.0
+max_grad_norm: 0.0
+
+# Batching, normalization, dropout, and label smoothing
+batch_size: 4096
+batch_type: tokens
+normalization: tokens
+dropout: 0.1
+label_smoothing: 0.1
+
+max_generator_batches: 2
+
+# Parameter initialization and position encoding
+param_init: 0.0
+param_init_glorot: 'true'
+position_encoding: 'true'
+
+# Devices: world_size equals the number of gpu_ranks entries (1)
+world_size: 1
+gpu_ranks:
+  - 0
diff --git a/config/config-transformer-base-4GPU.yml b/config/config-transformer-base-4GPU.yml
new file mode 100644
index 0000000000..bd40323525
--- /dev/null
+++ b/config/config-transformer-base-4GPU.yml
@@ -0,0 +1,55 @@
+---
+# Transformer run on the exp/*.de-en paths with four ranks (world_size: 4).
+# NOTE(review): switch-like options are quoted strings ('true'), not YAML
+# booleans; kept verbatim -- confirm the consumer expects strings.
+
+# Paths, checkpointing, seed, and step counts
+data: exp/dataset.de-en
+save_model: exp/model.de-en
+save_checkpoint_steps: 10000
+keep_checkpoint: 10
+seed: 3435
+train_steps: 200000
+valid_steps: 10000
+warmup_steps: 8000
+report_every: 100
+
+# Encoder/decoder type and sizes
+decoder_type: transformer
+encoder_type: transformer
+word_vec_size: 512
+rnn_size: 512
+layers: 6
+transformer_ff: 2048
+heads: 8
+
+# Gradient accumulation, optimizer, and learning-rate decay
+accum_count: 2
+optim: adam
+adam_beta1: 0.9
+adam_beta2: 0.998
+decay_method: noam
+learning_rate: 2.0
+max_grad_norm: 0.0
+
+# Batching, normalization, dropout, and label smoothing
+batch_size: 4096
+batch_type: tokens
+normalization: tokens
+dropout: 0.1
+label_smoothing: 0.1
+
+max_generator_batches: 2
+
+# Parameter initialization and position encoding
+param_init: 0.0
+param_init_glorot: 'true'
+position_encoding: 'true'
+
+# Devices: world_size equals the number of gpu_ranks entries (4)
+world_size: 4
+gpu_ranks:
+  - 0
+  - 1
+  - 2
+  - 3