Add some dist-training robust cases into fluid benchmark test #11207
Changes from 5 commits
Changes to the seq2seq benchmark model:
@@ -26,6 +26,10 @@
 import paddle.fluid.core as core
 import paddle.fluid.framework as framework
 from paddle.fluid.executor import Executor
+from models.model_base import get_decay_learning_rate
+from models.model_base import get_regularization
+from models.model_base import set_error_clip
+from models.model_base import set_gradient_clip


 def lstm_step(x_t, hidden_t_prev, cell_t_prev, size):
@@ -50,7 +54,7 @@ def linear(inputs):


 def seq_to_seq_net(embedding_dim, encoder_size, decoder_size, source_dict_dim,
-                   target_dict_dim, is_generating, beam_size, max_length):
+                   target_dict_dim, is_generating, beam_size, max_length, args):
     """Construct a seq2seq network."""

     def bi_lstm_encoder(input_seq, gate_size):
@@ -99,6 +103,8 @@ def bi_lstm_encoder(input_seq, gate_size):
         size=decoder_size,
         bias_attr=False,
         act='tanh')
+    set_error_clip(args.error_clip_method, encoded_proj.name,
+                   args.error_clip_min, args.error_clip_max)

     def lstm_decoder_with_attention(target_embedding, encoder_vec, encoder_proj,
                                     decoder_boot, decoder_size):
@@ -211,12 +217,24 @@ def get_model(args):
         dict_size,
         False,
         beam_size=beam_size,
-        max_length=max_length)
+        max_length=max_length,
+        args=args)

     # clone from default main program
     inference_program = fluid.default_main_program().clone()

-    optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
+    # set gradient clip
+    set_gradient_clip(args.gradient_clip_method, args.gradient_clip_norm)
Reviewer comment on the set_gradient_clip call above: Is there a way that we can disable these settings if the args are empty?

Author reply: If the clip method in args is None, these settings are disabled; when the user does not specify the args, the code behaves as in the sketch below.
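A minimal sketch of that default behavior (the original snippet from the reply is not shown in this extract, so this is reconstructed from the model_base helpers added below; the argument values are illustrative):

    # With the method arguments left as None (the defaults when the user does not
    # pass them), every helper is a no-op and training runs exactly as before.
    set_gradient_clip(None, clip_norm=1.0)                     # returns None, registers no clip
    lr = get_decay_learning_rate(None, learning_rate=0.001)    # returns 0.001 unchanged
    reg = get_regularization(None)                             # returns None
    optimizer = fluid.optimizer.Adam(learning_rate=lr, regularization=reg)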
+
+    optimizer = fluid.optimizer.Adam(
+        learning_rate=get_decay_learning_rate(
+            decay_method=args.learning_rate_decay_method,
+            learning_rate=args.learning_rate,
+            decay_steps=args.learning_rate_decay_steps,
+            decay_rate=args.learning_rate_decay_rate),
+        regularization=get_regularization(
+            regularizer_method=args.weight_decay_regularizer_method,
+            regularizer_coeff=args.weight_decay_regularizer_coeff))

     train_batch_generator = paddle.batch(
         paddle.reader.shuffle(
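For reference, a hedged sketch of the args fields the modified model now reads (the field names come from the diff above; the values shown, and which fields default to None, are illustrative, since the benchmark's argument parser is not part of this extract):

    from argparse import Namespace

    args = Namespace(
        learning_rate=0.001,
        learning_rate_decay_method=None,        # None keeps a constant learning rate
        learning_rate_decay_steps=100000,
        learning_rate_decay_rate=0.5,
        weight_decay_regularizer_method=None,   # None disables weight decay
        weight_decay_regularizer_coeff=0.1,
        gradient_clip_method="GlobalNorm",      # None disables gradient clipping
        gradient_clip_norm=1.0,
        error_clip_method="Value",              # None disables error clipping
        error_clip_min=-1e-6,
        error_clip_max=2e-6)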
New file added by this PR (models/model_base.py, per the imports above), @@ -0,0 +1,86 @@:

# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import argparse

import paddle.fluid as fluid
from paddle.fluid.regularizer import L1DecayRegularizer
from paddle.fluid.regularizer import L2DecayRegularizer
from paddle.fluid.clip import GradientClipByNorm
from paddle.fluid.clip import GradientClipByGlobalNorm
from paddle.fluid.clip import ErrorClipByValue

__all__ = [
    'get_decay_learning_rate',
    'get_regularization',
    'set_error_clip',
    'set_gradient_clip',
]


def get_decay_learning_rate(decay_method,
                            learning_rate=0.001,
                            decay_steps=100000,
                            decay_rate=0.5,
                            staircase=True):
    if not decay_method:
        return learning_rate
    else:
        decay_op = getattr(fluid.layers, "%s_decay" % decay_method)
        return decay_op(
            learning_rate=learning_rate,
            decay_steps=decay_steps,
            decay_rate=decay_rate)


def get_regularization(regularizer_method, regularizer_coeff=0.1):
    if not regularizer_method:
        return None
    else:
        RegularizerClazz = globals()["%sDecayRegularizer" % regularizer_method]
        regularizer = RegularizerClazz(regularization_coeff=regularizer_coeff)
        return regularizer


def set_error_clip(clip_method,
                   layer_name,
                   clip_min=-1e-6,
                   clip_max=2e-6,
                   program=None):
    assert clip_min < clip_max
    if not clip_method:
        return None
    else:
        ClipClazz = globals()["ErrorClipBy%s" % clip_method]
        if not program:
            prog = fluid.default_main_program()
        else:
            prog = program
        prog.block(0).var(layer_name).set_error_clip(
            ClipClazz(
                max=clip_max, min=clip_min))


def set_gradient_clip(clip_method, clip_norm=1.):
    if not clip_method:
        return None
    else:
        ClipClazz = globals()["GradientClipBy%s" % clip_method]
        fluid.clip.set_gradient_clip(ClipClazz(clip_norm=clip_norm))
        return clip_method
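A hedged usage sketch of these helpers (the argument values are illustrative, not taken from this PR): the string arguments are mapped to fluid classes and ops by name, so "exponential" resolves to fluid.layers.exponential_decay, "L2" to L2DecayRegularizer, and "GlobalNorm" to GradientClipByGlobalNorm.

    import paddle.fluid as fluid
    from models.model_base import get_decay_learning_rate, get_regularization, set_gradient_clip

    # Build a decayed learning rate, an L2 weight-decay regularizer, and a
    # global-norm gradient clip, then hand the first two to the optimizer.
    lr = get_decay_learning_rate(
        decay_method="exponential",   # getattr(fluid.layers, "exponential_decay")
        learning_rate=0.001,
        decay_steps=100000,
        decay_rate=0.5)
    reg = get_regularization("L2", regularizer_coeff=0.1)    # L2DecayRegularizer
    set_gradient_clip("GlobalNorm", clip_norm=1.0)           # GradientClipByGlobalNorm
    optimizer = fluid.optimizer.Adam(learning_rate=lr, regularization=reg)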
Reviewer comment: model_base is not uploaded?

Author reply: Thanks for the review. I added the benchmark/fluid/models/model_base.py file in the next commit.