Fix unit test
chengduoZH committed Jul 9, 2018
1 parent 7bae8c8 commit 623f412
Showing 4 changed files with 65 additions and 36 deletions.
@@ -284,8 +284,11 @@ std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build(
#ifdef PADDLE_WITH_CUDA
use_gpu = nccl_ctxs_ != nullptr;
#endif
bool insert_bcast_for_cpu =
!use_gpu && !strategy_.share_parameter_between_cards_;
bool insert_bcast_for_gpu = use_gpu;

if (use_gpu || !(use_gpu || strategy_.share_parameter_between_cards_)) {
if (insert_bcast_for_gpu || insert_bcast_for_cpu) {
// Insert BCast Ops
for (size_t dev_id = 0; dev_id < bcast_var_name_set.size(); ++dev_id) {
auto &to_bcast_set = bcast_var_name_set[dev_id];
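
The change above replaces the condition `use_gpu || !(use_gpu || strategy_.share_parameter_between_cards_)` with two named flags, `insert_bcast_for_gpu` and `insert_bcast_for_cpu`. A minimal sketch (Python, illustrative only; the names mirror the C++ flags) checking that the old and new conditions agree for every input combination:

    from itertools import product

    def old_condition(use_gpu, share_parameter_between_cards):
        # Condition on the line being removed.
        return use_gpu or not (use_gpu or share_parameter_between_cards)

    def new_condition(use_gpu, share_parameter_between_cards):
        # Refactored form: broadcast on GPU, or on CPU when parameters are
        # not shared between cards.
        insert_bcast_for_cpu = not use_gpu and not share_parameter_between_cards
        insert_bcast_for_gpu = use_gpu
        return insert_bcast_for_gpu or insert_bcast_for_cpu

    for flags in product([False, True], repeat=2):
        assert old_condition(*flags) == new_condition(*flags)
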
python/paddle/fluid/tests/unittests/parallel_executor_test_base.py (14 additions, 2 deletions)
@@ -35,7 +35,8 @@ def check_network_convergence(self,
feed_dict=None,
seed=None,
use_parallel_executor=True,
balance_parameter_opt_between_cards=False):
use_reduce=False,
share_parameter_between_cards=False):
def run_executor(exe, feed, fetch_list, program=None):
if isinstance(exe, fluid.ParallelExecutor):
res = exe.run(fetch_list=fetch_list, feed=feed)
@@ -53,19 +54,30 @@ def run_executor(exe, feed, fetch_list, program=None):
with fluid.program_guard(main, startup):
if seed is not None:
startup.random_seed = seed
main.random_seed = seed

loss = method(use_feed=feed_dict is not None)
adam = fluid.optimizer.Adam()
adam.minimize(loss)

if memory_opt:
fluid.memory_optimize(main)

place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
startup_exe = fluid.Executor(place)
startup_exe.run(startup)
exec_strategy = fluid.ExecutionStrategy()
exec_strategy.allow_op_delay = allow_op_delay

build_strategy = fluid.BuildStrategy()
build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce if balance_parameter_opt_between_cards else fluid.BuildStrategy.ReduceStrategy.AllReduce
build_strategy.share_parameter_between_cards = share_parameter_between_cards
build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce \
if use_reduce else fluid.BuildStrategy.ReduceStrategy.AllReduce

if share_parameter_between_cards:
assert use_reduce and not use_cuda, \
"If share_parameter_between_cards is True, reduce_strategy " \
"must be Reduce and use_cuda must be false."

if use_parallel_executor:
exe = fluid.ParallelExecutor(
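
For readers unfamiliar with the helper above, here is a minimal, self-contained sketch of the configuration it builds: a small FC network run by ParallelExecutor on CPU with the Reduce strategy and the share_parameter_between_cards attribute introduced by this commit. The network and feed shapes are borrowed from the MNIST tests below; this is illustrative, not part of the commit.

    import os
    import numpy as np
    import paddle.fluid as fluid

    os.environ['CPU_NUM'] = '4'  # number of CPU "cards", as in the tests

    main, startup = fluid.Program(), fluid.Program()
    with fluid.program_guard(main, startup):
        img = fluid.layers.data(name='image', shape=[784], dtype='float32')
        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
        prediction = fluid.layers.fc(input=img, size=10, act='softmax')
        loss = fluid.layers.mean(
            fluid.layers.cross_entropy(input=prediction, label=label))
        fluid.optimizer.Adam().minimize(loss)

        place = fluid.CPUPlace()
        fluid.Executor(place).run(startup)

        build_strategy = fluid.BuildStrategy()
        # share_parameter_between_cards requires the Reduce strategy and CPU
        # execution, matching the assertion added to check_network_convergence.
        build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce
        build_strategy.share_parameter_between_cards = True

        exe = fluid.ParallelExecutor(
            use_cuda=False, loss_name=loss.name, build_strategy=build_strategy)

        feed = {'image': np.zeros([32, 784], dtype='float32'),
                'label': np.ones([32, 1], dtype='int64')}
        first_loss = np.array(exe.run(fetch_list=[loss.name], feed=feed)[0])
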
python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py (36 additions, 25 deletions)
@@ -102,8 +102,9 @@ def setUpClass(cls):
MNIST_RECORDIO_FILE, reader, feeder)

def check_simple_fc_convergence(self,
balance_parameter_opt_between_cards,
use_cuda=True):
use_cuda,
use_reduce=False,
share_parameter_between_cards=False):
self.check_network_convergence(simple_fc_net, use_cuda=use_cuda)
self.check_network_convergence(
simple_fc_net, use_cuda=use_cuda, allow_op_delay=True)
@@ -115,20 +116,24 @@ def check_simple_fc_convergence(self,
feed_dict={"image": img,
"label": label},
use_cuda=use_cuda,
balance_parameter_opt_between_cards=balance_parameter_opt_between_cards
)
use_reduce=use_reduce,
share_parameter_between_cards=share_parameter_between_cards)

def test_simple_fc(self):
self.check_simple_fc_convergence(False, use_cuda=True)
self.check_simple_fc_convergence(False, use_cuda=False)
# use_cuda
self.check_simple_fc_convergence(True)
self.check_simple_fc_convergence(False)

def test_simple_fc_with_new_strategy(self):
self.check_simple_fc_convergence(True, use_cuda=True)
self.check_simple_fc_convergence(True, use_cuda=False)
# use_cuda, use_reduce, share_parameter_between_cards
self.check_simple_fc_convergence(True, True)
self.check_simple_fc_convergence(False, True)
self.check_simple_fc_convergence(False, True, True)

def check_simple_fc_parallel_accuracy(self,
balance_parameter_opt_between_cards,
use_cuda=True):
use_cuda,
use_reduce=False,
share_parameter_between_cards=False):
img = np.zeros(shape=[32, 784], dtype='float32')
label = np.ones(shape=[32, 1], dtype='int64')
single_first_loss, single_last_loss = self.check_network_convergence(
@@ -145,24 +150,28 @@ def check_simple_fc_parallel_accuracy(self,
"label": label},
use_cuda=use_cuda,
use_parallel_executor=True,
balance_parameter_opt_between_cards=balance_parameter_opt_between_cards
)
use_reduce=use_reduce,
share_parameter_between_cards=share_parameter_between_cards)

for p_f in parallel_first_loss:
self.assertAlmostEquals(p_f, single_first_loss[0], delta=1e-6)
for p_l in parallel_last_loss:
self.assertAlmostEquals(p_l, single_last_loss[0], delta=1e-6)

def test_simple_fc_parallel_accuracy(self):
self.check_simple_fc_parallel_accuracy(False, use_cuda=True)
self.check_simple_fc_parallel_accuracy(False, use_cuda=False)
self.check_simple_fc_parallel_accuracy(True)
self.check_simple_fc_parallel_accuracy(False)

def test_simple_fc_parallel_accuracy_with_new_strategy(self):
self.check_simple_fc_parallel_accuracy(True, use_cuda=True)
self.check_simple_fc_parallel_accuracy(True, use_cuda=False)

def check_batchnorm_fc_convergence(
self, balance_parameter_opt_between_cards, use_cuda):
# use_cuda, use_reduce, share_parameter_between_cards
# self.check_simple_fc_parallel_accuracy(True, True)
# self.check_simple_fc_parallel_accuracy(False, True)
self.check_simple_fc_parallel_accuracy(False, True, True)

def check_batchnorm_fc_convergence(self,
use_cuda,
use_reduce=False,
share_parameter_between_cards=False):
self.check_network_convergence(fc_with_batchnorm, use_cuda=use_cuda)
img = np.zeros(shape=[32, 784], dtype='float32')
label = np.ones(shape=[32, 1], dtype='int64')
@@ -171,16 +180,18 @@ def check_batchnorm_fc_convergence(
feed_dict={"image": img,
"label": label},
use_cuda=use_cuda,
balance_parameter_opt_between_cards=balance_parameter_opt_between_cards
)
use_reduce=use_reduce,
share_parameter_between_cards=share_parameter_between_cards)

def test_batchnorm_fc(self):
self.check_batchnorm_fc_convergence(False, use_cuda=True)
self.check_batchnorm_fc_convergence(False, use_cuda=False)
self.check_batchnorm_fc_convergence(True)
self.check_batchnorm_fc_convergence(False)

def test_batchnorm_fc_with_new_strategy(self):
self.check_batchnorm_fc_convergence(True, use_cuda=True)
self.check_batchnorm_fc_convergence(True, use_cuda=False)
# use_cuda, use_reduce, share_parameter_between_cards
self.check_batchnorm_fc_convergence(True, True)
self.check_batchnorm_fc_convergence(False, True)
self.check_batchnorm_fc_convergence(False, True, True)


if __name__ == '__main__':
@@ -132,8 +132,9 @@ def SE_ResNeXt50Small(batch_size=2, use_feed=False):

class TestResnet(TestParallelExecutorBase):
def check_resnet_convergence(self,
balance_parameter_opt_between_cards,
use_cuda=True,
use_cuda,
use_reduce=False,
share_parameter_between_cards=False,
iter=20):
os.environ['CPU_NUM'] = str(4)

@@ -145,16 +146,18 @@ def check_resnet_convergence(self,
iter=iter,
batch_size=batch_size,
use_cuda=use_cuda,
balance_parameter_opt_between_cards=balance_parameter_opt_between_cards
)
use_reduce=use_reduce,
share_parameter_between_cards=share_parameter_between_cards)

def test_resnet(self):
self.check_resnet_convergence(False, use_cuda=True)
self.check_resnet_convergence(False, use_cuda=False, iter=5)
self.check_resnet_convergence(True)
self.check_resnet_convergence(False, iter=5)

def test_resnet_with_new_strategy(self):
self.check_resnet_convergence(True, use_cuda=True)
self.check_resnet_convergence(True, use_cuda=False, iter=5)
# use_cuda, use_reduce, share_parameter_between_cards
self.check_resnet_convergence(True, True)
self.check_resnet_convergence(False, True, iter=5)
self.check_resnet_convergence(False, True, True, iter=5)


if __name__ == '__main__':

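Taken together, the *_with_new_strategy convergence tests in the MNIST, batch-norm, and SE-ResNeXt files now exercise the same three configurations. A compact summary of the positional arguments (the list name is illustrative, not from the commit):

    new_strategy_configs = [
        # (use_cuda, use_reduce, share_parameter_between_cards)
        (True, True, False),   # GPU, Reduce strategy
        (False, True, False),  # CPU, Reduce strategy
        (False, True, True),   # CPU, Reduce strategy, parameters shared between cards
    ]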