From 84d6434d53dbef47b5aa817c5ff25d236a59a83c Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Mon, 14 Aug 2017 20:58:57 +0800 Subject: [PATCH 01/21] Compare the gradient consistency between GPU and CPU calculations. --- paddle/operators/sigmoid_op.cc | 3 +- .../paddle/v2/framework/tests/CMakeLists.txt | 1 + .../v2/framework/tests/gradient_checker.py | 173 ++++++++---------- .../v2/framework/tests/test_sigmoid_op.py | 22 ++- 4 files changed, 98 insertions(+), 101 deletions(-) diff --git a/paddle/operators/sigmoid_op.cc b/paddle/operators/sigmoid_op.cc index a7dfb624e5b77..84601bd7334a9 100644 --- a/paddle/operators/sigmoid_op.cc +++ b/paddle/operators/sigmoid_op.cc @@ -44,7 +44,8 @@ class SigmoidOpGrad : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - ctx.Output(0)->Resize(ctx.Input(0)->dims()); + ctx.Output(framework::GradVarName("X")) + ->Resize(ctx.Input("Y")->dims()); } }; diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index 96fad9b42e04a..4c088e7612a93 100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -25,3 +25,4 @@ py_test(test_operator SRCS test_operator.py) # py_test(test_gaussian_random_op SRCS test_gaussian_random_op.py) py_test(test_uniform_random_op SRCS test_uniform_random_op.py) py_test(test_recurrent_op SRCS test_recurrent_op.py) +py_test(test_gradient_checker SRCS test_gradient_checker.py) diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py index 501cf6110ff74..5f9e54837edae 100644 --- a/python/paddle/v2/framework/tests/gradient_checker.py +++ b/python/paddle/v2/framework/tests/gradient_checker.py @@ -1,6 +1,7 @@ import unittest import numpy +import itertools import paddle.v2.framework.core as core from paddle.v2.framework.op import Operator @@ -8,6 +9,7 @@ def create_op(op_type): + # TODO need to set attrs kwargs = dict() for in_name in Operator.get_op_input_names(op_type): kwargs[in_name] = in_name @@ -66,7 +68,6 @@ def get_numeric_gradient(op, local_scope.find_var(output).get_tensor().alloc_float(core.CPUPlace( )) - # TODO(yuyang18): Only CPU is support now. cpu_ctx = core.DeviceContext.create(core.CPUPlace()) def get_output(): @@ -109,12 +110,71 @@ def product(dim): class GradientChecker(unittest.TestCase): - def assert_is_close(self, numeric_grads, scope, max_relative_error, - msg_prefix): - for name in numeric_grads: - b = numpy.array(scope.find_var(grad_var_name(name)).get_tensor()) - a = numeric_grads[name] + def get_grad(self, forward_op, backward_op, input_vars, grad_names, place): + scope = core.Scope() + ctx = core.DeviceContext.create(place) + inputs = forward_op.inputs() + in_names = [item for k in inputs for item in inputs[k]] + outputs = forward_op.outputs() + out_names = [item for k in outputs for item in outputs[k]] + + # create input var and set value + for name, value in input_vars.iteritems(): + if name not in in_names: + raise ValueError(name + "does not exist in Op's inputs.") + var = scope.new_var(name).get_tensor() + var.set_dims(value.shape) + var.set(value, place) + + # run forward op + for out_name in out_names: + scope.new_var(out_name) + forward_op.infer_shape(scope) + forward_op.run(scope, ctx) + + # set output var's shape + # set output grad to ones + for name in out_names: + out_tensor = scope.find_var(name).get_tensor() + grad_tensor = scope.new_var(grad_var_name(name)).get_tensor() + grad_tensor.set_dims(out_tensor.shape()) + data = numpy.ones(out_tensor.shape(), dtype=numpy.float32) + grad_tensor.set(data, place) + + # run backward op + for name in backward_op.outputs(): + scope.new_var(name) + backward_op.infer_shape(scope) + backward_op.run(scope, ctx) + + outs = [ + numpy.array(scope.find_var(name).get_tensor()) + for name in grad_names + ] + return outs + + def compare_grad(self, forward_op, inputs): + backward_op = core.Operator.backward(forward_op, set()) + if not (core.is_compile_gpu() and backward_op.support_gpu()): + return + + outputs = backward_op.outputs() + out_names = [item for k in outputs for item in outputs[k]] + cpu_grads = self.get_grad(forward_op, backward_op, inputs, out_names, + core.CPUPlace()) + gpu_grads = self.get_grad(forward_op, backward_op, inputs, out_names, + core.GPUPlace(0)) + + for c_grad, g_grad, name in itertools.izip(cpu_grads, gpu_grads, + out_names): + self.assertTrue( + numpy.allclose(c_grad, g_grad), + "output name: " + name + " has diff") + + def assert_is_close(self, numeric_grads, analytic_grads, names, + max_relative_error, msg_prefix): + for a, b, name in itertools.izip(numeric_grads, analytic_grads, names): abs_a = numpy.abs(a) # if abs_a is nearly zero, then use abs error for a, not relative # error. @@ -159,106 +219,27 @@ def check_grad(self, inputs = forward_op.inputs() in_names = [item for k in inputs for item in inputs[k]] - outputs = forward_op.outputs() - out_names = [item for k in outputs for item in outputs[k]] - for no_grad in no_grad_set: if no_grad not in in_names: raise ValueError("no_grad should be in in_names") backward_op = core.Operator.backward(forward_op, no_grad_set) - bwd_outputs = backward_op.outputs() - bwd_out_names = [item for k in bwd_outputs for item in bwd_outputs[k]] - places = [core.CPUPlace()] if not only_cpu and core.is_compile_gpu() and backward_op.support_gpu(): places.append(core.GPUPlace(0)) - numeric_grad = dict() - # get numeric gradient - for check_name in inputs_to_check: - numeric_grad[check_name] = \ - get_numeric_gradient(forward_op, input_vars, output_name, - check_name) + # get numerical gradients + numeric_grads = [ + get_numeric_gradient(forward_op, input_vars, output_name, name) + for name in inputs_to_check + ] - # get operator gradient according to different device + check_names = [grad_var_name(name) for name in inputs_to_check] for place in places: - scope = core.Scope() - ctx = core.DeviceContext.create(place) - - # create input var and set value - for name, value in input_vars.iteritems(): - if name not in in_names: - raise ValueError(name + " not in op.inputs_") - var = scope.new_var(name).get_tensor() - var.set_dims(value.shape) - var.set(value, place) - - # create output var - for out_name in out_names: - scope.new_var(out_name).get_tensor() - - # infer the shape of output var and compute/set value of output var - forward_op.infer_shape(scope) - forward_op.run(scope, ctx) - - # create output grad var - # set shape as the output var - # set value of this grad to ones - for name in out_names: - out_tensor = scope.find_var(name).get_tensor() - grad_tensor = scope.new_var(grad_var_name(name)).get_tensor() - grad_tensor.set_dims(out_tensor.shape()) - data = 1.0 * numpy.ones(out_tensor.shape()) - grad_tensor.set(data, place) - - # create input grad var - for name in bwd_out_names: - scope.new_var(name).get_tensor() - - # infer the shape of input gradient var and compute/set it's value - # with backward op - backward_op.infer_shape(scope) - backward_op.run(scope, ctx) - - self.assert_is_close(numeric_grad, scope, max_relative_error, + # get analytical gradients according to different device + analytic_grads = self.get_grad(forward_op, backward_op, input_vars, + check_grad_names, place) + self.assert_is_close(numeric_grads, analytic_grads, check_names, + max_relative_error, "Gradient Check On %s" % str(place)) - - -if __name__ == '__main__': - - class GetNumericGradientTest(unittest.TestCase): - def test_add_op(self): - add_op = Operator('add_two', X="X", Y="Y", Out="Z") - x = numpy.random.random((10, 1)).astype("float32") - y = numpy.random.random((10, 1)).astype("float32") - - arr = get_numeric_gradient(add_op, {'X': x, "Y": y}, 'Z', 'X') - self.assertAlmostEqual(arr.mean(), 1.0, delta=1e-2) - - def test_softmax_op(self): - def stable_softmax(x): - """Compute the softmax of vector x in a numerically stable way.""" - shiftx = x - numpy.max(x) - exps = numpy.exp(shiftx) - return exps / numpy.sum(exps) - - def label_softmax_grad(Y, dY): - dX = Y * 0.0 - for i in range(Y.shape[0]): - d = numpy.dot(Y[i, :], dY[i, :]) - dX[i, :] = Y[i, :] * (dY[i, :] - d) - return dX - - softmax_op = Operator("softmax", X="X", Y="Y") - - X = numpy.random.random((2, 2)).astype("float32") - Y = numpy.apply_along_axis(stable_softmax, 1, X) - dY = numpy.ones(Y.shape) - dX = label_softmax_grad(Y, dY) - - arr = get_numeric_gradient(softmax_op, {"X": X}, 'Y', 'X') - numpy.testing.assert_almost_equal(arr, dX, decimal=1e-2) - - unittest.main() diff --git a/python/paddle/v2/framework/tests/test_sigmoid_op.py b/python/paddle/v2/framework/tests/test_sigmoid_op.py index 2a57a41ed8b71..1a6d395be662b 100644 --- a/python/paddle/v2/framework/tests/test_sigmoid_op.py +++ b/python/paddle/v2/framework/tests/test_sigmoid_op.py @@ -1,6 +1,7 @@ import unittest -from op_test_util import OpTestMeta import numpy as np +from op_test_util import OpTestMeta +from gradient_checker import GradientChecker, create_op class TestSigmoidOp(unittest.TestCase): @@ -8,12 +9,25 @@ class TestSigmoidOp(unittest.TestCase): def setUp(self): self.type = "sigmoid" - self.inputs = {'X': np.random.random((32, 100)).astype("float32")} + self.inputs = {'X': np.random.random((15, 31)).astype("float32")} self.outputs = {'Y': 1 / (1 + np.exp(-self.inputs['X']))} -#class TestSigmoidGradOp(unittest.TestCase): -#TODO(qingqing) add unit test +class TestSigmoidGradOp(GradientChecker): + def test_compare_grad(self): + op = create_op("sigmoid") + inputs = {"X": np.random.random((11, 17)).astype("float32")} + + # compare gpu and cpu results for backward op + self.compare_grad(op, inputs) + + def test_check_grad(self): + op = create_op("sigmoid") + inputs = {"X": np.random.random((11, 17)).astype("float32")} + + # check gradients + self.check_grad(op, inputs, set("X"), "Y") + if __name__ == '__main__': unittest.main() From 01d9134067852a1f9dfecf75f730f9fba14434e0 Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Mon, 14 Aug 2017 21:01:24 +0800 Subject: [PATCH 02/21] Add test_gradient_checker.py --- .../framework/tests/test_gradient_checker.py | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 python/paddle/v2/framework/tests/test_gradient_checker.py diff --git a/python/paddle/v2/framework/tests/test_gradient_checker.py b/python/paddle/v2/framework/tests/test_gradient_checker.py new file mode 100644 index 0000000000000..e0b315120862b --- /dev/null +++ b/python/paddle/v2/framework/tests/test_gradient_checker.py @@ -0,0 +1,43 @@ +import unittest +import numpy +from paddle.v2.framework.op import Operator +from gradient_checker import GradientChecker +from gradient_checker import get_numeric_gradient + + +class GetNumericGradientTest(unittest.TestCase): + def test_add_op(self): + add_op = Operator('add_two', X="X", Y="Y", Out="Z") + x = numpy.random.random((10, 1)).astype("float32") + y = numpy.random.random((10, 1)).astype("float32") + + arr = get_numeric_gradient(add_op, {'X': x, "Y": y}, 'Z', 'X') + self.assertAlmostEqual(arr.mean(), 1.0, delta=1e-4) + + def test_softmax_op(self): + def stable_softmax(x): + """Compute the softmax of vector x in a numerically stable way.""" + shiftx = x - numpy.max(x) + exps = numpy.exp(shiftx) + return exps / numpy.sum(exps) + + def label_softmax_grad(Y, dY): + dX = Y * 0.0 + for i in range(Y.shape[0]): + d = numpy.dot(Y[i, :], dY[i, :]) + dX[i, :] = Y[i, :] * (dY[i, :] - d) + return dX + + softmax_op = Operator("softmax", X="X", Y="Y") + + X = numpy.random.random((2, 2)).astype("float32") + Y = numpy.apply_along_axis(stable_softmax, 1, X) + dY = numpy.ones(Y.shape) + dX = label_softmax_grad(Y, dY) + + arr = get_numeric_gradient(softmax_op, {"X": X}, 'Y', 'X') + numpy.testing.assert_almost_equal(arr, dX, decimal=1e-2) + + +if __name__ == '__main__': + unittest.main() From 9a0eedf5d4d32e0aaa80e554f608c56e6d36a798 Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Mon, 14 Aug 2017 21:27:17 +0800 Subject: [PATCH 03/21] fix bug. --- python/paddle/v2/framework/tests/gradient_checker.py | 3 ++- python/paddle/v2/framework/tests/test_sigmoid_op.py | 11 +++-------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py index 5f9e54837edae..d251f14b9d158 100644 --- a/python/paddle/v2/framework/tests/gradient_checker.py +++ b/python/paddle/v2/framework/tests/gradient_checker.py @@ -156,6 +156,7 @@ def get_grad(self, forward_op, backward_op, input_vars, grad_names, place): def compare_grad(self, forward_op, inputs): backward_op = core.Operator.backward(forward_op, set()) + # return if not compile with GPU or not implementing GPU kernel if not (core.is_compile_gpu() and backward_op.support_gpu()): return @@ -239,7 +240,7 @@ def check_grad(self, for place in places: # get analytical gradients according to different device analytic_grads = self.get_grad(forward_op, backward_op, input_vars, - check_grad_names, place) + check_names, place) self.assert_is_close(numeric_grads, analytic_grads, check_names, max_relative_error, "Gradient Check On %s" % str(place)) diff --git a/python/paddle/v2/framework/tests/test_sigmoid_op.py b/python/paddle/v2/framework/tests/test_sigmoid_op.py index 1a6d395be662b..c3bd79f5dc0ba 100644 --- a/python/paddle/v2/framework/tests/test_sigmoid_op.py +++ b/python/paddle/v2/framework/tests/test_sigmoid_op.py @@ -17,15 +17,10 @@ class TestSigmoidGradOp(GradientChecker): def test_compare_grad(self): op = create_op("sigmoid") inputs = {"X": np.random.random((11, 17)).astype("float32")} - - # compare gpu and cpu results for backward op + # compare gpu and cpu results for backward op. + # skip this test if only compiling CPU version. self.compare_grad(op, inputs) - - def test_check_grad(self): - op = create_op("sigmoid") - inputs = {"X": np.random.random((11, 17)).astype("float32")} - - # check gradients + # check gradients self.check_grad(op, inputs, set("X"), "Y") From 8c653ba76a442a528c68240baf2d564971d5588d Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 16 Aug 2017 17:47:22 +0800 Subject: [PATCH 04/21] Complete remove std::shared_ptr --- paddle/framework/backward.cc | 40 +++++++++++++-------------- paddle/framework/op_registry.h | 11 ++++---- paddle/framework/op_registry_test.cc | 6 ++-- paddle/framework/pybind.cc | 37 +++++++++++-------------- paddle/operators/net_op.h | 41 +++++++++++++++++++++------- paddle/operators/net_op_test.cc | 23 +++++++--------- paddle/operators/recurrent_op.cc | 20 +++++++------- paddle/operators/recurrent_op.h | 24 +++++++++------- 8 files changed, 107 insertions(+), 95 deletions(-) diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc index c226e4e3d2a58..a1049f718dcd3 100644 --- a/paddle/framework/backward.cc +++ b/paddle/framework/backward.cc @@ -15,6 +15,8 @@ #include "paddle/framework/backward.h" #include +#include + #include "paddle/framework/op_registry.h" #include "paddle/operators/net_op.h" #include "paddle/operators/recurrent_op.h" @@ -43,11 +45,11 @@ static bool AllInSet( return all_in_set; } -static std::shared_ptr NOP() { - auto net_op = std::make_shared(); +static std::unique_ptr NOP() { + auto net_op = new operators::NetOp(); net_op->SetType("@NOP@"); net_op->CompleteAddOp(); - return net_op; + return std::unique_ptr(net_op); } // Get backward operator from a forward operator, a recursive implementation. @@ -62,11 +64,7 @@ static std::shared_ptr NOP() { // operator, in a complex situation, it maybe a NetOp. // // See Backward.h for details -static std::shared_ptr BackwardRecursive( - const OperatorBase& forwardOp, - std::unordered_set& no_grad_names, size_t& uniq_id); - -std::shared_ptr BackwardRecursive( +static std::unique_ptr BackwardRecursive( const OperatorBase& forwardOp, std::unordered_set& no_grad_names, size_t& uniq_id) { // If all input gradients of forwarding operator do not need to calculate, @@ -91,7 +89,7 @@ std::shared_ptr BackwardRecursive( } // Returned gradient network - auto net = std::make_shared(); + auto net = std::unique_ptr(); if (forwardOp.IsNetOp()) { // Because forwardOp is a net op, it can static_cast. @@ -105,14 +103,14 @@ std::shared_ptr BackwardRecursive( // reversely travel forwardNet and collect all duplicate outputs. for (auto it = forwardNet.ops_.rbegin(); it != forwardNet.ops_.rend(); ++it, ++local_op_id) { - auto fwd = *it; + auto& fwd = *it; auto bwd = BackwardRecursive(*fwd, no_grad_names, uniq_id); - net->AddOp(bwd); ForEachVarName(bwd->Outputs(), [&dup_output_ops, local_op_id](const std::string& out) { dup_output_ops[out].emplace_back(local_op_id); return false; }); + net->AddOp(std::move(bwd)); } // Get unique ID for this method. auto uid = uniq_id++; @@ -122,7 +120,7 @@ std::shared_ptr BackwardRecursive( // to handle this case. For each duplicate output, rename it to an alias // (original name with a offset), append an `add` op for its operator, // and finally sum all the alias variable to the final output variable y. - using Pos = std::pair>; + using Pos = std::pair>; std::list insert_position; for (auto& dup_output_op : dup_output_ops) { const std::string& name = dup_output_op.first; @@ -150,13 +148,13 @@ std::shared_ptr BackwardRecursive( [](const Pos& l, const Pos& r) { return l.first > r.first; }); for (auto& pos : insert_position) { - net->InsertOp(pos.first + 1, pos.second); + net->InsertOp(pos.first + 1, std::move(pos.second)); } } else { - std::shared_ptr grad_op = OpRegistry::CreateGradOp(forwardOp); + std::unique_ptr grad_op(OpRegistry::CreateGradOp(forwardOp)); - ForEachVarName(grad_op->Inputs(), [&no_grad_names, &net, - grad_op](const std::string& grad_input) { + ForEachVarName(grad_op->Inputs(), [&no_grad_names, &net, &grad_op]( + const std::string& grad_input) { if (no_grad_names.count(grad_input)) { // +1 for \0 std::string prefix = grad_input.substr( @@ -190,20 +188,20 @@ std::shared_ptr BackwardRecursive( const auto& stepnet_op = *static_cast(&rnnop.stepnet()); // create stepnet's gradient op - auto grad_stepnet = BackwardRecursive(stepnet_op, no_grad_names, uniq_id); rnn_grad_op->set_stepnet( - std::static_pointer_cast(grad_stepnet)); + BackwardRecursive(stepnet_op, no_grad_names, uniq_id)); } if (net->ops_.empty()) { // Current no aux op is added to network return grad_op; } - net->AddOp(grad_op); + net->AddOp(std::move(grad_op)); } net->SetType("@GENERATED_BACKWARD@"); net->CompleteAddOp(); - return net; -} // namespace framework + return std::unique_ptr( + static_cast(net.release())); +} // See header for comments std::shared_ptr Backward( diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h index 4fa0a2750b239..f0cc0012e11de 100644 --- a/paddle/framework/op_registry.h +++ b/paddle/framework/op_registry.h @@ -174,7 +174,7 @@ class OpRegistry { } } - static std::shared_ptr CreateOp(const std::string& type, + static std::unique_ptr CreateOp(const std::string& type, const VarNameMap& inputs, const VarNameMap& outputs, AttributeMap attrs) { @@ -183,7 +183,7 @@ class OpRegistry { "Operator '%s' has not been registered.", type); it->second.checker_->Check(attrs); auto op = it->second.creator_(type, inputs, outputs, attrs); - return std::shared_ptr(op); + return std::unique_ptr(op); } static VarNameMap ConvertOpDescVarsToVarNameMap( @@ -199,7 +199,7 @@ class OpRegistry { return ret_val; } - static std::shared_ptr CreateOp(const OpDesc& op_desc) { + static std::unique_ptr CreateOp(const OpDesc& op_desc) { VarNameMap inputs = ConvertOpDescVarsToVarNameMap(op_desc.inputs()); VarNameMap outputs = ConvertOpDescVarsToVarNameMap(op_desc.outputs()); AttributeMap attrs; @@ -210,11 +210,10 @@ class OpRegistry { return CreateOp(op_desc.type(), inputs, outputs, attrs); } - static std::shared_ptr CreateGradOp(const OperatorBase& op) { + static std::unique_ptr CreateGradOp(const OperatorBase& op) { PADDLE_ENFORCE(!op.IsNetOp(), "Use framework::Backward to get backward ops"); - std::shared_ptr grad_op(BuildGradOp(&op)); - return grad_op; + return std::unique_ptr(BuildGradOp(&op)); } static std::unordered_map& op_info_map() { diff --git a/paddle/framework/op_registry_test.cc b/paddle/framework/op_registry_test.cc index 1a85d568350dc..50c45919c53af 100644 --- a/paddle/framework/op_registry_test.cc +++ b/paddle/framework/op_registry_test.cc @@ -76,8 +76,7 @@ TEST(OpRegistry, CreateOp) { attr->set_type(paddle::framework::AttrType::FLOAT); attr->set_f(scale); - std::shared_ptr op = - paddle::framework::OpRegistry::CreateOp(op_desc); + auto op = paddle::framework::OpRegistry::CreateOp(op_desc); paddle::framework::Scope scope; paddle::platform::CPUDeviceContext dev_ctx; op->Run(scope, dev_ctx); @@ -118,8 +117,7 @@ TEST(OpRegistry, DefaultValue) { ASSERT_TRUE(op_desc.IsInitialized()); - std::shared_ptr op = - paddle::framework::OpRegistry::CreateOp(op_desc); + auto op = paddle::framework::OpRegistry::CreateOp(op_desc); paddle::framework::Scope scope; paddle::platform::CPUDeviceContext dev_ctx; op->Run(scope, dev_ctx); diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc index fe0c87bc57082..2fc1e214b28d8 100644 --- a/paddle/framework/pybind.cc +++ b/paddle/framework/pybind.cc @@ -207,8 +207,7 @@ All parameter, weight, gradient are variables in Paddle. .def(py::init<>()) .def("__str__", string::to_string); - py::class_> operator_base( - m, "Operator"); + py::class_ operator_base(m, "Operator"); operator_base.def_static("create", [](py::bytes protobin) { OpDesc desc; @@ -228,25 +227,23 @@ All parameter, weight, gradient are variables in Paddle. ExposeOperator(operator_base); - py::class_> net(m, "Net"); + py::class_ net(m, "Net"); net.def_static("create", - []() -> std::shared_ptr { - auto retv = std::make_shared(); + []() -> operators::NetOp * { + auto *retv = new operators::NetOp; retv->SetType("plain_net"); return retv; }) - .def("add_op", &operators::NetOp::AddOp) + .def("add_op", [](operators::NetOp &self, + const OperatorBase &op) { self.AddOp(op); }) .def("add_op", - [](operators::NetOp &self, - const std::shared_ptr &net) -> void { - self.AddOp(std::static_pointer_cast(net)); + [](operators::NetOp &self, const operators::NetOp &net) -> void { + self.AddOp(net); }) .def("add_op", [](operators::NetOp &self, - const std::shared_ptr &rnn) -> void { - self.AddOp(std::static_pointer_cast(rnn)); - }) + const operators::RecurrentOp &rnn) -> void { self.AddOp(rnn); }) .def("complete_add_op", &operators::NetOp::CompleteAddOp) .def("complete_add_op", [](std::shared_ptr &self) { self->CompleteAddOp(); @@ -255,12 +252,11 @@ All parameter, weight, gradient are variables in Paddle. ExposeOperator(net); // recurrent_op - py::class_> - rnn(m, "RecurrentOp"); + py::class_ rnn(m, "RecurrentOp"); rnn.def_static( "create", - [](py::bytes protobin) -> std::shared_ptr { + [](py::bytes protobin) -> operators::RecurrentOp * { OpDesc desc; PADDLE_ENFORCE(desc.ParsePartialFromString(protobin), "Cannot parse user input to OpDesc"); @@ -268,13 +264,12 @@ All parameter, weight, gradient are variables in Paddle. "User OpDesc is not initialized, reason %s", desc.InitializationErrorString()); auto rnn_op = OpRegistry::CreateOp(desc); - return std::dynamic_pointer_cast(rnn_op); + return static_cast(rnn_op.release()); }) - .def("set_stepnet", - [](operators::RecurrentOp &self, - const std::shared_ptr &net) -> void { - self.set_stepnet(net); - }); + .def("set_stepnet", [](operators::RecurrentOp &self, + const operators::NetOp &net) -> void { + self.set_stepnet(net.Clone()); + }); ExposeOperator(rnn); m.def("unique_integer", UniqueIntegerGenerator); diff --git a/paddle/operators/net_op.h b/paddle/operators/net_op.h index 743f0e67dbeaa..2ec65c63f3fac 100644 --- a/paddle/operators/net_op.h +++ b/paddle/operators/net_op.h @@ -45,11 +45,11 @@ class NetOp : public framework::OperatorBase { : framework::OperatorBase( static_cast(o)) { this->ops_.reserve(o.ops_.size()); - std::transform(o.ops_.begin(), o.ops_.end(), std::back_inserter(this->ops_), - [](const std::shared_ptr& op) - -> std::shared_ptr { - return std::shared_ptr(op->Clone()); - }); + std::transform( + o.ops_.begin(), o.ops_.end(), std::back_inserter(this->ops_), + [](const std::unique_ptr& op) { + return std::unique_ptr(op->Clone()); + }); this->CompleteAddOp(); } @@ -86,21 +86,42 @@ class NetOp : public framework::OperatorBase { return true; } + void AddOp(const framework::OperatorBase& op) { AddOp(op.Clone()); } + /** * @brief Add an operator by ptr */ - void AddOp(const std::shared_ptr& op) { + void AddOp(framework::OperatorBase* op, bool own) { PADDLE_ENFORCE(!add_op_done_, "Cannot AddOp when this network is sealed"); PADDLE_ENFORCE_NOT_NULL(op, "Cannot Insert Null op"); - ops_.push_back(op); + if (!own) { + op = op->Clone().release(); + } + ops_.emplace_back(op); } - void InsertOp(size_t pos, const std::shared_ptr& op) { + void AddOp(std::unique_ptr&& op) { + AddOp(op.release(), true); + } + + void InsertOp(size_t pos, framework::OperatorBase* op, bool own) { PADDLE_ENFORCE(!add_op_done_, "Cannot InsertOp when this network is sealed"); PADDLE_ENFORCE_NOT_NULL(op, "Cannot Insert Null op"); PADDLE_ENFORCE_LE(pos, ops_.size(), "Out of range"); - ops_.insert(ops_.begin() + pos, op); + if (!own) { + op = op->Clone().release(); + } + ops_.insert(ops_.begin() + pos, + std::unique_ptr(op)); + } + + void InsertOp(size_t pos, std::unique_ptr&& op) { + InsertOp(pos, op.release(), true); + } + + void InsertOp(size_t pos, const framework::OperatorBase& op) { + InsertOp(pos, op.Clone()); } void CompleteAddOp(bool calculate = true); @@ -112,7 +133,7 @@ class NetOp : public framework::OperatorBase { std::unique_ptr Clone() const override; - std::vector> ops_; + std::vector> ops_; private: bool add_op_done_{false}; diff --git a/paddle/operators/net_op_test.cc b/paddle/operators/net_op_test.cc index e28d4df6a5709..e9598610c0a74 100644 --- a/paddle/operators/net_op_test.cc +++ b/paddle/operators/net_op_test.cc @@ -38,15 +38,12 @@ TEST(OpKernel, all) { auto net = std::make_shared(); ASSERT_NE(net, nullptr); - auto op1 = std::shared_ptr( + net->AddOp(std::unique_ptr( new TestOp("test", {{"X", {"x"}}, {"W", {"w1"}}, {"b", {"b1"}}}, - {{"Out", {"y"}}}, {})); - net->AddOp(op1); - - auto op2 = std::shared_ptr( + {{"Out", {"y"}}}, {}))); + net->AddOp(std::unique_ptr( new TestOp("test", {{"X", {"y"}}, {"W", {"w2"}}, {"b", {"b2"}}}, - {{"Out", {"z"}}}, {})); - net->AddOp(op2); + {{"Out", {"z"}}}, {}))); net->CompleteAddOp(); AssertSameVectorWithoutOrder({"x", "w1", "b1", "w2", "b2"}, @@ -61,21 +58,21 @@ TEST(OpKernel, all) { TEST(NetOp, insert_op) { NetOp net; - auto op1 = std::shared_ptr( + auto op1 = std::unique_ptr( new framework::NOP("empty", {{"X", {"x"}}, {"W", {"w1"}}, {"b", {"b1"}}}, {{"Out", {"y"}}}, {})); - net.AddOp(op1); - net.InsertOp(0, op1); + net.AddOp(*op1); + net.InsertOp(0, *op1); ASSERT_EQ(2UL, net.ops_.size()); - net.InsertOp(2, op1); + net.InsertOp(2, std::move(op1)); ASSERT_EQ(3UL, net.ops_.size()); } TEST(NetOp, Clone) { NetOp net; net.AddOp( - std::shared_ptr(new framework::NOP{"empty", {}, {}, {}})); - net.AddOp(std::shared_ptr( + std::unique_ptr(new framework::NOP{"empty", {}, {}, {}})); + net.AddOp(std::unique_ptr( new framework::NOP{"empty2", {}, {}, {}})); net.CompleteAddOp(true); auto new_net_op = net.Clone(); diff --git a/paddle/operators/recurrent_op.cc b/paddle/operators/recurrent_op.cc index 78ce0ba3c0fa4..aae78a1cecb8b 100644 --- a/paddle/operators/recurrent_op.cc +++ b/paddle/operators/recurrent_op.cc @@ -42,7 +42,7 @@ void RecurrentAlgorithm::InferShape(const Scope& scope) const { rnn::LinkMemories(step_scopes, arg_->memories, i, -1, true /*infer_shape_mode*/); } - (*stepnet_)->InferShape(*step_scopes[i]); + stepnet_->InferShape(*step_scopes[i]); } rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_, true /*infer_shape_mode*/); @@ -61,7 +61,7 @@ void RecurrentAlgorithm::Run(const Scope& scope, rnn::LinkMemories(step_scopes, arg_->memories, step_id, -1, false /*infer_shape_mode*/); } - (*stepnet_)->Run(*step_scopes[step_id], dev_ctx); + stepnet_->Run(*step_scopes[step_id], dev_ctx); } rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_, false /*infer_shape_mode*/); @@ -76,15 +76,15 @@ void RecurrentAlgorithm::CreateScopes(const Scope& scope) const { // Now all variables in scope must be created outside of op. PADDLE_ENFORCE_NOT_NULL(stepnet_); - PADDLE_ENFORCE(!(*stepnet_)->Outputs().empty(), "stepnet_ op has no outputs"); - PADDLE_ENFORCE(!(*stepnet_)->Outputs().empty(), "net_op has no outputs"); + PADDLE_ENFORCE(!stepnet_->Outputs().empty(), "stepnet_ op has no outputs"); + PADDLE_ENFORCE(!stepnet_->Outputs().empty(), "net_op has no outputs"); if (seq_len_ > step_scopes->size()) { for (size_t i = step_scopes->size(); i < seq_len_; ++i) { auto& step_scope = scope.NewScope(); // create step net's temp inputs - for (auto& input : (*stepnet_)->Inputs()) { + for (auto& input : stepnet_->Inputs()) { // the weight are located in parent scope for (auto& var_name : input.second) { if (!step_scope.FindVar(var_name)) { @@ -93,7 +93,7 @@ void RecurrentAlgorithm::CreateScopes(const Scope& scope) const { } } // create stepnet's outputs - for (const auto& output : (*stepnet_)->Outputs()) { + for (const auto& output : stepnet_->Outputs()) { for (auto& var_name : output.second) { step_scope.NewVar(var_name); } @@ -136,7 +136,7 @@ RecurrentOp::RecurrentOp(const std::string& type, const framework::AttributeMap& attrs) : OperatorBase(type, inputs, outputs, attrs) { rnn::InitArgument(kArgName, &arg_, *this); - alg_.Init(&arg_, &stepnet_); + alg_.Init(&arg_, stepnet_.get()); } class RecurrentAlgorithmProtoAndCheckerMaker @@ -178,7 +178,7 @@ void RecurrentGradientAlgorithm::Run( rnn::LinkMemories(step_scopes, arg_->memories, step_id, 1, false /*infer_shape_mode*/); } - (*stepnet_)->Run(*step_scopes[step_id], dev_ctx); + stepnet_->Run(*step_scopes[step_id], dev_ctx); } LinkBootMemoryGradients(step_scopes[0], false); rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_, @@ -215,7 +215,7 @@ void RecurrentGradientAlgorithm::InferShape(const Scope& scope) const { rnn::LinkMemories(step_scopes, arg_->memories, step_id, 1, true /*infer_shape_mode*/); } - (*stepnet_)->InferShape(*step_scopes[step_id]); + stepnet_->InferShape(*step_scopes[step_id]); } rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_, true /*infer_shape_mode*/); @@ -228,7 +228,7 @@ RecurrentGradientOp::RecurrentGradientOp( const framework::AttributeMap& attrs) : OperatorBase(type, inputs, outputs, attrs) { rnn::InitArgument(kArgName, &arg_, *this); - alg_.Init(&arg_, &stepnet_); + alg_.Init(&arg_, stepnet_.get()); } } // namespace operators diff --git a/paddle/operators/recurrent_op.h b/paddle/operators/recurrent_op.h index 1d8a6973955cf..4d091aa21241e 100644 --- a/paddle/operators/recurrent_op.h +++ b/paddle/operators/recurrent_op.h @@ -34,7 +34,7 @@ class RecurrentAlgorithm { void Run(const framework::Scope& scope, const platform::DeviceContext& dev_ctx) const; - void Init(rnn::Argument* arg, std::shared_ptr* stepnet) { + void Init(rnn::Argument* arg, framework::OperatorBase* stepnet) { PADDLE_ENFORCE_NOT_NULL(stepnet, "stepnet should be set before."); arg_ = arg; stepnet_ = stepnet; @@ -63,7 +63,7 @@ class RecurrentAlgorithm { void InitMemories(framework::Scope* step_scopes, bool infer_shape_mode) const; private: - std::shared_ptr* stepnet_; + framework::OperatorBase* stepnet_; rnn::Argument* arg_; mutable size_t seq_len_; }; @@ -80,7 +80,7 @@ class RecurrentGradientAlgorithm { * operator. */ public: - void Init(rnn::Argument* arg, std::shared_ptr* stepnet) { + void Init(rnn::Argument* arg, framework::OperatorBase* stepnet) { PADDLE_ENFORCE_NOT_NULL(stepnet, "stepnet should be set before."); arg_ = std::move(arg); stepnet_ = stepnet; @@ -107,7 +107,7 @@ class RecurrentGradientAlgorithm { private: rnn::Argument* arg_; mutable size_t seq_len_; - std::shared_ptr* stepnet_; + framework::OperatorBase* stepnet_; }; class RecurrentOp : public framework::OperatorBase { @@ -133,15 +133,17 @@ class RecurrentOp : public framework::OperatorBase { alg_.Run(scope, dev_ctx); } - void set_stepnet(std::shared_ptr net) { stepnet_ = net; } - const NetOp& stepnet() const { return *stepnet_; } + void set_stepnet(std::unique_ptr net) { + stepnet_ = std::move(net); + } + const OperatorBase& stepnet() const { return *stepnet_; } static const rnn::ArgumentName kArgName; private: RecurrentAlgorithm alg_; rnn::Argument arg_; - std::shared_ptr stepnet_; + std::unique_ptr stepnet_; }; class RecurrentGradientOp : public framework::OperatorBase { @@ -171,12 +173,14 @@ class RecurrentGradientOp : public framework::OperatorBase { static const rnn::ArgumentName kArgName; - void set_stepnet(const std::shared_ptr& net) { stepnet_ = net; } - const NetOp& stepnet() const { return *stepnet_; } + void set_stepnet(std::unique_ptr net) { + stepnet_ = std::move(net); + } + const OperatorBase& stepnet() const { return *stepnet_; } private: RecurrentGradientAlgorithm alg_; - std::shared_ptr stepnet_; + std::unique_ptr stepnet_; rnn::Argument arg_; }; From 8f80f5bc794d8900f9d57b51eea167f4dde2903c Mon Sep 17 00:00:00 2001 From: liaogang Date: Wed, 16 Aug 2017 19:46:12 +0800 Subject: [PATCH 05/21] FIX: Release CPU/GPU memory via deleter --- paddle/memory/memory.cc | 59 ++++++++++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 18 deletions(-) diff --git a/paddle/memory/memory.cc b/paddle/memory/memory.cc index 207025f9b1c64..5946c3ea4af5a 100644 --- a/paddle/memory/memory.cc +++ b/paddle/memory/memory.cc @@ -16,19 +16,31 @@ limitations under the License. */ #include "paddle/memory/detail/buddy_allocator.h" #include "paddle/memory/detail/system_allocator.h" -#include // for memcpy +#include // for transfrom +#include // for memcpy +#include // for call_once + +#include "glog/logging.h" namespace paddle { namespace memory { -detail::BuddyAllocator* GetCPUBuddyAllocator() { - static detail::BuddyAllocator* a = nullptr; - if (a == nullptr) { - a = new detail::BuddyAllocator(new detail::CPUAllocator, - platform::CpuMinChunkSize(), - platform::CpuMaxChunkSize()); - } - return a; +using BuddyAllocator = detail::BuddyAllocator; + +std::once_flag cpu_alloctor_flag; +std::once_flag gpu_alloctor_flag; + +BuddyAllocator* GetCPUBuddyAllocator() { + static std::unique_ptr a{ + nullptr, [](BuddyAllocator* p) { delete p; }}; + + std::call_once(cpu_alloctor_flag, [&]() { + a.reset(new BuddyAllocator(new detail::CPUAllocator, + platform::CpuMinChunkSize(), + platform::CpuMaxChunkSize())); + }); + + return a.get(); } template <> @@ -48,20 +60,31 @@ size_t Used(platform::CPUPlace place) { #ifndef PADDLE_ONLY_CPU -detail::BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) { - static detail::BuddyAllocator** as = NULL; - if (as == NULL) { +BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) { + using BuddyAllocVec = std::vector; + static std::unique_ptr as{ + new std::vector, [](BuddyAllocVec* p) { + std::for_each(p->begin(), p->end(), + [](BuddyAllocator* p) { delete p; }); + }}; + + // GPU buddy alloctors + auto& alloctors = *as.get(); + + // GPU buddy allocator initialization + std::call_once(gpu_alloctor_flag, [&]() { int gpu_num = platform::GetDeviceCount(); - as = new detail::BuddyAllocator*[gpu_num]; + alloctors.reserve(gpu_num); for (int gpu = 0; gpu < gpu_num; gpu++) { platform::SetDeviceId(gpu); - as[gpu] = new detail::BuddyAllocator(new detail::GPUAllocator, - platform::GpuMinChunkSize(), - platform::GpuMaxChunkSize()); + alloctors.emplace_back(new BuddyAllocator(new detail::GPUAllocator, + platform::GpuMinChunkSize(), + platform::GpuMaxChunkSize())); } - } + }); + platform::SetDeviceId(gpu_id); - return as[gpu_id]; + return alloctors[gpu_id]; } template <> From f15e083098d94af00c02f44e32f0b8891c079f55 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 16 Aug 2017 21:24:12 +0800 Subject: [PATCH 06/21] Remove std::shared_ptr in Python & C++ * Also simplify pybind implementation by using OperatorBase as holder type. --- paddle/framework/backward.cc | 4 +- paddle/framework/backward.h | 2 +- paddle/framework/backward_test.cc | 3 +- paddle/framework/pybind.cc | 124 +++++++----------- paddle/operators/net_op.h | 4 +- paddle/operators/recurrent_op.cc | 20 +-- paddle/operators/recurrent_op.h | 10 +- .../v2/framework/tests/gradient_checker.py | 1 - 8 files changed, 71 insertions(+), 97 deletions(-) diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc index a1049f718dcd3..9d30887224fe0 100644 --- a/paddle/framework/backward.cc +++ b/paddle/framework/backward.cc @@ -89,7 +89,7 @@ static std::unique_ptr BackwardRecursive( } // Returned gradient network - auto net = std::unique_ptr(); + auto net = std::unique_ptr(new operators::NetOp()); if (forwardOp.IsNetOp()) { // Because forwardOp is a net op, it can static_cast. @@ -204,7 +204,7 @@ static std::unique_ptr BackwardRecursive( } // See header for comments -std::shared_ptr Backward( +std::unique_ptr Backward( const OperatorBase& forwardOp, const std::unordered_set& no_grad_vars) { std::unordered_set no_grad_names; diff --git a/paddle/framework/backward.h b/paddle/framework/backward.h index c181919dc165c..1ecf69881b312 100644 --- a/paddle/framework/backward.h +++ b/paddle/framework/backward.h @@ -20,7 +20,7 @@ namespace framework { // Create the backward operator from a forward operator. // TODO(yuyang18): Add more API reference comment. -extern std::shared_ptr Backward( +extern std::unique_ptr Backward( const OperatorBase& forwardOp, const std::unordered_set& no_grad_vars); } // namespace framework diff --git a/paddle/framework/backward_test.cc b/paddle/framework/backward_test.cc index d942604bf0599..1003b1ccd83c8 100644 --- a/paddle/framework/backward_test.cc +++ b/paddle/framework/backward_test.cc @@ -180,8 +180,7 @@ TEST(Backward, simple_op_not_need_grad) { auto no_input_gop = f::Backward(*fwd, {"x", "b"}); ASSERT_NE(no_input_gop, nullptr); ASSERT_TRUE(no_input_gop->IsNetOp()); - ASSERT_EQ(0UL, - std::static_pointer_cast(no_input_gop)->ops_.size()); + ASSERT_EQ(0UL, static_cast(no_input_gop.get())->ops_.size()); } TEST(Backward, net_fc_backward_normal) { diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc index 2fc1e214b28d8..f0114b9e4908d 100644 --- a/paddle/framework/pybind.cc +++ b/paddle/framework/pybind.cc @@ -48,29 +48,6 @@ namespace framework { using Tensor = framework::Tensor; -template -void ExposeOperator(ClassType &m) { - m.def("infer_shape", &ClassType::type::InferShape) - .def("run", &ClassType::type::Run) - .def("type", - [](const typename ClassType::type &op) -> std::string { - return op.Type(); - }) - .def("outputs", - [](const typename ClassType::type &op) - -> std::map> { - return op.Outputs(); - }) - .def("inputs", - [](const typename ClassType::type &op) { return op.Inputs(); }) - .def("__str__", &ClassType::type::DebugString) - .def("no_intermediate_outputs", - [](const typename ClassType::type &op) { - return op.OutputVars(false); - }) - .def("support_gpu", &ClassType::type::SupportGPU); -} - static size_t UniqueIntegerGenerator() { static std::atomic generator; return generator.fetch_add(1); @@ -207,70 +184,69 @@ All parameter, weight, gradient are variables in Paddle. .def(py::init<>()) .def("__str__", string::to_string); - py::class_ operator_base(m, "Operator"); - - operator_base.def_static("create", [](py::bytes protobin) { - OpDesc desc; - PADDLE_ENFORCE(desc.ParsePartialFromString(protobin), - "Cannot parse user input to OpDesc"); - PADDLE_ENFORCE(desc.IsInitialized(), - "User OpDesc is not initialized, reason %s", - desc.InitializationErrorString()); - return OpRegistry::CreateOp(desc); - }); - - operator_base.def("backward", - [](const OperatorBase &forwardOp, - const std::unordered_set &no_grad_vars) { - return Backward(forwardOp, no_grad_vars); - }); - - ExposeOperator(operator_base); - - py::class_ net(m, "Net"); + py::class_(m, "Operator") + .def_static("create", + [](py::bytes protobin) { + OpDesc desc; + PADDLE_ENFORCE(desc.ParsePartialFromString(protobin), + "Cannot parse user input to OpDesc"); + PADDLE_ENFORCE(desc.IsInitialized(), + "User OpDesc is not initialized, reason %s", + desc.InitializationErrorString()); + return OpRegistry::CreateOp(desc); + }) + .def("backward", + [](const OperatorBase &forwardOp, + const std::unordered_set &no_grad_vars) { + return Backward(forwardOp, no_grad_vars).release(); + }) + .def("infer_shape", &OperatorBase::InferShape) + .def("run", &OperatorBase::Run) + .def("type", + [](const OperatorBase &op) -> std::string { return op.Type(); }) + .def("outputs", + [](const OperatorBase &op) + -> std::map> { + return op.Outputs(); + }) + .def("inputs", [](const OperatorBase &op) { return op.Inputs(); }) + .def("__str__", &OperatorBase::DebugString) + .def("no_intermediate_outputs", + [](const OperatorBase &op) { return op.OutputVars(false); }) + .def("support_gpu", &OperatorBase::SupportGPU); - net.def_static("create", - []() -> operators::NetOp * { - auto *retv = new operators::NetOp; - retv->SetType("plain_net"); - return retv; - }) + py::class_(m, "Net") + .def_static("create", + []() -> operators::NetOp * { + auto *retv = new operators::NetOp; + retv->SetType("plain_net"); + return retv; + }) .def("add_op", [](operators::NetOp &self, const OperatorBase &op) { self.AddOp(op); }) - .def("add_op", - [](operators::NetOp &self, const operators::NetOp &net) -> void { - self.AddOp(net); - }) - .def("add_op", - [](operators::NetOp &self, - const operators::RecurrentOp &rnn) -> void { self.AddOp(rnn); }) .def("complete_add_op", &operators::NetOp::CompleteAddOp) .def("complete_add_op", [](std::shared_ptr &self) { self->CompleteAddOp(); }); - ExposeOperator(net); - // recurrent_op - py::class_ rnn(m, "RecurrentOp"); - - rnn.def_static( - "create", - [](py::bytes protobin) -> operators::RecurrentOp * { - OpDesc desc; - PADDLE_ENFORCE(desc.ParsePartialFromString(protobin), - "Cannot parse user input to OpDesc"); - PADDLE_ENFORCE(desc.IsInitialized(), - "User OpDesc is not initialized, reason %s", - desc.InitializationErrorString()); - auto rnn_op = OpRegistry::CreateOp(desc); - return static_cast(rnn_op.release()); - }) + py::class_(m, "RecurrentOp") + .def_static( + "create", + [](py::bytes protobin) -> operators::RecurrentOp * { + OpDesc desc; + PADDLE_ENFORCE(desc.ParsePartialFromString(protobin), + "Cannot parse user input to OpDesc"); + PADDLE_ENFORCE(desc.IsInitialized(), + "User OpDesc is not initialized, reason %s", + desc.InitializationErrorString()); + auto rnn_op = OpRegistry::CreateOp(desc); + return static_cast(rnn_op.release()); + }) .def("set_stepnet", [](operators::RecurrentOp &self, const operators::NetOp &net) -> void { self.set_stepnet(net.Clone()); }); - ExposeOperator(rnn); m.def("unique_integer", UniqueIntegerGenerator); diff --git a/paddle/operators/net_op.h b/paddle/operators/net_op.h index 2ec65c63f3fac..ce7da1f383672 100644 --- a/paddle/operators/net_op.h +++ b/paddle/operators/net_op.h @@ -41,9 +41,7 @@ class NetOp : public framework::OperatorBase { NetOp(const std::string& type, const VarNameMap& inputs, const VarNameMap& outputs, const framework::AttributeMap& attrs); - NetOp(const NetOp& o) - : framework::OperatorBase( - static_cast(o)) { + NetOp(const NetOp& o) : framework::OperatorBase(o.type_, {}, {}, o.attrs_) { this->ops_.reserve(o.ops_.size()); std::transform( o.ops_.begin(), o.ops_.end(), std::back_inserter(this->ops_), diff --git a/paddle/operators/recurrent_op.cc b/paddle/operators/recurrent_op.cc index aae78a1cecb8b..78ce0ba3c0fa4 100644 --- a/paddle/operators/recurrent_op.cc +++ b/paddle/operators/recurrent_op.cc @@ -42,7 +42,7 @@ void RecurrentAlgorithm::InferShape(const Scope& scope) const { rnn::LinkMemories(step_scopes, arg_->memories, i, -1, true /*infer_shape_mode*/); } - stepnet_->InferShape(*step_scopes[i]); + (*stepnet_)->InferShape(*step_scopes[i]); } rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_, true /*infer_shape_mode*/); @@ -61,7 +61,7 @@ void RecurrentAlgorithm::Run(const Scope& scope, rnn::LinkMemories(step_scopes, arg_->memories, step_id, -1, false /*infer_shape_mode*/); } - stepnet_->Run(*step_scopes[step_id], dev_ctx); + (*stepnet_)->Run(*step_scopes[step_id], dev_ctx); } rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_, false /*infer_shape_mode*/); @@ -76,15 +76,15 @@ void RecurrentAlgorithm::CreateScopes(const Scope& scope) const { // Now all variables in scope must be created outside of op. PADDLE_ENFORCE_NOT_NULL(stepnet_); - PADDLE_ENFORCE(!stepnet_->Outputs().empty(), "stepnet_ op has no outputs"); - PADDLE_ENFORCE(!stepnet_->Outputs().empty(), "net_op has no outputs"); + PADDLE_ENFORCE(!(*stepnet_)->Outputs().empty(), "stepnet_ op has no outputs"); + PADDLE_ENFORCE(!(*stepnet_)->Outputs().empty(), "net_op has no outputs"); if (seq_len_ > step_scopes->size()) { for (size_t i = step_scopes->size(); i < seq_len_; ++i) { auto& step_scope = scope.NewScope(); // create step net's temp inputs - for (auto& input : stepnet_->Inputs()) { + for (auto& input : (*stepnet_)->Inputs()) { // the weight are located in parent scope for (auto& var_name : input.second) { if (!step_scope.FindVar(var_name)) { @@ -93,7 +93,7 @@ void RecurrentAlgorithm::CreateScopes(const Scope& scope) const { } } // create stepnet's outputs - for (const auto& output : stepnet_->Outputs()) { + for (const auto& output : (*stepnet_)->Outputs()) { for (auto& var_name : output.second) { step_scope.NewVar(var_name); } @@ -136,7 +136,7 @@ RecurrentOp::RecurrentOp(const std::string& type, const framework::AttributeMap& attrs) : OperatorBase(type, inputs, outputs, attrs) { rnn::InitArgument(kArgName, &arg_, *this); - alg_.Init(&arg_, stepnet_.get()); + alg_.Init(&arg_, &stepnet_); } class RecurrentAlgorithmProtoAndCheckerMaker @@ -178,7 +178,7 @@ void RecurrentGradientAlgorithm::Run( rnn::LinkMemories(step_scopes, arg_->memories, step_id, 1, false /*infer_shape_mode*/); } - stepnet_->Run(*step_scopes[step_id], dev_ctx); + (*stepnet_)->Run(*step_scopes[step_id], dev_ctx); } LinkBootMemoryGradients(step_scopes[0], false); rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_, @@ -215,7 +215,7 @@ void RecurrentGradientAlgorithm::InferShape(const Scope& scope) const { rnn::LinkMemories(step_scopes, arg_->memories, step_id, 1, true /*infer_shape_mode*/); } - stepnet_->InferShape(*step_scopes[step_id]); + (*stepnet_)->InferShape(*step_scopes[step_id]); } rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_, true /*infer_shape_mode*/); @@ -228,7 +228,7 @@ RecurrentGradientOp::RecurrentGradientOp( const framework::AttributeMap& attrs) : OperatorBase(type, inputs, outputs, attrs) { rnn::InitArgument(kArgName, &arg_, *this); - alg_.Init(&arg_, stepnet_.get()); + alg_.Init(&arg_, &stepnet_); } } // namespace operators diff --git a/paddle/operators/recurrent_op.h b/paddle/operators/recurrent_op.h index 4d091aa21241e..bcfa817de8242 100644 --- a/paddle/operators/recurrent_op.h +++ b/paddle/operators/recurrent_op.h @@ -34,7 +34,8 @@ class RecurrentAlgorithm { void Run(const framework::Scope& scope, const platform::DeviceContext& dev_ctx) const; - void Init(rnn::Argument* arg, framework::OperatorBase* stepnet) { + void Init(rnn::Argument* arg, + std::unique_ptr* stepnet) { PADDLE_ENFORCE_NOT_NULL(stepnet, "stepnet should be set before."); arg_ = arg; stepnet_ = stepnet; @@ -63,7 +64,7 @@ class RecurrentAlgorithm { void InitMemories(framework::Scope* step_scopes, bool infer_shape_mode) const; private: - framework::OperatorBase* stepnet_; + std::unique_ptr* stepnet_; rnn::Argument* arg_; mutable size_t seq_len_; }; @@ -80,7 +81,8 @@ class RecurrentGradientAlgorithm { * operator. */ public: - void Init(rnn::Argument* arg, framework::OperatorBase* stepnet) { + void Init(rnn::Argument* arg, + std::unique_ptr* stepnet) { PADDLE_ENFORCE_NOT_NULL(stepnet, "stepnet should be set before."); arg_ = std::move(arg); stepnet_ = stepnet; @@ -107,7 +109,7 @@ class RecurrentGradientAlgorithm { private: rnn::Argument* arg_; mutable size_t seq_len_; - framework::OperatorBase* stepnet_; + std::unique_ptr* stepnet_; }; class RecurrentOp : public framework::OperatorBase { diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py index 501cf6110ff74..831c0f0f2a909 100644 --- a/python/paddle/v2/framework/tests/gradient_checker.py +++ b/python/paddle/v2/framework/tests/gradient_checker.py @@ -165,7 +165,6 @@ def check_grad(self, for no_grad in no_grad_set: if no_grad not in in_names: raise ValueError("no_grad should be in in_names") - backward_op = core.Operator.backward(forward_op, no_grad_set) bwd_outputs = backward_op.outputs() From ac02fb82d7a76fce4a870acc30891d657a83ab9c Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 17 Aug 2017 10:14:05 +0800 Subject: [PATCH 07/21] FIX: tensor memory must be gaven back to buddy allocator for free --- paddle/operators/gather_test.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/paddle/operators/gather_test.cc b/paddle/operators/gather_test.cc index d24d83f299fdb..0ae1e99452973 100644 --- a/paddle/operators/gather_test.cc +++ b/paddle/operators/gather_test.cc @@ -45,4 +45,8 @@ TEST(Gather, GatherData) { for (int i = 0; i < 4; ++i) EXPECT_EQ(p_output[i], i + 4); for (int i = 4; i < 8; ++i) EXPECT_EQ(p_output[i], i - 4); + + delete src; + delete index; + delete output; } From 2f7489fbd565caff0608214808ef682b6f46f984 Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 17 Aug 2017 10:14:48 +0800 Subject: [PATCH 08/21] change use_pinned_memory to true for cpu --- paddle/memory/detail/system_allocator.cc | 2 +- paddle/memory/memory.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/memory/detail/system_allocator.cc b/paddle/memory/detail/system_allocator.cc index f61e67a329060..a270bd5958152 100644 --- a/paddle/memory/detail/system_allocator.cc +++ b/paddle/memory/detail/system_allocator.cc @@ -27,7 +27,7 @@ limitations under the License. */ // between host and device. Allocates too much would reduce the amount // of memory available to the system for paging. So, by default, we // should set false to use_pinned_memory. -DEFINE_bool(use_pinned_memory, false, "If set, allocate cpu pinned memory."); +DEFINE_bool(use_pinned_memory, true, "If set, allocate cpu pinned memory."); namespace paddle { namespace memory { diff --git a/paddle/memory/memory.cc b/paddle/memory/memory.cc index 5946c3ea4af5a..684635405a5e4 100644 --- a/paddle/memory/memory.cc +++ b/paddle/memory/memory.cc @@ -63,7 +63,7 @@ size_t Used(platform::CPUPlace place) { BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) { using BuddyAllocVec = std::vector; static std::unique_ptr as{ - new std::vector, [](BuddyAllocVec* p) { + new BuddyAllocVec, [](BuddyAllocVec* p) { std::for_each(p->begin(), p->end(), [](BuddyAllocator* p) { delete p; }); }}; From 33228cacddc5058feeb5444cf98dd25dc2ca77c0 Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 17 Aug 2017 10:21:28 +0800 Subject: [PATCH 09/21] Fix typo error --- paddle/memory/memory.cc | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/paddle/memory/memory.cc b/paddle/memory/memory.cc index 684635405a5e4..99c62b50ee59e 100644 --- a/paddle/memory/memory.cc +++ b/paddle/memory/memory.cc @@ -27,14 +27,14 @@ namespace memory { using BuddyAllocator = detail::BuddyAllocator; -std::once_flag cpu_alloctor_flag; -std::once_flag gpu_alloctor_flag; +std::once_flag cpu_allocator_flag; +std::once_flag gpu_allocator_flag; BuddyAllocator* GetCPUBuddyAllocator() { static std::unique_ptr a{ nullptr, [](BuddyAllocator* p) { delete p; }}; - std::call_once(cpu_alloctor_flag, [&]() { + std::call_once(cpu_allocator_flag, [&]() { a.reset(new BuddyAllocator(new detail::CPUAllocator, platform::CpuMinChunkSize(), platform::CpuMaxChunkSize())); @@ -68,23 +68,23 @@ BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) { [](BuddyAllocator* p) { delete p; }); }}; - // GPU buddy alloctors - auto& alloctors = *as.get(); + // GPU buddy allocators + auto& allocators = *as.get(); // GPU buddy allocator initialization - std::call_once(gpu_alloctor_flag, [&]() { + std::call_once(gpu_allocator_flag, [&]() { int gpu_num = platform::GetDeviceCount(); - alloctors.reserve(gpu_num); + allocators.reserve(gpu_num); for (int gpu = 0; gpu < gpu_num; gpu++) { platform::SetDeviceId(gpu); - alloctors.emplace_back(new BuddyAllocator(new detail::GPUAllocator, - platform::GpuMinChunkSize(), - platform::GpuMaxChunkSize())); + allocators.emplace_back(new BuddyAllocator(new detail::GPUAllocator, + platform::GpuMinChunkSize(), + platform::GpuMaxChunkSize())); } }); platform::SetDeviceId(gpu_id); - return alloctors[gpu_id]; + return allocators[gpu_id]; } template <> From ff5bfc1c89b5b52588a128e90399bdac804a0b44 Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 17 Aug 2017 10:23:59 +0800 Subject: [PATCH 10/21] Google style for header file includes --- paddle/memory/memory.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/paddle/memory/memory.cc b/paddle/memory/memory.cc index 99c62b50ee59e..be346325c2bb7 100644 --- a/paddle/memory/memory.cc +++ b/paddle/memory/memory.cc @@ -13,14 +13,13 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/memory/memory.h" -#include "paddle/memory/detail/buddy_allocator.h" -#include "paddle/memory/detail/system_allocator.h" #include // for transfrom #include // for memcpy #include // for call_once -#include "glog/logging.h" +#include "paddle/memory/detail/buddy_allocator.h" +#include "paddle/memory/detail/system_allocator.h" namespace paddle { namespace memory { From 3f9fe6248754bcfd85356174725c99a23e763c8a Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 17 Aug 2017 10:24:20 +0800 Subject: [PATCH 11/21] Fix typo error --- paddle/memory/memory.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/memory/memory.cc b/paddle/memory/memory.cc index be346325c2bb7..dfe9f16f7467a 100644 --- a/paddle/memory/memory.cc +++ b/paddle/memory/memory.cc @@ -14,7 +14,7 @@ limitations under the License. */ #include "paddle/memory/memory.h" -#include // for transfrom +#include // for transform #include // for memcpy #include // for call_once From d8560ec2e819c5a708caf5e35f791571ea3628aa Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 17 Aug 2017 10:29:50 +0800 Subject: [PATCH 12/21] Fix scatter_test --- paddle/operators/scatter_test.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/paddle/operators/scatter_test.cc b/paddle/operators/scatter_test.cc index 4449ce6564396..26fdaff1460a2 100644 --- a/paddle/operators/scatter_test.cc +++ b/paddle/operators/scatter_test.cc @@ -49,4 +49,8 @@ TEST(scatter, ScatterUpdate) { EXPECT_EQ(output->data()[i], float(i - 4)); for (size_t i = 8; i < 16; ++i) EXPECT_EQ(p_output[i], float(0)); for (size_t i = 8; i < 16; ++i) EXPECT_EQ(output->data()[i], float(0)); + + delete src; + delete index; + delete output; } From 4b148d0afd9bdf255c0e69b406577e83ae156388 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 17 Aug 2017 10:59:10 +0800 Subject: [PATCH 13/21] Fix typo --- paddle/framework/operator.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index 90e30bee0a1ae..64481706528c0 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -119,7 +119,7 @@ class OperatorBase { protected: std::string type_; // NOTE: in case of OpGrad, inputs_ contains: - // I (Inputs)opear + // I (Inputs) // O (Outputs) // OG (Output Gradients) VarNameMap inputs_; From 225579b9d9ab28de046805f40301d68d9dd3b5cb Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 17 Aug 2017 11:10:32 +0800 Subject: [PATCH 14/21] Remove own for add_op * add_op could take a unique_ptr or a const reference. If unique_ptr is taken, the NetOp will take care of that operator's life cycle. If a const reference is taken, that op will be Cloned. --- paddle/operators/net_op.h | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/paddle/operators/net_op.h b/paddle/operators/net_op.h index ce7da1f383672..e8720c9609979 100644 --- a/paddle/operators/net_op.h +++ b/paddle/operators/net_op.h @@ -89,33 +89,18 @@ class NetOp : public framework::OperatorBase { /** * @brief Add an operator by ptr */ - void AddOp(framework::OperatorBase* op, bool own) { + void AddOp(std::unique_ptr&& op) { PADDLE_ENFORCE(!add_op_done_, "Cannot AddOp when this network is sealed"); PADDLE_ENFORCE_NOT_NULL(op, "Cannot Insert Null op"); - if (!own) { - op = op->Clone().release(); - } - ops_.emplace_back(op); - } - - void AddOp(std::unique_ptr&& op) { - AddOp(op.release(), true); + ops_.push_back(std::move(op)); } - void InsertOp(size_t pos, framework::OperatorBase* op, bool own) { + void InsertOp(size_t pos, std::unique_ptr&& op) { PADDLE_ENFORCE(!add_op_done_, "Cannot InsertOp when this network is sealed"); PADDLE_ENFORCE_NOT_NULL(op, "Cannot Insert Null op"); PADDLE_ENFORCE_LE(pos, ops_.size(), "Out of range"); - if (!own) { - op = op->Clone().release(); - } - ops_.insert(ops_.begin() + pos, - std::unique_ptr(op)); - } - - void InsertOp(size_t pos, std::unique_ptr&& op) { - InsertOp(pos, op.release(), true); + ops_.insert(ops_.begin() + pos, std::move(op)); } void InsertOp(size_t pos, const framework::OperatorBase& op) { From a28a5564d26e9aeac48cb41f2f2bd40fcd73946a Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Thu, 17 Aug 2017 11:55:48 +0800 Subject: [PATCH 15/21] add more comments and fix code style. --- .../v2/framework/tests/gradient_checker.py | 64 +++++++++++++++---- 1 file changed, 51 insertions(+), 13 deletions(-) diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py index d251f14b9d158..2c92dfa43e7ee 100644 --- a/python/paddle/v2/framework/tests/gradient_checker.py +++ b/python/paddle/v2/framework/tests/gradient_checker.py @@ -110,7 +110,24 @@ def product(dim): class GradientChecker(unittest.TestCase): - def get_grad(self, forward_op, backward_op, input_vars, grad_names, place): + def __get_gradient(self, forward_op, backward_op, input_value, grad_names, + place): + """Get the input gradients after running forward and backward operators + on the given places. + + :param forward_op: forward operator + :type forward_op: Operator + :param backward_op: backward operator + :type backward_op: Operator + :param input_value: input values. + :type input_value: dict{string:numpy.array} + :param grad_names: the names of returned input gradients. + :type input_value: a list of string + :param place: the device type. + :type place: CPUPlace or GPUPlace + :return: the input grdients of given grad_names. + :rtype: a list of numpy.array + """ scope = core.Scope() ctx = core.DeviceContext.create(place) @@ -120,7 +137,7 @@ def get_grad(self, forward_op, backward_op, input_vars, grad_names, place): out_names = [item for k in outputs for item in outputs[k]] # create input var and set value - for name, value in input_vars.iteritems(): + for name, value in input_value.iteritems(): if name not in in_names: raise ValueError(name + "does not exist in Op's inputs.") var = scope.new_var(name).get_tensor() @@ -154,7 +171,16 @@ def get_grad(self, forward_op, backward_op, input_vars, grad_names, place): ] return outs - def compare_grad(self, forward_op, inputs): + def compare_grad(self, forward_op, input_value): + """ Compare the input gradients between CPU and GPU for the given forward + operator. + + :param forward_op: forward operator + :type forward_op: Operator + :param input_value: input values. + :type input_value: dict{string:numpy.array} + :raises: AssertionError, there is different gradient value. + """ backward_op = core.Operator.backward(forward_op, set()) # return if not compile with GPU or not implementing GPU kernel if not (core.is_compile_gpu() and backward_op.support_gpu()): @@ -162,19 +188,31 @@ def compare_grad(self, forward_op, inputs): outputs = backward_op.outputs() out_names = [item for k in outputs for item in outputs[k]] - cpu_grads = self.get_grad(forward_op, backward_op, inputs, out_names, - core.CPUPlace()) - gpu_grads = self.get_grad(forward_op, backward_op, inputs, out_names, - core.GPUPlace(0)) + cpu_grads = self.get_grad(forward_op, backward_op, input_value, + out_names, core.CPUPlace()) + gpu_grads = self.get_grad(forward_op, backward_op, input_value, + out_names, core.GPUPlace(0)) for c_grad, g_grad, name in itertools.izip(cpu_grads, gpu_grads, out_names): self.assertTrue( - numpy.allclose(c_grad, g_grad), + numpy.allclose( + c_grad, g_grad, atol=1e-4), "output name: " + name + " has diff") - def assert_is_close(self, numeric_grads, analytic_grads, names, - max_relative_error, msg_prefix): + def __assert_is_close(self, numeric_grads, analytic_grads, names, + max_relative_error, msg_prefix): + """Use relative error for the comparison. + + :param numeric_grads: the numerical graidents. + :type numeric_grads: a list of numpy.array + :param analytic_grads: the analytical graidents. + :type analytic_grads: a list of numpy.array + :param name: the names of gradients, used to print for debug. + :type names: a list of string + :param msg_prefix: string info, used to print for debug. + :type msf_prefix: string + """ for a, b, name in itertools.izip(numeric_grads, analytic_grads, names): abs_a = numpy.abs(a) # if abs_a is nearly zero, then use abs error for a, not relative @@ -241,6 +279,6 @@ def check_grad(self, # get analytical gradients according to different device analytic_grads = self.get_grad(forward_op, backward_op, input_vars, check_names, place) - self.assert_is_close(numeric_grads, analytic_grads, check_names, - max_relative_error, - "Gradient Check On %s" % str(place)) + self.__assert_is_close(numeric_grads, analytic_grads, check_names, + max_relative_error, + "Gradient Check On %s" % str(place)) From 47f380bb4786f93aa95da809a8d7f18d862b78ca Mon Sep 17 00:00:00 2001 From: Yancey Date: Thu, 17 Aug 2017 14:16:04 +0800 Subject: [PATCH 16/21] fix ldconfig (#3547) --- paddle/scripts/docker/build.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index 7c12664aed795..2941662f349ba 100644 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -146,7 +146,8 @@ RUN apt-get update &&\ pip install /*.whl; apt-get install -f -y && \ apt-get clean -y && \ rm -f /*.whl && \ - paddle version + paddle version && \ + ldconfig ${DOCKERFILE_CUDNN_DSO} ${DOCKERFILE_GPU_ENV} ADD go/cmd/pserver/pserver /usr/bin/ From 5181aefc6bf6d1af1a769879f8cddc9ae9bc2a20 Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Thu, 17 Aug 2017 14:18:51 +0800 Subject: [PATCH 17/21] tune max relative error for sigmoid op unit test. --- paddle/operators/sigmoid_op.h | 2 +- python/paddle/v2/framework/tests/gradient_checker.py | 12 ++++++------ python/paddle/v2/framework/tests/test_sigmoid_op.py | 8 ++++---- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/paddle/operators/sigmoid_op.h b/paddle/operators/sigmoid_op.h index 11ab923eb346c..b01a9b3f23283 100644 --- a/paddle/operators/sigmoid_op.h +++ b/paddle/operators/sigmoid_op.h @@ -37,7 +37,7 @@ class SigmoidKernel : public framework::OpKernel { auto Y = EigenVector::Flatten(*output); auto place = context.GetEigenDevice(); - Y.device(place) = 1.0 / (1.0 + (-1.0 * X).exp()); + Y.device(place) = 1. / (1. + (-X).exp()); } }; diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py index 2c92dfa43e7ee..12f302fe251f6 100644 --- a/python/paddle/v2/framework/tests/gradient_checker.py +++ b/python/paddle/v2/framework/tests/gradient_checker.py @@ -188,10 +188,10 @@ def compare_grad(self, forward_op, input_value): outputs = backward_op.outputs() out_names = [item for k in outputs for item in outputs[k]] - cpu_grads = self.get_grad(forward_op, backward_op, input_value, - out_names, core.CPUPlace()) - gpu_grads = self.get_grad(forward_op, backward_op, input_value, - out_names, core.GPUPlace(0)) + cpu_grads = self.__get_gradient(forward_op, backward_op, input_value, + out_names, core.CPUPlace()) + gpu_grads = self.__get_gradient(forward_op, backward_op, input_value, + out_names, core.GPUPlace(0)) for c_grad, g_grad, name in itertools.izip(cpu_grads, gpu_grads, out_names): @@ -277,8 +277,8 @@ def check_grad(self, check_names = [grad_var_name(name) for name in inputs_to_check] for place in places: # get analytical gradients according to different device - analytic_grads = self.get_grad(forward_op, backward_op, input_vars, - check_names, place) + analytic_grads = self.__get_gradient(forward_op, backward_op, + input_vars, check_names, place) self.__assert_is_close(numeric_grads, analytic_grads, check_names, max_relative_error, "Gradient Check On %s" % str(place)) diff --git a/python/paddle/v2/framework/tests/test_sigmoid_op.py b/python/paddle/v2/framework/tests/test_sigmoid_op.py index c3bd79f5dc0ba..273c2e5ab1a84 100644 --- a/python/paddle/v2/framework/tests/test_sigmoid_op.py +++ b/python/paddle/v2/framework/tests/test_sigmoid_op.py @@ -14,14 +14,14 @@ def setUp(self): class TestSigmoidGradOp(GradientChecker): - def test_compare_grad(self): + def test_grad(self): op = create_op("sigmoid") - inputs = {"X": np.random.random((11, 17)).astype("float32")} + inputs = {"X": np.random.uniform(0.1, 1, [11, 17]).astype("float32")} # compare gpu and cpu results for backward op. - # skip this test if only compiling CPU version. + # this test will be skiped if only compiling CPU version. self.compare_grad(op, inputs) # check gradients - self.check_grad(op, inputs, set("X"), "Y") + self.check_grad(op, inputs, set("X"), "Y", max_relative_error=0.007) if __name__ == '__main__': From 1365f2d15e6f1e02592a5cf5b5f5d07a0eb7f99c Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 17 Aug 2017 14:37:03 +0800 Subject: [PATCH 18/21] Remove R-Value reference in AddOp Fit Google C++ Style --- paddle/operators/net_op.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/operators/net_op.h b/paddle/operators/net_op.h index e8720c9609979..885ac6eeca659 100644 --- a/paddle/operators/net_op.h +++ b/paddle/operators/net_op.h @@ -89,13 +89,13 @@ class NetOp : public framework::OperatorBase { /** * @brief Add an operator by ptr */ - void AddOp(std::unique_ptr&& op) { + void AddOp(std::unique_ptr op) { PADDLE_ENFORCE(!add_op_done_, "Cannot AddOp when this network is sealed"); PADDLE_ENFORCE_NOT_NULL(op, "Cannot Insert Null op"); ops_.push_back(std::move(op)); } - void InsertOp(size_t pos, std::unique_ptr&& op) { + void InsertOp(size_t pos, std::unique_ptr op) { PADDLE_ENFORCE(!add_op_done_, "Cannot InsertOp when this network is sealed"); PADDLE_ENFORCE_NOT_NULL(op, "Cannot Insert Null op"); From 94b58a29d6613f528076269d1332ad9d2f43ec67 Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 17 Aug 2017 14:58:21 +0800 Subject: [PATCH 19/21] Follow comments --- paddle/memory/memory.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/paddle/memory/memory.cc b/paddle/memory/memory.cc index dfe9f16f7467a..c99cc541566d2 100644 --- a/paddle/memory/memory.cc +++ b/paddle/memory/memory.cc @@ -30,8 +30,7 @@ std::once_flag cpu_allocator_flag; std::once_flag gpu_allocator_flag; BuddyAllocator* GetCPUBuddyAllocator() { - static std::unique_ptr a{ - nullptr, [](BuddyAllocator* p) { delete p; }}; + static std::unique_ptr a{nullptr}; std::call_once(cpu_allocator_flag, [&]() { a.reset(new BuddyAllocator(new detail::CPUAllocator, From 017a3818dee89ec1cd2b73b31ced9f6c51a12c8e Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 17 Aug 2017 16:38:15 +0800 Subject: [PATCH 20/21] Add memory.h for unique_ptr --- paddle/memory/memory.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/paddle/memory/memory.cc b/paddle/memory/memory.cc index c99cc541566d2..0266bf4f7d65c 100644 --- a/paddle/memory/memory.cc +++ b/paddle/memory/memory.cc @@ -16,6 +16,7 @@ limitations under the License. */ #include // for transform #include // for memcpy +#include // for unique_ptr #include // for call_once #include "paddle/memory/detail/buddy_allocator.h" From 7f8c3f82145dd02cf7d136f27de42a6f0a56024b Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 17 Aug 2017 18:02:20 +0800 Subject: [PATCH 21/21] Add MeanOp's Gradient Test And Fix Mean Op Gradient --- paddle/operators/mean_op.h | 3 ++- python/paddle/v2/framework/tests/test_mean_op.py | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/paddle/operators/mean_op.h b/paddle/operators/mean_op.h index fcb703e63bd5a..9848af280b627 100644 --- a/paddle/operators/mean_op.h +++ b/paddle/operators/mean_op.h @@ -55,9 +55,10 @@ class MeanGradKernel : public framework::OpKernel { IG->mutable_data(context.GetPlace()); T ig_size = (T)framework::product(IG->dims()); + Eigen::DSizes bcast(ig_size); EigenVector::Flatten(*IG).device(context.GetEigenDevice()) = - EigenScalar::From(*OG) / ig_size; + (EigenVector::From(*OG) / ig_size).broadcast(bcast); } }; diff --git a/python/paddle/v2/framework/tests/test_mean_op.py b/python/paddle/v2/framework/tests/test_mean_op.py index b5d52b90567bc..f32b3160d651a 100644 --- a/python/paddle/v2/framework/tests/test_mean_op.py +++ b/python/paddle/v2/framework/tests/test_mean_op.py @@ -1,5 +1,6 @@ import unittest from op_test_util import OpTestMeta +from gradient_checker import GradientChecker, create_op import numpy as np @@ -12,5 +13,12 @@ def setUp(self): self.outputs = {'Out': np.mean(self.inputs['X'])} +class MeanGradOpTest(GradientChecker): + def test_normal(self): + op = create_op("mean") + inputs = {"X": np.random.random((10, 10)).astype("float32")} + self.check_grad(op, inputs, set("X"), "Out") + + if __name__ == '__main__': unittest.main()