-
Notifications
You must be signed in to change notification settings - Fork 5.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Adding the implementation for rmsprop operator #4565
Changes from 2 commits
61c03f9
163d287
94855f4
fa12e51
0336304
f52cdaa
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. | ||
|
||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
|
||
http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. */ | ||
|
||
#include "paddle/operators/rmsprop_op.h" | ||
|
||
namespace paddle { | ||
namespace operators { | ||
|
||
class RmspropOp : public framework::OperatorWithKernel { | ||
public: | ||
using framework::OperatorWithKernel::OperatorWithKernel; | ||
|
||
protected: | ||
void InferShape(framework::InferShapeContextBase *ctx) const override { | ||
PADDLE_ENFORCE(ctx->HasInput("Param"), | ||
"Input(Param) of RmspropOp should not be null."); | ||
PADDLE_ENFORCE(ctx->HasInput("Grad"), | ||
"Input(Grad) of RmspropOp should not be null."); | ||
PADDLE_ENFORCE(ctx->HasInput("Moment"), | ||
"Input(Moment) of RmspropOp should not be null."); | ||
PADDLE_ENFORCE(ctx->HasInput("LearningRate"), | ||
"Input(LearningRate) of RmspropOp should not be null."); | ||
|
||
PADDLE_ENFORCE(ctx->HasOutput("ParamOut"), | ||
"Output(param_out) of RmspropOp should not be null."); | ||
PADDLE_ENFORCE(ctx->HasOutput("MomentOut"), | ||
"Output(moment_out) of RmspropOp should not be null."); | ||
|
||
auto param_dim = ctx->GetInputDim("Param"); | ||
PADDLE_ENFORCE_EQ( | ||
param_dim, ctx->GetInputDim("Grad"), | ||
"Param and grad input of RmspropOp should have the same dimension."); | ||
PADDLE_ENFORCE_EQ( | ||
param_dim, ctx->GetInputDim("Moment"), | ||
"Param and moment input of RmspropOp should have the same dimension."); | ||
|
||
auto lr_dim = ctx->GetInputDim("LearningRate"); | ||
PADDLE_ENFORCE_EQ(framework::product(lr_dim), 1, | ||
"Learning Rate should be a scalar."); | ||
|
||
ctx->SetOutputDim("ParamOut", param_dim); | ||
ctx->SetOutputDim("MomentOut", param_dim); | ||
} | ||
}; | ||
|
||
// Declares the rmsprop operator's inputs, outputs, attributes and
// user-facing documentation.
class RmspropOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  RmspropOpMaker(framework::OpProto *proto,
                 framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    // Descriptions follow the "(type) description" convention so the
    // generated operator documentation is self-explanatory.
    AddInput("Param", "(Tensor) Parameter value to be updated.");
    AddInput("Grad",
             "(Tensor) Gradient of the parameter; same dimension as Param.");
    AddInput("Moment",
             "(Tensor) Second moment, i.e. the moving average of the squared "
             "gradient; same dimension as Param.");
    AddInput("LearningRate",
             "(Tensor) Learning rate; must hold exactly one element.");

    AddOutput("ParamOut", "(Tensor) Updated parameter value.");
    AddOutput("MomentOut", "(Tensor) Updated second moment.");

    // Defaults make both attributes optional; callers that already pass
    // explicit values are unaffected.
    // NOTE(review): the PR discussion suggested adopting the attribute names
    // commonly used by other frameworks, see
    // https://github.com/tensorflow/tensorflow/blob/994226a4a992c4a0205bca9e2f394cb644775ad7/tensorflow/core/ops/training_ops.cc#L1281
    // The existing names are kept here so current callers keep working.
    AddAttr<float>("epsilon",
                   "(float, default 1e-10) Constant for numerical stability.")
        .SetDefault(1.0e-10f);
    AddAttr<float>("decayRate",
                   "(float, default 0.9) Decay rate for moving average of "
                   "gradients.")
        .SetDefault(0.9f);
    AddComment(R"DOC(

RMSprop

MomentOut = decayRate * Moment + (1 - decayRate) * Grad * Grad
ParamOut = Param - LearningRate * Grad / (sqrt(MomentOut) + epsilon)

The original slide(Slide 29 of
http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf)
does not have the epsilon attribute. It is added here for numerical stability
to avoid division by zero.

)DOC");
  }
};
} // namespace operators | ||
} // namespace paddle | ||
|
||
// Short alias for the operators namespace, used by the registration macros below. | ||
namespace ops = paddle::operators; | ||
// Register the "rmsprop" operator with its maker via the without-gradient registration macro. | ||
REGISTER_OP_WITHOUT_GRADIENT(rmsprop, ops::RmspropOp, ops::RmspropOpMaker); | ||
// Register the CPU kernel for "rmsprop", instantiated for float on CPUPlace. | ||
REGISTER_OP_CPU_KERNEL(rmsprop, | ||
ops::RmspropOpKernel<paddle::platform::CPUPlace, float>); |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. | ||
|
||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
|
||
http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. */ | ||
|
||
#define EIGEN_USE_GPU | ||
#include "paddle/operators/rmsprop_op.h" | ||
|
||
// Short alias for the operators namespace, used by the registration macro below. | ||
namespace ops = paddle::operators; | ||
// Register the GPU kernel for "rmsprop", instantiated for float on GPUPlace. | ||
REGISTER_OP_GPU_KERNEL(rmsprop, | ||
ops::RmspropOpKernel<paddle::platform::GPUPlace, float>); |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. | ||
|
||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
|
||
http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. */ | ||
|
||
#pragma once | ||
#include "paddle/framework/eigen.h" | ||
#include "paddle/framework/op_registry.h" | ||
|
||
namespace paddle { | ||
namespace operators { | ||
|
||
// Shorthand for the framework tensor type used throughout this header. | ||
using Tensor = framework::Tensor; | ||
// Alias for framework::EigenVector that defaults to row-major layout and | ||
// Eigen::DenseIndex indices, so call sites only need to name the element type. | ||
template <typename T, int MajorType = Eigen::RowMajor, | ||
typename IndexType = Eigen::DenseIndex> | ||
using EigenVector = framework::EigenVector<T, MajorType, IndexType>; | ||
|
||
template <typename Place, typename T> | ||
class RmspropOpKernel : public framework::OpKernel<T> { | ||
public: | ||
void Compute(const framework::ExecutionContext& ctx) const override { | ||
auto param_out = ctx.Output<Tensor>("ParamOut"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. here please use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see, I just assumed for now that auto will resolve this by itself. But I see the point of make it more understandable for users. Will fix. |
||
auto moment_out = ctx.Output<Tensor>("MomentOut"); | ||
|
||
param_out->mutable_data<T>(ctx.GetPlace()); | ||
moment_out->mutable_data<T>(ctx.GetPlace()); | ||
|
||
float epsilon = ctx.Attr<float>("epsilon"); | ||
float decay = ctx.Attr<float>("decayRate"); | ||
|
||
auto p = EigenVector<T>::Flatten(*ctx.Input<Tensor>("Param")); | ||
auto g = EigenVector<T>::Flatten(*ctx.Input<Tensor>("Grad")); | ||
auto m = EigenVector<T>::Flatten(*ctx.Input<Tensor>("Moment")); | ||
float lr = ctx.Input<Tensor>("LearningRate")->data<float>()[0]; | ||
auto p_out = EigenVector<T>::Flatten(*param_out); | ||
auto m_out = EigenVector<T>::Flatten(*moment_out); | ||
auto place = ctx.GetEigenDevice<Place>(); | ||
|
||
m_out.device(place) = decay * m + (1 - decay) * g * g; | ||
p_out.device(place) = p - lr * g / (m_out.sqrt() + epsilon); | ||
} | ||
}; | ||
|
||
} // namespace operators | ||
} // namespace paddle |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
import unittest | ||
import numpy as np | ||
from op_test import OpTest | ||
|
||
|
||
class TestRmspropOp(OpTest):
    """Compares the rmsprop operator's outputs with a NumPy reference."""

    def setUp(self):
        """Builds random inputs and the expected ParamOut / MomentOut."""
        self.op_type = "rmsprop"

        shape = (123, 321)
        param = np.random.random(shape).astype("float32")
        grad = np.random.random(shape).astype("float32")
        # Use a non-zero moment so the `decay_rate * moment` term contributes
        # to the expected values; with an all-zero moment a kernel that
        # ignored the decay term would still pass. Values are in [0, 1), so
        # the square root below stays real.
        moment = np.random.random(shape).astype("float32")
        # The learning rate is fed as a one-element input tensor rather than
        # an attribute; per the review discussion this matches how the other
        # optimizer operators are being implemented.
        learning_rate = np.array([0.01]).astype("float32")

        epsilon = 1e-6
        decay_rate = 0.9

        self.inputs = {
            'Param': param,
            'Grad': grad,
            'Moment': moment,
            'LearningRate': learning_rate
        }

        self.attrs = {'epsilon': epsilon, 'decayRate': decay_rate}

        # Reference implementation of the update rule.
        moment_out = decay_rate * moment + (1 - decay_rate) * grad * grad
        param_out = param - learning_rate * grad / (np.sqrt(moment_out) +
                                                    epsilon)

        self.outputs = {'ParamOut': param_out, 'MomentOut': moment_out}

    def test_check_output(self):
        """Runs the operator and checks its outputs against self.outputs."""
        self.check_output()
|
||
|
||
# Run the tests with unittest when this file is executed as a script. | ||
if __name__ == "__main__": | ||
unittest.main() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is a typo here: it should be `momentum`. Also, the comment is not helpful; comments should be self-explanatory, in the format `(type): comment`, e.g. `(tensor): blabla`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a good point, will fix this.