-
Notifications
You must be signed in to change notification settings - Fork 5.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Adding the implementation for rmsprop operator #4565
Changes from 1 commit
61c03f9
163d287
94855f4
fa12e51
0336304
f52cdaa
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. | ||
|
||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
|
||
http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. */ | ||
|
||
#include "paddle/operators/rmsprop_op.h" | ||
|
||
namespace paddle { | ||
namespace operators { | ||
|
||
class RmspropOp : public framework::OperatorWithKernel { | ||
public: | ||
using framework::OperatorWithKernel::OperatorWithKernel; | ||
|
||
protected: | ||
void InferShape(framework::InferShapeContextBase *ctx) const override { | ||
PADDLE_ENFORCE(ctx->HasInput("Param"), | ||
"Input(param) of RmspropOp should not be null."); | ||
PADDLE_ENFORCE(ctx->HasInput("Grad"), | ||
"Input(grad) of RmspropOp should not be null."); | ||
PADDLE_ENFORCE(ctx->HasInput("Moment"), | ||
"Input(moment) of RmspropOp should not be null."); | ||
|
||
PADDLE_ENFORCE(ctx->HasOutput("ParamOut"), | ||
"Output(param_out) of RmspropOp should not be null."); | ||
PADDLE_ENFORCE(ctx->HasOutput("MomentOut"), | ||
"Output(moment_out) of RmspropOp should not be null."); | ||
|
||
auto param_dim = ctx->GetInputDim("Param"); | ||
PADDLE_ENFORCE_EQ( | ||
param_dim, ctx->GetInputDim("Grad"), | ||
"Param and grad input of RmspropOp should have the same dimension."); | ||
PADDLE_ENFORCE_EQ( | ||
param_dim, ctx->GetInputDim("Moment"), | ||
"Param and moment input of RmspropOp should have the same dimension."); | ||
|
||
ctx->SetOutputDim("ParamOut", param_dim); | ||
ctx->SetOutputDim("MomentOut", param_dim); | ||
} | ||
}; | ||
|
||
// Declares the rmsprop operator's interface: three inputs (Param, Grad,
// Moment), two outputs (ParamOut, MomentOut), three float attributes
// (learningRate, epsilon, decayRate) and the DOC text describing the update.
class RmspropOpMaker : public framework::OpProtoAndCheckerMaker { | ||
public: | ||
RmspropOpMaker(framework::OpProto *proto, | ||
framework::OpAttrChecker *op_checker) | ||
: OpProtoAndCheckerMaker(proto, op_checker) { | ||
// Inputs; InferShape in RmspropOp requires all three to have equal dims.
AddInput("Param", "Input parameter"); | ||
AddInput("Grad", "Input gradient"); | ||
AddInput("Moment", "Second moment"); | ||
|
||
// Outputs; InferShape sets both to Param's dimensions.
AddOutput("ParamOut", "Output parameter"); | ||
AddOutput("MomentOut", "Output second moment"); | ||
|
||
// Attributes are declared as float; the kernel reads them with Attr<float>.
AddAttr<float>("learningRate", "Learning rate"); | ||
AddAttr<float>("epsilon", "Constant for numerical stability"); | ||
AddAttr<float>("decayRate", "Decay rate for moving average of gradients"); | ||
// NOTE(review): everything between R"DOC( and )DOC" is a raw string literal,
// so the "There was a problem hiding this comment..." lines below are, as
// written, part of the text passed to AddComment. They look like pasted
// review-page text rather than operator documentation -- confirm and remove.
AddComment(R"DOC( | ||
|
||
RMSprop | ||
|
||
MomentOut = decayRate * Moment + (1 - decayRate) * Grad * Grad | ||
ParamOut = Param - learningRate * Grad / (sqrt(MomentOut) + epsilon) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the Paddle old version, tensorflow, caffe2 had implemented rmsprop algorithm. They all follow the paper's formula parameter names, users used to use the same name between different version of our framework. |
||
|
||
The original slide(Slide 29 of | ||
http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf) | ||
does not have the epsilon attribute. It is added here for numerical stability | ||
to avoid division by zero. | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We'd better use the same common used parameter name since it is a popular optimizer. User used to the same name between frameworks. https://github.com/tensorflow/tensorflow/blob/994226a4a992c4a0205bca9e2f394cb644775ad7/tensorflow/core/ops/training_ops.cc#L1281 |
||
)DOC"); | ||
} | ||
}; | ||
} // namespace operators | ||
} // namespace paddle | ||
|
||
// Register the rmsprop operator via REGISTER_OP_WITHOUT_GRADIENT (operator
// class + proto maker), then register its CPU kernel instantiated for float.
namespace ops = paddle::operators; | ||
REGISTER_OP_WITHOUT_GRADIENT(rmsprop, ops::RmspropOp, ops::RmspropOpMaker); | ||
REGISTER_OP_CPU_KERNEL(rmsprop, | ||
ops::RmspropOpKernel<paddle::platform::CPUPlace, float>); |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. | ||
|
||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
|
||
http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. */ | ||
|
||
#define EIGEN_USE_GPU | ||
#include "paddle/operators/rmsprop_op.h" | ||
|
||
// Register the GPU kernel for the rmsprop operator, instantiated for float.
// The same RmspropOpKernel template from rmsprop_op.h is used, with GPUPlace.
namespace ops = paddle::operators; | ||
REGISTER_OP_GPU_KERNEL(rmsprop, | ||
ops::RmspropOpKernel<paddle::platform::GPUPlace, float>); |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. | ||
|
||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
|
||
http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. */ | ||
|
||
#pragma once | ||
#include "paddle/framework/eigen.h" | ||
#include "paddle/framework/op_registry.h" | ||
|
||
namespace paddle { | ||
namespace operators { | ||
|
||
// Local shorthands: Tensor for framework::Tensor, and EigenVector for
// framework::EigenVector with row-major layout and Eigen::DenseIndex as the
// default template arguments.
using Tensor = framework::Tensor; | ||
template <typename T, int MajorType = Eigen::RowMajor, | ||
typename IndexType = Eigen::DenseIndex> | ||
using EigenVector = framework::EigenVector<T, MajorType, IndexType>; | ||
|
||
// Kernel for the rmsprop operator, templated on the device Place and the
// element type T. Compute() reads Param, Grad and Moment and writes:
//   MomentOut = decayRate * Moment + (1 - decayRate) * Grad * Grad
//   ParamOut  = Param - learningRate * Grad / (sqrt(MomentOut) + epsilon)
template <typename Place, typename T> | ||
class RmspropOpKernel : public framework::OpKernel<T> { | ||
public: | ||
void Compute(const framework::ExecutionContext& ctx) const override { | ||
auto param_out = ctx.Output<Tensor>("ParamOut"); | ||
// NOTE(review): the next line is review-page text, not C++ -- confirm and
// remove it before this file is compiled.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. here please use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see, I just assumed for now that auto will resolve this by itself. But I see the point of make it more understandable for users. Will fix. |
||
auto moment_out = ctx.Output<Tensor>("MomentOut"); | ||
|
||
// Request mutable storage of type T for both outputs on the context's place
// (presumably allocating it if needed -- confirm against
// Tensor::mutable_data).
param_out->mutable_data<T>(ctx.GetPlace()); | ||
moment_out->mutable_data<T>(ctx.GetPlace()); | ||
|
||
// NOTE(review): the attributes are read as float regardless of T; the
// registrations in this change only instantiate the kernel with T = float.
float lr = ctx.Attr<float>("learningRate"); | ||
float epsilon = ctx.Attr<float>("epsilon"); | ||
float decay = ctx.Attr<float>("decayRate"); | ||
|
||
// Wrap every tensor with EigenVector<T>::Flatten so the update below is
// written as element-wise Eigen expressions.
auto p = EigenVector<T>::Flatten(*ctx.Input<Tensor>("Param")); | ||
auto g = EigenVector<T>::Flatten(*ctx.Input<Tensor>("Grad")); | ||
auto m = EigenVector<T>::Flatten(*ctx.Input<Tensor>("Moment")); | ||
auto p_out = EigenVector<T>::Flatten(*param_out); | ||
auto m_out = EigenVector<T>::Flatten(*moment_out); | ||
auto place = ctx.GetEigenDevice<Place>(); | ||
|
||
// Order matters: m_out is assigned first because the p_out expression reads
// the freshly written m_out, not the input moment m.
m_out.device(place) = decay * m + (1 - decay) * g * g; | ||
p_out.device(place) = p - lr * g / (m_out.sqrt() + epsilon); | ||
} | ||
}; | ||
|
||
} // namespace operators | ||
} // namespace paddle |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
import unittest | ||
import numpy as np | ||
from op_test import OpTest | ||
|
||
|
||
class TestRmspropOp(OpTest):
    """Checks the rmsprop operator's outputs against a NumPy reference.

    The reference follows the operator's documented update rule:
        MomentOut = decayRate * Moment + (1 - decayRate) * Grad * Grad
        ParamOut  = Param - learningRate * Grad / (sqrt(MomentOut) + epsilon)
    """

    def setUp(self):
        self.op_type = "rmsprop"

        shape = (123, 321)
        param = np.random.random(shape).astype("float32")
        grad = np.random.random(shape).astype("float32")
        # Start from a non-zero moment. With an all-zero moment the
        # `decay_rate * moment` term vanishes, so a kernel that got the decay
        # term wrong would still pass this test.
        moment = np.random.random(shape).astype("float32")

        learning_rate = 0.01
        epsilon = 1e-6
        decay_rate = 0.9

        self.inputs = {'Param': param, 'Grad': grad, 'Moment': moment}

        self.attrs = {
            'learningRate': learning_rate,
            'epsilon': epsilon,
            'decayRate': decay_rate
        }

        # NumPy reference for the expected outputs.
        moment_out = decay_rate * moment + (1 - decay_rate) * grad * grad
        param_out = param - learning_rate * grad / (np.sqrt(moment_out) +
                                                    epsilon)

        self.outputs = {'ParamOut': param_out, 'MomentOut': moment_out}

    def test_check_output(self):
        # Compare the operator's outputs with self.outputs.
        self.check_output()
|
||
|
||
# Run the unittest test runner when this file is executed directly.
if __name__ == "__main__": | ||
unittest.main() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is a typo here: it should be `momentum`. Also, the comment is not helpful; we should make each comment self-explanatory, in the format `(type): comment`, e.g. `(tensor): blabla`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a good point, will fix this.