Enhance sequence_expand operator #9100
Changes from all commits: 352fa41, bf3f56e, 58730ba, 3b03e37, 2f2c5f5, 2c22552
@@ -17,23 +17,79 @@ limitations under the License. */

 namespace paddle {
 namespace operators {

 using framework::Tensor;
 using framework::LoDTensor;

 class SequenceExpandOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;

  protected:
   void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE(ctx->HasInput("X"));
-    PADDLE_ENFORCE(ctx->HasOutput("Out"));
-    PADDLE_ENFORCE(ctx->HasInput("Y"));
-    framework::DDim out_dim;
-    auto y_dim = ctx->GetInputDim("Y");
-    out_dim = ctx->GetInputDim("X");
-    out_dim[0] = y_dim[0];
-    ctx->ShareLoD("Y", "Out");
-    ctx->SetOutputDim("Out", out_dim);
+    PADDLE_ENFORCE(ctx->HasInput("X"),
+                   "Input(X) of SequenceExpandOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("Y"),
+                   "Input(Y) of SequenceExpandOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("Out"),
+                   "Output(Out) of SequenceExpandOp should not be null.");
+
+    auto x_dims = ctx->GetInputDim("X");
+    auto out_dims = x_dims;
+    int ref_level = ctx->Attrs().Get<int>("ref_level");
+
+    PADDLE_ENFORCE_GE(x_dims.size(), 2,
+                      "Dimension number of Input(X) should be at least 2.");
+
+    if (ctx->IsRuntime()) {
+      framework::Variable* x_var =
+          boost::get<framework::Variable*>(ctx->GetInputVarPtrs("X")[0]);
+      framework::Variable* y_var =
+          boost::get<framework::Variable*>(ctx->GetInputVarPtrs("Y")[0]);
+
+      auto& x_lod = x_var->Get<LoDTensor>().lod();
+      auto& y_lod = y_var->Get<LoDTensor>().lod();
+
+      PADDLE_ENFORCE_LE(x_lod.size(), 1,
+                        "Level number of Input(X)'s lod should not be "
+                        "greater than 1.");
+      PADDLE_ENFORCE_GT(y_lod.size(), 0,
+                        "Level number of Input(Y)'s lod should be "
+                        "greater than 0.");
+      PADDLE_ENFORCE(
+          ref_level == -1 ||
+              (ref_level >= 0 && ref_level < static_cast<int>(y_lod.size())),
+          "Invalid `ref_level`, which should be either equal to -1 "
+          "or in [0, %d)",
+          y_lod.size());
+
+      if (ref_level == -1) ref_level = y_lod.size() - 1;
+
+      if (x_lod.size() > 0) {
+        PADDLE_ENFORCE(x_lod[0].size() == y_lod[ref_level].size(),
+                       "Level number of Input(X)'s lod could be 0. Otherwise "
+                       "size of Input(X)'s first level lod should be equal to "
+                       "size of Input(Y)'s referred level lod.");
+      }
+
+      int64_t out_first_dim = 0;
+      if (y_lod[ref_level].size() <= 1) {
+        out_first_dim = x_dims[0];
+      } else {
+        for (size_t i = 1; i < y_lod[ref_level].size(); ++i) {
+          int x_seq_len = 1;
+          if (x_lod.size() == 1) {
+            x_seq_len = x_lod[0][i] - x_lod[0][i - 1];
+          }
+          out_first_dim +=
+              (y_lod[ref_level][i] - y_lod[ref_level][i - 1]) * x_seq_len;
+        }
+      }
+      out_dims[0] = out_first_dim;
+      ctx->SetOutputDim("Out", out_dims);
+    } else {
+      out_dims[0] = -1;
+      ctx->SetOutputDim("Out", out_dims);
+      ctx->ShareLoD("X", /*->*/ "Out");
+    }
   }
 };
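The key change in `InferShape` above is the runtime computation of `Out`'s first dimension: every sequence of `X` is repeated as many times as the corresponding interval in the referred level lod of `Y`. Below is a minimal standalone C++ sketch of just that loop (the helper name `OutFirstDim` is hypothetical, not part of this PR), checked against Case 1 of the operator documentation further down:

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

// Mirrors the out_first_dim loop in SequenceExpandOp::InferShape. When X has
// a lod, x_lod0 must have the same size as y_lod_ref (the op enforces this);
// otherwise each "sequence" of X is a single row (x_seq_len == 1).
int64_t OutFirstDim(const std::vector<size_t>& x_lod0,  // empty if X has no lod
                    const std::vector<size_t>& y_lod_ref,
                    int64_t x_first_dim) {
  if (y_lod_ref.size() <= 1) return x_first_dim;
  int64_t out_first_dim = 0;
  for (size_t i = 1; i < y_lod_ref.size(); ++i) {
    int64_t x_seq_len = 1;
    if (!x_lod0.empty()) x_seq_len = x_lod0[i] - x_lod0[i - 1];
    out_first_dim += (y_lod_ref[i] - y_lod_ref[i - 1]) * x_seq_len;
  }
  return out_first_dim;
}

int main() {
  // Case 1: X.lod = [[0, 2, 4]], Y.lod[0] = [0, 2, 4] -> Out.dims[0] == 8.
  assert(OutFirstDim({0, 2, 4}, {0, 2, 4}, /*x_first_dim=*/4) == 8);
  return 0;
}
```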
@@ -42,83 +98,81 @@ class SequenceExpandOpMaker : public framework::OpProtoAndCheckerMaker {
   SequenceExpandOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X",
-             "(Tensor or LoDTensor) The input(X) of this operator can be a "
-             "LoDTensor or a base Tensor.");
+             "(LoDTensor, default LoDTensor<float>) A 2-D LoDTensor whose lod "
+             "level is at most 1.");
     AddInput("Y",
-             "(LoDTensor)The reference input(Y) of sequence_expand op."
-             "It must be a LoDTensor with k-level(k>0)."
-             "The input(X) will be expanded according to LOD of input(Y)."
-             "The element numbers of last level in input(Y) "
-             "must be equal to dims[0] of input(X).");
+             "(LoDTensor, default LoDTensor<float>) Referred LoDTensor whose "
+             "lod (specified level) is referred by Input(X).");
     AddOutput("Out",
-              "(LodTensor)The output of sequence_expand op."
-              "The lod of output will be as same as input(Y)'s lod.");
+              "(LodTensor, default LoDTensor<float>) Output LoDTensor which is "
+              "generated from Input(X) by referring lod of Input(Y).");
+    AddAttr<int>("ref_level", "Specify lod level of Input(Y).").SetDefault(-1);
     AddComment(R"DOC(
 Sequence Expand Operator.

-This operator expands input(X) according to LOD of input(Y).
+This operator expands `X` according to specified level lod of `Y`. Current
+implementation constaints that lod level of `X` should be at most 1. Attribute
+`ref_level` is used to specify which level lod of `Y` is referred to expand `X`.
+If set `ref_level` to -1, then last level lod of `Y` would be referred.
+Please note, rank of `X` should be at least 2, when the rank exceeds 2, `X`
+would be viewed as a 2-D tensor.
+
+Following are cases to better explain how this works:

 Case 1:

-Given a 2-level LoDTensor input(X)
-    X.lod  = [[0, 2, 3],
-              [0, 1, 3, 4]]
-    X.data = [a, b, c, d]
+Given a 1-level LoDTensor input(X)
+    X.lod  = [[0, 2, 4]]
+    X.data = [[a], [b], [c], [d]]
+    X.dims = [4, 1]
 and input(Y)
     Y.lod = [[0, 2, 4],
              [0, 3, 6, 7, 8]]
-with condition len(Y.lod[-1]) -1 == X.dims[0]
-then we get 2-level LoDTensor
-    Out.lod  = [[0, 2, 4],
-                [0, 3, 6, 7, 8]]
-    Out.data = [a, a, a, b, b, b, c, d]
+ref_level: 0
+then we get 1-level LoDTensor
+    Out.lod  = [[0, 2, 4, 6, 8]]
+    Out.data = [[a], [b], [a], [b], [c], [d], [c], [d]]
+    Out.dims = [8, 1]

 Case 2:

+Given a 1-level LoDTensor input(X)
+    X.lod  = [[0, 1, 4]]
+    X.data = [[a], [b], [c], [d]]
+    X.dims = [4, 1]
+and input(Y)
+    Y.lod = [[0, 2, 4],
+             [0, 3, 6, 6, 8]]
+ref_level: 0
Review comment: What would happen if this is -1?

Reply: When `ref_level` is set to -1, the last level lod of `Y` would be referred.
+then we get 1-level LoDTensor
+    Out.lod  = [[0, 1, 2, 5, 8]]
+    Out.data = [[a], [a], [b], [c], [d], [b], [c], [d]]
+    Out.dims = [8, 1]

 Case 3:

 Given a common Tensor input(X)
-    X.data = [a, b, c]
+    X.data = [[a], [b], [c]]
+    X.dims = [3, 1]
 and input(Y)
     Y.lod = [[0, 2, 3, 6]]
-with condition len(Y.lod[-1]) -1 == X.dims[0]
-then we get 1-level LoDTensor
-    Out.lod  = [[0, 2, 3, 6]]
-    Out.data = [a, a, b, c, c, c]
+ref_level: -1
+then we get a common Tensor
+    Out.data = [[a], [a], [b], [c], [c], [c]]
+    Out.dims = [6, 1]

-Case 3:
+Case 4:

 Given a common Tensor input(X)
     X.data = [[a, b], [c, d], [e, f]]
     X.dims = [3, 2]
 and input(Y)
     Y.lod = [[0, 2, 3, 6]]
-with condition len(Y.lod[-1]) -1 == X.dims[0]
-then we get 1-level LoDTensor
-    Out.lod  = [[0, 2, 3, 6]]
-    Out.data = [[a,b], [a,b] [c,d], [e, f], [e, f], [e, f]]
+ref_level: 0
+then we get a common LoDTensor
Review comment: The LoD of output tensor should be given here?

Reply: Out is a Tensor.
+    Out.data = [[a, b], [a, b], [c, d], [e, f], [e, f], [e, f]]
+    Out.dims = [6, 2]

-Case 4:

-Given 2-level a LoDTensor input(X)
-    X.lod  = [[0, 2, 3],
-              [0, 1, 3, 4]]
-    X.data = [a, b, c, d]
-    X.dims = [4, 1]
-and input(Y)
-    Y.lod = [[0, 2, 4],
-             [0, 3, 6, 6, 8]]
-with condition len(Y.lod[-1]) -1 == X.dims[0]
-then we get 2-level LoDTensor
-    Out.lod  = [[0, 2, 4],
-                [0, 3, 6, 6, 8]]
-    Out.data = [a, a, a, b, b, b, d, d]
-    Out.dims = [8, 1]

 )DOC");
   }
 };
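All four cases in the DOC string follow one rule: sequence `i` of `X` (or row `i`, when `X` has no lod) is copied `y_lod[ref_level][i+1] - y_lod[ref_level][i]` times, in order. As a reading aid, here is an illustrative standalone C++ sketch of that rule operating on rows of `X`; the `SequenceExpand` helper below is hypothetical and is not the operator's actual CPU kernel:

```cpp
#include <vector>

// Expands x_rows according to the referred level lod of Y: sequence i of X
// is copied (y_lod_ref[i + 1] - y_lod_ref[i]) times.
std::vector<std::vector<float>> SequenceExpand(
    const std::vector<std::vector<float>>& x_rows,  // one entry per row of X
    const std::vector<size_t>& x_lod0,              // empty if X has no lod
    const std::vector<size_t>& y_lod_ref) {
  std::vector<std::vector<float>> out;
  for (size_t i = 0; i + 1 < y_lod_ref.size(); ++i) {
    size_t repeats = y_lod_ref[i + 1] - y_lod_ref[i];
    size_t begin = x_lod0.empty() ? i : x_lod0[i];        // first row of seq i
    size_t end = x_lod0.empty() ? i + 1 : x_lod0[i + 1];  // one past last row
    for (size_t r = 0; r < repeats; ++r) {
      for (size_t j = begin; j < end; ++j) out.push_back(x_rows[j]);
    }
  }
  return out;
}
```

Feeding it Case 2's inputs (`x_lod0 = {0, 1, 4}`, `y_lod_ref = {0, 2, 4}`) yields the row order a, a, b, c, d, b, c, d, matching `Out.data` above.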
@@ -129,12 +183,14 @@ class SequenceExpandOpGrad : public framework::OperatorWithKernel {

  protected:
   void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE(ctx->HasInput("X"));
-    PADDLE_ENFORCE(ctx->HasInput("Out"));
+    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("Out"), "Input(Out) should not be null.");
     PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
-                   "The input(Out@GRAD) should not be null");
+                   "Input(Out@GRAD) should not be null.");

     auto x_dims = ctx->GetInputDim("X");
     auto x_grad_name = framework::GradVarName("X");

     if (ctx->HasOutput(x_grad_name)) {
       ctx->SetOutputDim(x_grad_name, x_dims);
     }
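On the gradient side, it may help to note what the backward pass has to do semantically: each row of `X` was copied into possibly many rows of `Out`, so `X@GRAD` accumulates the matching rows of `Out@GRAD`. A rough illustrative sketch under the same hypothetical row representation as above (this is not the registered `SequenceExpandGradKernel`):

```cpp
#include <vector>

// Adjoint of the forward sketch: every copy of x row j in Out contributes
// its gradient row back into x_grad[j] by accumulation.
std::vector<std::vector<float>> SequenceExpandGrad(
    const std::vector<std::vector<float>>& out_grad_rows,
    const std::vector<size_t>& x_lod0,  // empty if X has no lod
    const std::vector<size_t>& y_lod_ref, size_t x_num_rows, size_t width) {
  std::vector<std::vector<float>> x_grad(x_num_rows,
                                         std::vector<float>(width, 0.f));
  size_t out_row = 0;  // walks Out@GRAD in the forward expansion order
  for (size_t i = 0; i + 1 < y_lod_ref.size(); ++i) {
    size_t repeats = y_lod_ref[i + 1] - y_lod_ref[i];
    size_t begin = x_lod0.empty() ? i : x_lod0[i];
    size_t end = x_lod0.empty() ? i + 1 : x_lod0[i + 1];
    for (size_t r = 0; r < repeats; ++r) {
      for (size_t j = begin; j < end; ++j, ++out_row) {
        for (size_t k = 0; k < width; ++k) {
          x_grad[j][k] += out_grad_rows[out_row][k];
        }
      }
    }
  }
  return x_grad;
}
```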
@@ -149,7 +205,13 @@ REGISTER_OP(sequence_expand, ops::SequenceExpandOp, ops::SequenceExpandOpMaker,
             sequence_expand_grad, ops::SequenceExpandOpGrad);
 REGISTER_OP_CPU_KERNEL(
     sequence_expand,
-    ops::SequenceExpandKernel<paddle::platform::CPUDeviceContext, float>);
+    ops::SequenceExpandKernel<paddle::platform::CPUDeviceContext, float>,
+    ops::SequenceExpandKernel<paddle::platform::CPUDeviceContext, double>,
+    ops::SequenceExpandKernel<paddle::platform::CPUDeviceContext, int>,
+    ops::SequenceExpandKernel<paddle::platform::CPUDeviceContext, int64_t>);
 REGISTER_OP_CPU_KERNEL(
     sequence_expand_grad,
-    ops::SequenceExpandGradKernel<paddle::platform::CPUDeviceContext, float>);
+    ops::SequenceExpandGradKernel<paddle::platform::CPUDeviceContext, float>,
+    ops::SequenceExpandGradKernel<paddle::platform::CPUDeviceContext, double>,
+    ops::SequenceExpandGradKernel<paddle::platform::CPUDeviceContext, int>,
+    ops::SequenceExpandGradKernel<paddle::platform::CPUDeviceContext, int64_t>);
Review comment: constaints -> requires

Review comment: I feel the semantics of this op are very difficult to understand. Is there an op in another framework that has the same effect?

Reply: I don't think so, since LoDTensor is a peculiar concept in PaddlePaddle, and this operator depends on the LoD of the input variable.