Commit: polish_g_to_l (#5367)
kexinzhao authored and wangkuiyi committed Nov 4, 2017
1 parent af760ea commit c0d2ca5
Showing 11 changed files with 187 additions and 135 deletions.
23 changes: 20 additions & 3 deletions paddle/operators/gather_op.cc
@@ -67,11 +67,28 @@ class GatherOpMaker : public framework::OpProtoAndCheckerMaker {
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The source input of gather op");
AddInput("Index", "The index input of gather op");
AddOutput("Out", "The output of add op");
AddOutput("Out", "The output of gather op");
AddComment(R"DOC(
Gather Operator by selecting from the first axis,
Gather Operator.
$Out = X[Index]$
Out is obtained by gathering entries of the outer-most dimension
of X indexed by Index and concatenating them together.
Example:
X = [[1, 2],
[3, 4],
[5, 6]]
Index = [[1, 2]]
Then:
Out = [[3, 4],
[5, 6]]
Out = X[Index]
)DOC");
}
};
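To make the gather semantics concrete, here is a minimal sketch in plain C++ (std::vector in place of the framework's Tensor; the Gather helper name is illustrative, not the operator's actual kernel):

```cpp
#include <cstddef>
#include <vector>

// Sketch of Out = X[Index]: pick entries of X along the outer-most
// dimension and concatenate them in the order given by Index.
std::vector<std::vector<int>> Gather(const std::vector<std::vector<int>>& x,
                                     const std::vector<size_t>& index) {
  std::vector<std::vector<int>> out;
  out.reserve(index.size());
  for (size_t i : index) {
    out.push_back(x[i]);  // copy the i-th outer-most entry of X
  }
  return out;
}

// Matches the DOC example: X = {{1, 2}, {3, 4}, {5, 6}}, Index = {1, 2}
// gives Out = {{3, 4}, {5, 6}}.
```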
34 changes: 24 additions & 10 deletions paddle/operators/gaussian_random_op.cc
@@ -68,21 +68,35 @@ class GaussianRandomOpMaker : public framework::OpProtoAndCheckerMaker {
GaussianRandomOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddOutput("Out", "output matrix of random op");
AddComment(R"DOC(
GaussianRandom operator.
Use to initialize tensor with gaussian random generator.
)DOC");
AddOutput("Out", "Output matrix of gaussian random op");

AddAttr<std::vector<int>>("shape", "The dimension of random tensor.");
AddAttr<float>("mean", "mean of random tensor.").SetDefault(.0f);
AddAttr<float>("std", "std of random tensor.").SetDefault(1.0f);
AddAttr<std::vector<int>>("shape",
"(vector<int>) "
"The dimension of random tensor.");
AddAttr<float>("mean",
"(float, default 0.0) "
"mean of random tensor.")
.SetDefault(.0f);
AddAttr<float>("std",
"(float, default 1.0) "
"std of random tensor.")
.SetDefault(1.0f);
AddAttr<int>("seed",
"(int, default 0) "
"Random seed of generator."
"0 means use system wide seed")
"0 means use system wide seed.")
.SetDefault(0);
AddAttr<int>("data_type", "output data type")
AddAttr<int>("data_type",
"(int, default 5(FP32)) "
"Output data type.")
.SetDefault(framework::DataType::FP32);

AddComment(R"DOC(
GaussianRandom Operator.
Used to initialize tensors with a Gaussian random generator.
)DOC");
}
};
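As a rough sketch of what these attributes control, the following fills a buffer of the requested shape with N(mean, std^2) samples using only the standard library; mapping seed == 0 to std::random_device is an assumption standing in for the framework's system-wide seed handling:

```cpp
#include <cstddef>
#include <random>
#include <vector>

// Illustrative stand-in for the GaussianRandom kernel: fill a tensor of
// the given shape with samples drawn from N(mean, std_dev^2).
std::vector<float> GaussianFill(const std::vector<int>& shape, float mean,
                                float std_dev, int seed) {
  std::size_t numel = 1;
  for (int d : shape) numel *= static_cast<std::size_t>(d);
  // seed == 0 means "use a system-wide seed" per the attribute docs;
  // std::random_device stands in for that behavior here (an assumption).
  std::mt19937 engine(seed != 0 ? static_cast<unsigned>(seed)
                                : std::random_device{}());
  std::normal_distribution<float> dist(mean, std_dev);
  std::vector<float> out(numel);
  for (auto& v : out) v = dist(engine);
  return out;
}
```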

39 changes: 22 additions & 17 deletions paddle/operators/gru_unit_op.cc
@@ -80,19 +80,21 @@ class GRUUnitOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("HiddenPrev",
"(Tensor) Matrix with shape [batch_size, frame_size] for the "
"states of previous time step.");
AddInput("Weight",
"(Tensor) Weight matrix with shape [frame_size, frame_size * 3]. "
"The elements continuous in memory can be divided into two parts. "
"The first part are weights of the update gate and reset gate "
"with shape [frame_size, frame_size * 2], and the second part are "
"weights of output candidate with shape [frame_size, frame_size]");
AddInput("Bias",
"(Tensor) Bias vector with shape [1, frame_size * 3] concating "
"bias of the update gate, reset gate and output candidate.")
AddInput(
"Weight",
"(Tensor) Weight matrix with shape [frame_size, frame_size * 3]. "
"The elements continuous in memory can be divided into two parts. "
"The first part are weights of the update gate and reset gate "
"with shape [frame_size, frame_size * 2], and the second part are "
"weights of output candidate with shape [frame_size, frame_size].");
AddInput(
"Bias",
"(Tensor) Bias vector with shape [1, frame_size * 3] concatenating "
"bias of the update gate, reset gate and output candidate.")
.AsDispensable();
AddOutput("Gate",
"(Tensor) Matrix with shape [batch_size, frame_size * 3] for the "
"output of update gate, reset gate and output candidate")
"output of update gate, reset gate and output candidate.")
.AsIntermediate();
AddOutput("ResetHiddenPrev",
"(Tensor) Matrix with shape [batch_size, frame_size] for the "
@@ -112,16 +114,19 @@ class GRUUnitOpMaker : public framework::OpProtoAndCheckerMaker {
.SetDefault(sigmoid)
.InEnum({identity, sigmoid, tanh, relu});
AddComment(R"DOC(
GRUUnitOp implements part calculations of the GRU unit as following:
GRUUnit Operator.
\f[
update \ gate: u_t = actGate(xu_t + W_u * hidden_prev + bias_u) \\
reset \ gate: r_t = actGate(xr_t + W_r * hidden_prev + bias_r) \\
output \ candidate: {h}_t = actNode(xc_t + W_c * dot(r_t, hidden_prev) + bias_c) \\
output: h_t = dot((1-u_t), {h}_t) + dot(u_t, hidden_prev)
\f]
This operator implements partial calculations of the GRU unit as follows:
$$
update \ gate: u_t = actGate(xu_t + W_u * hidden_{prev} + bias_u) \\
reset \ gate: r_t = actGate(xr_t + W_r * hidden_{prev} + bias_r) \\
output \ candidate: {h}_t = actNode({xc}_t + W_c * dot(r_t, hidden_{prev}) + bias_c) \\
output: h_t = dot((1-u_t), {h}_t) + dot(u_t, hidden_{prev})
$$
The rest of the GRU unit can be completed by using FCOp's output as the input of GRUUnitOp.
)DOC");
}
};
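To see the four equations in code, here is a deliberately simplified sketch with frame_size == 1, so the Weight matrix reduces to three scalars and the element-wise product ("dot" above) becomes plain multiplication. Fixing actGate to sigmoid and actNode to tanh is an assumption matching the usual defaults, and GRUUnitScalar is an illustrative name:

```cpp
#include <cmath>

// frame_size == 1 sketch of the GRU unit equations: the Weight matrix
// [frame_size, frame_size * 3] degenerates to three scalars, as do the
// three Bias slices. Illustrative only, not the operator's kernel.
struct GRUUnitScalar {
  float w_u, w_r, w_c;  // update-gate, reset-gate, and candidate weights
  float b_u, b_r, b_c;  // matching slices of Bias

  static float Sigmoid(float v) { return 1.0f / (1.0f + std::exp(-v)); }

  float Step(float xu, float xr, float xc, float hidden_prev) const {
    float u = Sigmoid(xu + w_u * hidden_prev + b_u);          // update gate
    float r = Sigmoid(xr + w_r * hidden_prev + b_r);          // reset gate
    float c = std::tanh(xc + w_c * (r * hidden_prev) + b_c);  // candidate
    return (1.0f - u) * c + u * hidden_prev;                  // h_t
  }
};
```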
6 changes: 4 additions & 2 deletions paddle/operators/huber_loss_op.cc
@@ -59,10 +59,12 @@ class HuberLossOpMaker : public framework::OpProtoAndCheckerMaker {
"The shape is same as Input(X) and will be reused in backward.")
.AsIntermediate();
AddOutput("Out",
"The output tensor with shape [batch_size, 1] which represents "
"the huber loss.");
"The output tensor with shape [batch_size, 1] "
"which represents the huber loss.");
AddAttr<AttrType>("delta", "Hyper parameter in huber loss.");
AddComment(R"DOC(
HuberLoss Operator.
Huber loss is a loss function used in robust regression. We define X as the
input value and Y as the target value. Huber loss can evaluate the fitness of
X to Y. Different from MSE loss, Huber loss is more robust for outliers. The
…
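The DOC comment is truncated above; for reference, a sketch of the standard Huber loss the text is describing, with X the input value, Y the target value, and delta the threshold attribute:

```cpp
#include <cmath>

// Standard Huber loss per example, with residual r = Y - X and threshold
// delta. Illustrative sketch only, not the operator's kernel.
float HuberLoss(float x, float y, float delta) {
  float r = y - x;
  if (std::abs(r) <= delta) {
    return 0.5f * r * r;                        // quadratic near zero
  }
  return delta * (std::abs(r) - 0.5f * delta);  // linear for outliers
}
```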
12 changes: 8 additions & 4 deletions paddle/operators/increment_op.cc
@@ -39,14 +39,18 @@ class IncrementOpMaker : public framework::OpProtoAndCheckerMaker {
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "(Tensor) The input tensor of increment operator");
AddOutput("Out", "(Tensor) The output tensor of increment operator.");
AddComment(R"DOC(Increment operator
The equation is: Out = X + step
)DOC");
AddAttr<AttrType>("step",
"(float, default 1.0) "
"The step size by which the "
"input tensor will be incremented.")
.SetDefault(1.0);
AddComment(R"DOC(
Increment Operator.
The equation is:
$$Out = X + step$$
)DOC");
}
};
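A one-loop sketch of the equation on a flat buffer (the Increment helper is illustrative):

```cpp
#include <vector>

// Sketch of Out = X + step; step mirrors the attribute declared above
// (default 1.0).
std::vector<float> Increment(std::vector<float> x, float step = 1.0f) {
  for (auto& v : x) v += step;  // element-wise add of the scalar step
  return x;
}
```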

2 changes: 1 addition & 1 deletion paddle/operators/l1_norm_op.cc
@@ -57,7 +57,7 @@ L1 Norm Operator.
Computes the L1 norm of a tensor.
Out = sum (abs(X))
$$Out = \sum{|X|}$$
)DOC");
}
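A short sketch of the reduction on a flat buffer (the L1Norm helper is illustrative):

```cpp
#include <cmath>
#include <numeric>
#include <vector>

// Sketch of $Out = \sum{|X|}$: accumulate absolute values of all elements.
float L1Norm(const std::vector<float>& x) {
  return std::accumulate(
      x.begin(), x.end(), 0.0f,
      [](float acc, float v) { return acc + std::abs(v); });
}
```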
12 changes: 8 additions & 4 deletions paddle/operators/load_op.cc
@@ -115,14 +115,18 @@ class LoadOpProtoMaker : public framework::OpProtoAndCheckerMaker {
LoadOpProtoMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddOutput("Out", "The tensor need to be loaded");
AddComment(R"DOC(Load Operator
Load operator will load a tensor variable from disk file.
)DOC");
AddOutput("Out", "(Tensor) The tensor need to be loaded");
AddAttr<std::string>("file_path",
"(string) "
"Variable will be loaded from \"file_path\".")
.AddCustomChecker(
[](const std::string &path) { return !path.empty(); });
AddComment(R"DOC(
Load Operator.
Load operator will load a tensor variable from disk file.
)DOC");
}
};
} // namespace operators
26 changes: 16 additions & 10 deletions paddle/operators/lookup_table_op.cc
@@ -53,21 +53,27 @@ class LookupTableOpMaker : public framework::OpProtoAndCheckerMaker {
framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("W",
"An input represents embedding tensors,"
" which is a learnable parameter.");
"An input represents embedding tensors, "
"which is a learnable parameter.");
AddInput("Ids",
"An input with type int32 or int64"
"contains the ids to be looked up in W."
"Ids must be a column vector with rank = 2."
"The 2nd dimension size must be 1");
AddOutput("Out", "The lookup results, which have the same type with W.");
AddAttr<bool>("is_sparse", "Sparse update").SetDefault(false);
"An input with type int32 or int64 "
"contains the ids to be looked up in W. "
"Ids must be a column vector with rank = 2. "
"The 2nd dimension size must be 1.");
AddOutput("Out", "The lookup results, which have the same type as W.");
AddAttr<bool>("is_sparse",
"(boolean, default false) "
"Sparse update")
.SetDefault(false);
AddComment(R"DOC(
Lookup Table Operator.
This operator is used to perform lookups on the parameter W,
then the looked-up entries are concatenated into a dense tensor.
The input `Ids` can carry the LoD (Level of Details) information,
or not. And the output only shares the LoD with input `Ids`.
The input Ids can carry the LoD (Level of Details) information,
or not. And the output only shares the LoD information with input Ids.
)DOC");
}
};
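A sketch of the lookup semantics under simplifying assumptions: W is stored row-major as [vocab_size, embed_dim], LoD propagation and sparse updates are ignored, and LookupTable is an illustrative name rather than the framework's kernel:

```cpp
#include <cstdint>
#include <vector>

// Sketch of the embedding lookup: each id selects row W[id], and the
// looked-up rows are concatenated into the dense output.
std::vector<float> LookupTable(const std::vector<float>& w, int embed_dim,
                               const std::vector<int64_t>& ids) {
  std::vector<float> out;
  out.reserve(ids.size() * embed_dim);
  for (int64_t id : ids) {
    const float* row = w.data() + id * embed_dim;  // start of row W[id]
    out.insert(out.end(), row, row + embed_dim);
  }
  return out;
}
```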
84 changes: 41 additions & 43 deletions paddle/operators/lrn_op.cc
@@ -45,72 +45,70 @@ class LRNOpMaker : public framework::OpProtoAndCheckerMaker {
public:
LRNOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", R"DOC(
(Tensor) The input of LRN operator. It must be a 4D tenor with NCHW format.
)DOC");

AddInput("X",
"(Tensor) The input of LRN operator. "
"It must be a 4D tenor with NCHW format.");
AddOutput("Out",
"(Tensor) The output of LRN operator, which is also the 4D "
"tensor with NCHW format.");
AddOutput("MidOut", R"Doc(
(Tensor)Middle result of lrn op.It's computed in forward process
and also used in backward process.
)Doc");

AddAttr<int>("n", R"DOC(
(int, default 5)n is “adjacent” kernel maps at the same spatial position.
)DOC")
AddOutput("MidOut",
"(Tensor) Middle result of LRN operator. It's computed in "
"forward process and also used in backward process.");

AddAttr<int>("n",
"(int default 5) "
"n is the \"adjacent\" kernel that maps "
"at the same spatial position.")
.SetDefault(5)
.GreaterThan(0);

AddAttr<T>("k", R"DOC(
(float, default 2.0)k is the bias.
)DOC")
AddAttr<T>("k",
"(float, default 2.0) "
"k is the bias.")
.SetDefault(2.0)
.GreaterThan(0.0);

AddAttr<T>("alpha", R"DOC(
(float, default 0.0001)alpha is the scale number.
)DOC")
AddAttr<T>("alpha",
"(float, default 0.0001) "
"alpha is the scale number.")
.SetDefault(0.0001)
.GreaterThan(0.0);

AddAttr<T>("beta", R"DOC(
(float, default 0.75)beta is the power number.
)DOC")
AddAttr<T>("beta",
"(float, default 0.75) "
"beta is the power number.")
.SetDefault(0.75)
.GreaterThan(0.0);

AddComment(R"DOC(
Local Response Normalization.
This Function comes from the paper
"ImageNet Classification with Deep Convolutional Neural Networks".
Local Response Normalization Operator.
The original formula is:

                      Input(i, x, y)
Output(i, x, y) = ----------------------------------------------
                               -- upper
                  (k + alpha * >      (Input(j, x, y))^2) ^ (beta)
                               -- j = lower

upper is `min(C, c + n/2)`
lower is `max(0, c - n/2)`

This operator comes from the paper
"ImageNet Classification with Deep Convolutional Neural Networks".

The original formula is:

$$
Output(i, x, y) = Input(i, x, y) / \left(
k + \alpha \sum\limits^{\min(C, c + n/2)}_{j = \max(0, c - n/2)}
(Input(j, x, y))^2
\right)^{\beta}
$$
Function implementation:
Function implementation:
inputs and outpus is NCHW format, while input.shape.ndims() is equal 4.
And the meaning of each dimension(0-3) is respectively batch size,
feature maps, rows and columns.
Inputs and outputs are in NCHW format, while input.shape.ndims() equals 4.
Dimensions 0 ~ 3 represent batch size, feature maps, rows,
and columns, respectively.
Input and Output in the above formula is for each map(i) of one image, and
Input(i, x, y), Output(i, x, y) represents an element in an image.
Input and Output in the formula above are for each map (i) of one image, and
Input(i, x, y), Output(i, x, y) represent an element in an image.
C is the number of feature maps of one image, and n is a hyper-parameters
is configured when Function is initialized. The sum in the denominator
is the sum of the same position in the neighboring maps.
)DOC");
C is the number of feature maps of one image. n is a hyper-parameter
configured when the operator is initialized. The sum in the denominator
is the sum of the same positions in the neighboring maps.
)DOC");
}
};
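A direct, unoptimized transcription of the formula for an NCHW buffer; the channel neighborhood is clamped to [max(0, c - n/2), min(C - 1, c + n/2)] because indices here are zero-based, and LRNForward is an illustrative name, not the framework's kernel:

```cpp
#include <algorithm>
#include <cmath>
#include <vector>

// For every element, divide by (k + alpha * sum of squared activations of
// the n neighboring feature maps at the same spatial position) ^ beta.
std::vector<float> LRNForward(const std::vector<float>& x, int N, int C,
                              int H, int W, int n, float k, float alpha,
                              float beta) {
  std::vector<float> out(x.size());
  auto at = [&](int b, int c, int h, int w) {
    return ((b * C + c) * H + h) * W + w;  // row-major NCHW offset
  };
  for (int b = 0; b < N; ++b)
    for (int c = 0; c < C; ++c)
      for (int h = 0; h < H; ++h)
        for (int w = 0; w < W; ++w) {
          int lower = std::max(0, c - n / 2);
          int upper = std::min(C - 1, c + n / 2);
          float sum = 0.0f;
          for (int j = lower; j <= upper; ++j) {
            float v = x[at(b, j, h, w)];
            sum += v * v;  // squared activations of neighboring maps
          }
          out[at(b, c, h, w)] =
              x[at(b, c, h, w)] / std::pow(k + alpha * sum, beta);
        }
  return out;
}
```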
