diff --git a/paddle/operators/linear_chain_crf_op.cc b/paddle/operators/linear_chain_crf_op.cc index 6864e3b0b7ace6..bcb48e13bd948b 100644 --- a/paddle/operators/linear_chain_crf_op.cc +++ b/paddle/operators/linear_chain_crf_op.cc @@ -23,21 +23,21 @@ class LinearChainCRFOpMaker : public framework::OpProtoAndCheckerMaker { framework::OpAttrChecker* op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("Emission", - "(LoDTensor, default: LoDTensor). " - "A 2-D LoDTensor with shape [N x D] where N is the size of the " + "(LoDTensor, default LoDTensor) " + "A 2-D LoDTensor with shape [N x D], where N is the size of the " "mini-batch and D is the total tag number. The unscaled emission " "weight matrix for the linear chain CRF. "); AddInput("Transition", - "(Tensor, default: Tensor). A 2-D Tensor with shape " + "(Tensor, default Tensor) A 2-D Tensor with shape " "[(D + 2) x D]. The learnable parameter for the linear_chain_crf " "operator. See more details in the operator's comments."); AddInput("Label", - "(LoDTensor, default: LoDTensor). A LoDTensor with shape " + "(LoDTensor, default LoDTensor) A LoDTensor with shape " "[N x 1], where N is the total element number in a mini-batch. " "The ground truth."); AddOutput( "Alpha", - "(Tensor, default: Tensor). A 2-D Tensor with shape [N x D]. " + "(Tensor, default Tensor) A 2-D Tensor with shape [N x D]. " "The forward vectors for the entire batch. Denote it as \f$\alpha\f$. " "\f$\alpha$\f is a memo table used to calculate the normalization " "factor in CRF. \f$\alpha[k, v]$\f stores the unnormalized " @@ -49,26 +49,28 @@ class LinearChainCRFOpMaker : public framework::OpProtoAndCheckerMaker { .AsIntermediate(); AddOutput( "EmissionExps", - "(Tensor, default: Tensor). A 2-D Tensor with shape [N x D]. " + "(Tensor, default Tensor) A 2-D Tensor with shape [N x D]. " "The exponentials of Input(Emission). This is an intermediate " "computational result in forward computation, and will be reused in " "backward computation.") .AsIntermediate(); AddOutput( "TransitionExps", - "(Tensor, default: Tensor). A 2-D Tensor with shape " + "(Tensor, default Tensor) A 2-D Tensor with shape " "[(D + 2) x D]. The exponentials of Input(Transition). This is an " "intermediate computational result in forward computation, and " "will be reused in backward computation.") .AsIntermediate(); AddOutput( "LogLikelihood", - "(Tensor, default: Tensor). The logarithm of the conditional " + "(Tensor, default Tensor) The logarithm of the conditional " "likelihood of each training sample in a mini-batch. This is a 2-D " "tensor with shape [S x 1], where S is the sequence number in a " "mini-batch. Note: S is equal to the sequence number in a mini-batch. " "The output is no longer a LoDTensor."); AddComment(R"DOC( +LinearChainCRF Operator. + Conditional Random Field defines an undirected probabilistic graph with nodes denoting random variables and edges denoting dependencies between these variables. CRF learns the conditional probability \f$P(Y|X)\f$, where @@ -82,29 +84,28 @@ and output must be linear sequences. Thus, the graph of such a CRF is a simple chain or a line, which results in the linear chain CRF. This operator implements the Forward-Backward algorithm for the linear chain -CRF. Please see http://www.cs.columbia.edu/~mcollins/fb.pdf and -http://cseweb.ucsd.edu/~elkan/250Bwinter2012/loglinearCRFs.pdf for reference. +CRF. Please refer to http://www.cs.columbia.edu/~mcollins/fb.pdf and +http://cseweb.ucsd.edu/~elkan/250Bwinter2012/loglinearCRFs.pdf for details. Equation: - -- Denote Input(Emission) to this operator as \f$x\f$ here. -- The first D values of Input(Transition) to this operator are for starting +1. Denote Input(Emission) to this operator as \f$x\f$ here. +2. The first D values of Input(Transition) to this operator are for starting weights, denoted as \f$a\f$ here. -- The next D values of Input(Transition) of this operator are for ending +3. The next D values of Input(Transition) of this operator are for ending weights, denoted as \f$b\f$ here. -- The remaning values of Input(Transition) are for transition weights, +4. The remaning values of Input(Transition) are for transition weights, denoted as \f$w\f$ here. -- Denote Input(Label) as \f$s\f$ here. +5. Denote Input(Label) as \f$s\f$ here. The probability of a sequence \f$s\f$ of length \f$L\f$ is defined as: -\f$P(s) = (1/Z) exp(a_{s_1} + b_{s_L} +\f$P(s) = (1/Z) \exp(a_{s_1} + b_{s_L} + \sum_{l=1}^L x_{s_l} + \sum_{l=2}^L w_{s_{l-1},s_l})\f$ where \f$Z\f$ is a normalization value so that the sum of \f$P(s)\f$ over all possible sequences is \f$1\f$, and \f$x\f$ is the emission feature weight to the linear chain CRF. -Finaly, the linear chain CRF operator outputs the logarithm of the conditional +Finally, the linear chain CRF operator outputs the logarithm of the conditional likelihood of each training sample in a mini-batch. NOTE: diff --git a/paddle/operators/nccl_op.cc b/paddle/operators/nccl_op.cc index d39cb2fcf9cc20..66fcc09bc87786 100644 --- a/paddle/operators/nccl_op.cc +++ b/paddle/operators/nccl_op.cc @@ -48,12 +48,17 @@ class NCCLInitOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddOutput("Communicator", "Create Communicator for communicating between gpus"); - AddAttr>("gpus", "gpu id lists"); - AddAttr("data_type", "output data type") + AddAttr>("gpus", "(vector) GPU id lists"); + AddAttr("data_type", + "(int, default 5 (FP32)) " + "Output data type") .SetDefault(framework::DataType::FP32); AddComment(R"DOC( - create communicator. - )DOC"); +NCCLInit Operator. + +Create communicator. + +)DOC"); } }; @@ -143,11 +148,15 @@ class NCCLAllReduceOpMaker : public framework::OpProtoAndCheckerMaker { AddInput("Communicator", "Communicator for communicating between gpus"); AddOutput("Out", "The output of AllReduce op"); AddAttr("reduction", + "(string, default 'ncclSum') " "{'ncclMin', 'ncclMax', 'ncclProd', 'ncclSum'}.") .SetDefault("ncclSum"); AddComment(R"DOC( - AllReduce the input tensors. - )DOC"); +NCCLAllReduce Operator. + +AllReduce the input tensors. + +)DOC"); } }; @@ -161,14 +170,20 @@ class NCCLReduceOpMaker : public framework::OpProtoAndCheckerMaker { AddInput("Communicator", "Communicator for communicating between gpus"); AddOutput("Out", "The output of Reduce op"); AddAttr("reduction", + "(string, default 'ncclSum') " "{'ncclMin', 'ncclMax', 'ncclProd', 'ncclSum'}.") .SetDefault("ncclSum"); AddAttr("root", - "root gpu of the parameter. if not " - "set(platform::kInvalidGPUId). hashed by name.") + "(int, default kInvalidGPUId) " + "Root gpu of the parameter. If not, " + "set(platform::kInvalidGPUId). Hashed by name.") .SetDefault(platform::kInvalidGPUId); AddComment(R"DOC( - Reduce the tensors)DOC"); +NCCLReduce Operator. + +Reduce the tensors. + +)DOC"); } }; @@ -182,12 +197,16 @@ class NCCLBcastOpMaker : public framework::OpProtoAndCheckerMaker { AddInput("Communicator", "Communicator for communicating between gpus"); AddOutput("Out", "The output of Bcast"); AddAttr("root", - "root gpu of the parameter. if not " - "set(platform::kInvalidGPUId). hashed by name.") + "(int, default kInvalidGPUId) " + "Root gpu of the parameter. If not, " + "set(platform::kInvalidGPUId). Hashed by name.") .SetDefault(platform::kInvalidGPUId); AddComment(R"DOC( - Bcast the tensors. - )DOC"); +NCCLBcast Operator. + +Bcast the tensors. + +)DOC"); } }; diff --git a/paddle/operators/pad_op.cc b/paddle/operators/pad_op.cc index 73a0b8baff5308..adb75df6ef10c5 100644 --- a/paddle/operators/pad_op.cc +++ b/paddle/operators/pad_op.cc @@ -54,41 +54,44 @@ class PadOpMaker : public framework::OpProtoAndCheckerMaker { "The input of pad op. " "The input should be a k-D tensor(k > 0 and k < 7)"); AddOutput("Out", - "The output of pad op." + "The output of pad op. " "A tensor with the same shape as X."); + AddAttr>( + "paddings", + "(vector) " + "A list to describe the padding rules for each dimension. " + "For 2-D image tensor, paddings=[0, 1, 2, 3] means " + "padding 0 row to top, 1 row to bottom, 2 columns to left " + "and 3 columns to right. Size of paddings should be equal to " + "2 * dimension size of the input tensor."); + AddAttr("pad_value", + "(float, default 0.0) " + "The value to fill the padded areas.") + .SetDefault(0.0f); AddComment(R"DOC( -Pad input into output, as specified by paddings and pad_value. The input should be a k-D tensor(k > 0 and k < 7). As an example: +Pad Operator. + +Pad input into output, as specified by paddings and pad_value. +The input should be a k-D tensor(k > 0 and k < 7). As an example: Given: X = [[1, 2], - [3, 4]] - -and + [3, 4]], -paddings = [0, 1, 1, 2] +paddings = [0, 1, 1, 2], and -pad_value = 0 +pad_value = 0, -then we get +we have: Out = [[0, 1, 2, 0, 0] [0, 3, 4, 0, 0] [0, 0, 0, 0, 0]] + )DOC"); - AddAttr>( - "paddings", - "A list to describes padding rules for each dimension." - " For 2-D image tensor, paddings=[0, 1, 2, 3] means" - " padding 0 row to top, 1 row to bottom, 2 columns to left" - " and 3 columns to right.Size of paddings should be equal to" - " 2 * dimension size of input tensor."); - AddAttr("pad_value", - "(float) default to 0; " - "The value to fill padded areas.") - .SetDefault(0.0f); } }; diff --git a/paddle/operators/pool_op.cc b/paddle/operators/pool_op.cc index 4d75c11bc81303..f58aab73386697 100644 --- a/paddle/operators/pool_op.cc +++ b/paddle/operators/pool_op.cc @@ -73,125 +73,138 @@ Pool2dOpMaker::Pool2dOpMaker(framework::OpProto *proto, AddInput( "X", "(Tensor) The input tensor of pooling operator. " - "The format of input tensor is NCHW. Where N is batch size, C is the " - "number of channels, H and W is the height and width of feature."); + "The format of input tensor is NCHW, where N is batch size, C is the " + "number of channels, H is the height of the feature, " + "and W is the width of the feature."); AddOutput("Out", - "(Tensor) The output tensor of pooling operator." - "The format of output tensor is also NCHW." - "Where N is batch size, C is " - "the number of channels, H and W is the height and " - "width of feature."); + "(Tensor) The output tensor of pooling operator. " + "The format of output tensor is also NCHW, " + "where N is batch size, C is the number of channels, " + "H is the height of the feature, " + "and W is the width of the feature."); AddAttr("poolingType", "(string), pooling type, can be \"max\" for max-pooling " "and \"avg\" for average-pooling.") .InEnum({"max", "avg"}); AddAttr>("ksize", - "(vector ), the pooling window size(height, width) " - "of pooling operator." + "(vector) The pooling window " + "size(height, width) of the pooling operator. " "If globalPooling = true, ksize and paddings will " "be ignored."); // TODO(Chengduo): Add checker. // (Currently, // TypedAttrChecker don't support vector type.) AddAttr("globalPooling", - "(bool default: false), whether to use the global pooling." + "(bool, default false) Whether to use the global pooling. " "If globalPooling = true, ksize and paddings will be ignored.") .SetDefault(false); - AddAttr>( - "strides", - "(vector, default:{1, 1}), strides(height, width) of pooling operator.") + AddAttr>("strides", + "(vector, default {1, 1}), strides(height, " + "width) of pooling operator.") .SetDefault({1, 1}); // TODO(Chengduo): Add checker. (Currently, // TypedAttrChecker don't support vector type.) AddAttr>( "paddings", - "(vector defalut:{0,0}), paddings(height, width) of pooling operator." + "(vector, defalut {0,0}), paddings(height, width) of pooling " + "operator." "If globalPooling = true, paddings and ksize will be ignored.") .SetDefault({0, 0}); // TODO(Chengduo): Add checker. (Currently, // TypedAttrChecker don't support vector type.) AddComment(R"DOC( +Pool2d Operator. + The pooling2d operation calculates the output based on the input, poolingType and ksize, strides, paddings parameters. -Input(X) and output(Out) are in NCHW format. Where N is batch size, C is the -number of channels, H and W is the height and width of feature. +Input(X) and output(Out) are in NCHW format, where N is batch size, C is the +number of channels, H is the height of the feature, and W is the width of the feature. Parameters(ksize, strides, paddings) are two elements. These two elements represent height and width, respectively. The input(X) size and output(Out) size may be different. Example: Input: - X shape: (N, C, H_in, W_in) + X shape: $(N, C, H_{in}, W_{in})$ Output: - Out shape: (N, C, H_out, W_out) - where - H_out = (H_in - ksize[0] + 2 * paddings[0]) / strides[0] + 1; - W_out = (W_in - ksize[1] + 2 * paddings[1]) / strides[1] + 1; + Out shape: $(N, C, H_{out}, W_{out})$ + where + $$ + H_{out} = (H_{in} - ksize[0] + 2 * paddings[0]) / strides[0] + 1 \\ + W_{out} = (W_{in} - ksize[1] + 2 * paddings[1]) / strides[1] + 1 + $$ + )DOC"); } Pool3dOpMaker::Pool3dOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput( - "X", - "(Tensor) The input tensor of pooling operator. " - "The format of input tensor is NCDHW. Where N is batch size, C is " - "the number of channels, D, H and W is the depth, height and width of " - "feature."); + AddInput("X", + "(Tensor) The input tensor of pooling operator. " + "The format of input tensor is NCDHW, where N is batch size, C is " + "the number of channels, and D, H and W is the depth, height and " + "width of " + "the feature, respectively."); AddOutput("Out", "(Tensor) The output tensor of pooling operator." - "The format of output tensor is also NCDHW." - "Where N is batch size, C is " - "the number of channels, D, H and W is the depth, height and " - "width of feature."); + "The format of output tensor is also NCDHW, " + "where N is batch size, C is " + "the number of channels, and D, H and W is the depth, height and " + "width of the feature, respectively."); AddAttr("poolingType", - "(string), pooling type, can be \"max\" for max-pooling " + "(string) Pooling type, can be \"max\" for max-pooling " "and \"avg\" for average-pooling.") .InEnum({"max", "avg"}); - AddAttr>("ksize", - "(vector ), the pooling window size(depth, height, " - "width) of pooling " - "operator." - "If globalPooling = true, ksize and paddings wille " - "be ignored."); // TODO(Chengduo): Add checker. - // (Currently, + AddAttr>( + "ksize", + "(vector) The pooling window size(depth, height, " + "width) of pooling operator. " + "If globalPooling = true, ksize and paddings will " + "be ignored."); // TODO(Chengduo): Add checker. + // (Currently, // TypedAttrChecker don't support vector type.) AddAttr("globalPooling", - "(bool default: false), whether to use the global pooling." + "(bool, default false) Whether to use the global pooling. " "If globalPooling = true, ksize and paddings wille be ignored.") .SetDefault(false); - AddAttr>("strides", - "(vector, default:{1,1,1}), strides(depth, height, " - "width) of pooling operator.") + AddAttr>( + "strides", + "(vector, default {1,1,1}) Strides(depth, height, " + "width) of the pooling operator.") .SetDefault({1, 1, 1}); // TODO(Chengduo): Add checker. (Currently, // TypedAttrChecker don't support vector type.) AddAttr>( "paddings", - "(vector defalut:{0,0,0}), paddings(depth, height, " - "width) of pooling operator." - "If globalPooling = true, ksize and paddings wille be ignored.") + "(vector, defalut {0,0,0}), paddings(depth, height, " + "width) of pooling operator. " + "If globalPooling = true, ksize and paddings will be ignored.") .SetDefault({0, 0, 0}); // TODO(Chengduo): Add checker. (Currently, // TypedAttrChecker don't support vector type.) AddComment(R"DOC( +Pool3d Operator. + The pooling3d operation calculates the output based on -the input, poolingType and ksize, strides, paddings parameters. -Input(X) and output(Out) are in NCDHW format. Where N is batch -size, C is the number of channels, D, H and W is the depth, height and -width of feature. Parameters(ksize, strides, paddings) are three elements. -These three elements represent depth, height and width, respectively. -The input(X) size and output(Out) size may be different. +the input, poolingType, ksize, strides, and paddings parameters. +Input(X) and output(Out) are in NCDHW format, where N is batch +size, C is the number of channels, and D, H and W are the depth, height and +width of the feature, respectively. Parameters(ksize, strides, paddings) +are three elements. These three elements represent depth, height and +width, respectively. The input(X) size and output(Out) size may be different. Example: Input: - X shape: (N, C, D_in, H_in, W_in) + X shape: $(N, C, D_{in}, H_{in}, W_{in})$ Output: - Out shape: (N, C, D_out, H_out, W_out) + Out shape: $(N, C, D_{out}, H_{out}, W_{out})$ where - D_out = (D_in - ksize[0] + 2 * paddings[0]) / strides[0] + 1; - H_out = (H_in - ksize[1] + 2 * paddings[1]) / strides[1] + 1; - W_out = (W_in - ksize[2] + 2 * paddings[2]) / strides[2] + 1; + $$ + D_{out} = (D_{in} - ksize[0] + 2 * paddings[0]) / strides[0] + 1 \\ + H_{out} = (H_{in} - ksize[1] + 2 * paddings[1]) / strides[1] + 1 \\ + W_{out} = (W_{in} - ksize[2] + 2 * paddings[2]) / strides[2] + 1 + $$ + )DOC"); } } // namespace operators diff --git a/paddle/operators/pool_with_index_op.cc b/paddle/operators/pool_with_index_op.cc index 95e896e7cc33b1..a31b3fcb7083ba 100644 --- a/paddle/operators/pool_with_index_op.cc +++ b/paddle/operators/pool_with_index_op.cc @@ -89,64 +89,73 @@ class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput( "X", - "(Tensor), the input tensor of pooling operator. " - "The format of input tensor is NCHW. Where N is batch size, C is the " - "number of channels, H and W is the height and width of image."); + "(Tensor) The input tensor of pooling operator. " + "The format of input tensor is NCHW, where N is batch size, C is the " + "number of channels, H is the height of the image, " + "and W is the width of the image."); AddOutput("Out", - "(Tensor), the output tensor of pooling operator." - "The format of output tensor is also NCHW." - "Where N is batch size, C is " - "the number of channels, H and W is the height and " - "width of image."); + "(Tensor) The output tensor of pooling operator. " + "The format of output tensor is also NCHW, " + "where N is batch size, C is " + "the number of channels, H is the height of the image " + "and W is the width of the image."); AddOutput("Mask", - "(Tensor), the Mask tensor of pooling operator." - "The format of output tensor is also NCHW." - "Where N is batch size, C is the number of channels, H and W " - "is the height and width of image." - "The value in it is the index in current feature map"); + "(Tensor) The Mask tensor of pooling operator." + "The format of output tensor is also NCHW, " + "where N is batch size, C is the number of channels, " + "H is the height of the image, " + "and W is the width of the image. " + "It represents the index in the current feature map."); AddAttr>("ksize", - "(vector ), the pooling window size(height, " - "width) of pooling operator." + "(vector) The pooling window size(height, " + "width) of pooling operator. " "If globalPooling = true, ksize and paddings " "will be ignored."); // TODO(Chengduo): Add // checker. (Currently, // TypedAttrChecker don't support vector type.) AddAttr( "globalPooling", - "(bool default: false), whether to use the global pooling." + "(bool, default false) Whether to use the global pooling. " "If globalPooling = true, ksize and paddings will be ignored.") .SetDefault(false); - AddAttr>( - "strides", - "(vector, default:{1, 1}), strides(height, width) of pooling operator.") + AddAttr>("strides", + "(vector, default {1, 1}), strides(height, " + "width) of pooling operator.") .SetDefault({1, 1}); // TODO(Chengduo): Add checker. (Currently, // TypedAttrChecker don't support vector type.) AddAttr>( "paddings", - "(vector defalut:{0, 0}), paddings(height, width) of pooling operator." + "(vector, defalut {0, 0}), paddings(height, width) of pooling " + "operator. " "If globalPooling = true, paddings and will be ignored.") .SetDefault({0, 0}); // TODO(Chengduo): Add checker. (Currently, // TypedAttrChecker don't support vector type.) AddComment(R"DOC( +MaxPool2d Operator. + The maxPooling2d with index operation calculates the output and the mask -based on the input and ksize, strides, paddings parameters. Input(X) and -output(Out, Mask) are in NCHW format. Where N is batch size, C is the -number of channels, H and W is the height and width of feature. +based on the input, ksize, strides, and paddings parameters. Input(X) and +output(Out, Mask) are in NCHW format, where N is batch size, C is the +number of channels, H is the height of the feature, +and W is the width of the feature. Parameters(ksize, strides, paddings) are two elements. These two elements represent height and width, respectively. The input(X) size and output(Out, Mask) size may be different. Example: Input: - X shape: (N, C, H_in, W_in) + X shape: $(N, C, H_{in}, W_{in})$ Output: - Out shape: (N, C, H_out, W_out) - Mask shape: (N, C, H_out, W_out) + Out shape: $(N, C, H_{out}, W_{out})$ + Mask shape: $(N, C, H_{out}, W_{out})$ where - H_out = (H_in - ksize[0] + 2 * paddings[0]) / strides[0] + 1; - W_out = (W_in - ksize[1] + 2 * paddings[1]) / strides[1] + 1; + $$ + H_{out} = (H_{in} - ksize[0] + 2 * paddings[0]) / strides[0] + 1 \\ + W_{out} = (W_{in} - ksize[1] + 2 * paddings[1]) / strides[1] + 1 + $$ + )DOC"); } }; @@ -156,70 +165,76 @@ class MaxPool3dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker { MaxPool3dWithIndexOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput( - "X", - "(Tensor), the input tensor of pooling operator. " - "The format of input tensor is NCDHW. Where N is batch size, C is " - "the number of channels, D, H and W is the depth, height and width of " - "image."); + AddInput("X", + "(Tensor) The input tensor of pooling operator. " + "The format of input tensor is NCDHW, where N is batch size, C is " + "the number of channels, and D, H and W are the depth, height and " + "width of " + "the image, respectively"); AddOutput("Out", - "(Tensor), the output tensor of pooling operator." - "The format of output tensor is also NCDHW." - "Where N is batch size, C is " - "the number of channels, D, H and W is the depth, height and " - "width of image."); + "(Tensor) The output tensor of pooling operator. " + "The format of output tensor is also NCDHW, " + "where N is the batch size, C is the number of channels, " + "and D, H and W are the depth, height and " + "width of the image, respectively."); AddOutput("Mask", - "(Tensor), the Mask tensor of pooling operator." - "The format of output tensor is also NCDHW." - "Where N is batch size, C is the number of channels, D, H and W " - "is the depth, height and width of image." - "The value in it is the index in current feature map"); + "(Tensor) The Mask tensor of pooling operator. " + "The format of output tensor is also NCDHW, " + "where N is the batch size, C is the number of channels, and " + "D, H and W are the depth, height and width " + "of the image, respectively. " + "It represents the index in the current feature map."); AddAttr>("ksize", - "(vector), the pooling window size(depth, " - "height, width) of pooling " - "operator." + "(vector) The pooling window size(depth, " + "height, width) of pooling operator. " "If globalPooling = true, ksize and paddings " "will be ignored."); // TODO(Chengduo): Add // checker. (Currently, // TypedAttrChecker don't support vector type.) AddAttr( "globalPooling", - "(bool default: false), whether to use the global pooling." + "(bool, default false) Whether to use the global pooling. " "If globalPooling = true, ksize and paddings will be ignored.") .SetDefault(false); AddAttr>("strides", - "(vector, default:{1,1,1}), strides(depth, " + "(vector, default {1,1,1}), strides(depth, " "height, width) of pooling operator.") .SetDefault({1, 1, 1}); // TODO(Chengduo): Add checker. (Currently, // TypedAttrChecker don't support vector type.) AddAttr>( "paddings", - "(vector defalut:{0,0,0}), paddings(depth, " - "height, width) of pooling operator." + "(vector, defalut {0,0,0}), paddings(depth, " + "height, width) of pooling operator. " "If globalPooling = true, paddings and ksize will be ignored.") .SetDefault({0, 0, 0}); // TODO(Chengduo): Add checker. (Currently, // TypedAttrChecker don't support vector type.) AddComment(R"DOC( +MaxPool3d Operator. + The maxpooling3d with index operation calculates the output and the mask based on the input and ksize, strides, paddings parameters. -Input(X) and output(Out, Mask) are in NCDHW format. Where N is batch -size, C is the number of channels, D, H and W is the depth, height and -width of feature. Parameters(ksize, strides, paddings) are three elements. +Input(X) and output(Out, Mask) are in NCDHW format, where N is batch +size, C is the number of channels, and D, H and W are the depth, height and +width of the feature, respectively. +Parameters(ksize, strides, paddings) are three elements. These three elements represent depth, height and width, respectively. The input(X) size and output(Out, Mask) size may be different. Example: Input: - X shape: (N, C, D_in, H_in, W_in) + X shape: $(N, C, D_{in}, H_{in}, W_{in})$ Output: - Out shape: (N, C, D_out, H_out, W_out) - Mask shape: (N, C, D_out, H_out, W_out) + Out shape: $(N, C, D_{out}, H_{out}, W_{out})$ + Mask shape: $(N, C, D_{out}, H_{out}, W_{out})$ where - D_out = (D_in - ksize[0] + 2 * paddings[0]) / strides[0] + 1; - H_out = (H_in - ksize[1] + 2 * paddings[1]) / strides[1] + 1; - W_out = (W_in - ksize[2] + 2 * paddings[2]) / strides[2] + 1; + $$ + D_{out} = (D_{in} - ksize[0] + 2 * paddings[0]) / strides[0] + 1 \\ + H_{out} = (H_{in} - ksize[1] + 2 * paddings[1]) / strides[1] + 1 \\ + W_{out} = (W_{in} - ksize[2] + 2 * paddings[2]) / strides[2] + 1 + $$ + )DOC"); } }; diff --git a/paddle/operators/precision_recall_op.cc b/paddle/operators/precision_recall_op.cc index 39da1e0bf89ce3..641f7135ded159 100644 --- a/paddle/operators/precision_recall_op.cc +++ b/paddle/operators/precision_recall_op.cc @@ -92,76 +92,78 @@ class PrecisionRecallOpMaker : public framework::OpProtoAndCheckerMaker { framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("MaxProbs", - "(Tensor, default Tensor), a 2-D tensor with shape N x 1, " + "(Tensor, default Tensor) A 2-D tensor with shape N x 1, " "where N is the batch size. Each row contains the max probability " "of an instance which computed by the previous top_k (k=1) " "operator."); AddInput("Indices", - "(Tensor, default Tensor), a 2-D tensor with shape N x 1, " + "(Tensor, default Tensor) A 2-D tensor with shape N x 1, " "where N is the batch size. Each row contains the corresponding " "index which computed by the previous top_k (k=1) operator."); AddInput("Labels", - "(Tensor, default Tensor), a 2-D tensor with shape N x 1, " + "(Tensor, default Tensor) A 2-D tensor with shape N x 1, " "where N is the batch size. Each element is a label and the " "value should be in [0, class_number - 1]."); AddInput("Weights", - "(Tensor, default Tensor), a 2-D tensor with shape N x 1, " + "(Tensor, default Tensor) A 2-D tensor with shape N x 1, " "where N is the batch size. This input is optional. If provided, " "weight of instance would be considered when computing metrics.") .AsDispensable(); AddInput("StatesInfo", - "(Tensor, default Tensor), a 2-D tensor with shape D x 4, " + "(Tensor, default Tensor) A 2-D tensor with shape D x 4, " "where D is the number of classes. This input is optional. If " "provided, current state will be accumulated to this state and " - "the accumulation state will be as the output state.") + "the accumulation state will be the output state.") .AsDispensable(); AddOutput("BatchMetrics", - "(Tensor, default Tensor), a 1-D tensor with shape {6}." - "This output tensor contains metrics for current batch data." + "(Tensor, default Tensor) A 1-D tensor with shape {6}. " + "This output tensor contains metrics for current batch data. " "The layout is [macro average precision, macro average recall, " "macro f1 score, micro average precision, micro average recall, " - "micro f1 score]"); + "micro f1 score]."); AddOutput("AccumMetrics", - "(Tensor, default Tensor), a 1-D tensor with shape {6}." - "This output tensor contains metrics for accumulated data." + "(Tensor, default Tensor) A 1-D tensor with shape {6}. " + "This output tensor contains metrics for accumulated data. " "The layout is [macro average precision, macro average recall, " "macro f1 score, micro average precision, micro average recall, " - "micro f1 score]"); + "micro f1 score]."); AddOutput("AccumStatesInfo", - "(Tensor, default Tensor), a 2-D tensor with shape D x 4, " + "(Tensor, default Tensor) A 2-D tensor with shape D x 4, " "where D is equal to class number. This output tensor contains " "accumulated state variables used to compute metrics. The layout " "for each class is [true positives, false positives, " "true negatives, false negatives]."); - AddAttr("class_number", "Number of classes to be evaluated."); + AddAttr("class_number", "(int) Number of classes to be evaluated."); AddComment(R"DOC( -When given 'Input(Indices)' and 'Input(Labels)', this operator can be used +Precision Recall Operator. + +When given Input(Indices) and Input(Labels), this operator can be used to compute various metrics including: - - macro average precision - - macro average recall - - macro f1 score - - micro average precision - - micro average recall - - micro f1 score +1. macro average precision +2. macro average recall +3. macro f1 score +4. micro average precision +5. micro average recall +6. micro f1 score To compute the above metrics, we need to do statistics for true positives, -false positives and false negatives. Here count of true negatives is not +false positives and false negatives. Here the count of true negatives is not necessary, but counting it may provide potential usage and the cost is -trivial, so the operator also provides count of true negatives. +trivial, so the operator also provides the count of true negatives. We define state as a 2-D tensor with shape [class_number, 4]. Each row of a state contains statistic variables for corresponding class. Layout of each row is: TP(true positives), FP(false positives), TN(true negatives), -FN(false negatives). If 'Input(Weights)' provided, TP, FP, TN, FN will be -calculated by given weight instead of instance count. +FN(false negatives). If Input(Weights) is provided, TP, FP, TN, FN will be +calculated by given weight instead of the instance count. This operator also supports metrics computing for cross-batch situation. To -achieve this, 'Input(StatesInfo)' should be provided. State of current batch -data will be accumulated to 'Input(StatesInfo)' and 'Output(AccumStatesInfo)' +achieve this, Input(StatesInfo) should be provided. State of current batch +data will be accumulated to Input(StatesInfo) and Output(AccumStatesInfo) is the accumulation state. -'Output(BatchMetrics)' is metrics of current batch data while -'Output(AccumStatesInfo)' is metrics of accumulation data. +Output(BatchMetrics) is metrics of current batch data while +Output(AccumStatesInfo) is metrics of accumulation data. )DOC"); } diff --git a/paddle/operators/prelu_op.cc b/paddle/operators/prelu_op.cc index eef2e34eaacf59..055c471b4561e5 100644 --- a/paddle/operators/prelu_op.cc +++ b/paddle/operators/prelu_op.cc @@ -41,17 +41,24 @@ class PReluOpMaker : public framework::OpProtoAndCheckerMaker { PReluOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "The input tensor of prelu operator."); - AddInput("Alpha", "The alpha weight of PRelu operator."); - AddOutput("Out", "The output tensor of PRelu operator."); - AddComment(R"DOC(PRelu operator + AddInput("Alpha", "The alpha weight of prelu operator."); + AddOutput("Out", "The output tensor of prelu operator."); + AddComment(R"DOC( +PRelu Operator. The equation is: - f(x) = alpha * x , for x < 0 - f(x) = x , for x >= 0 +$$ +f(x) = +\begin{cases} +\alpha * x, \quad \text{if} \ x < 0 \\ +x, \qquad \text{if} \ x >= 0 +\end{cases} +$$ The input `X` can carry the LoD (Level of Details) information, -or not. And the output shares the LoD with input `X`. +or not. And the output shares the LoD information with input `X`. + )DOC"); } }; diff --git a/paddle/operators/proximal_adagrad_op.cc b/paddle/operators/proximal_adagrad_op.cc index 39fbf800031cd5..36e460103ab46b 100644 --- a/paddle/operators/proximal_adagrad_op.cc +++ b/paddle/operators/proximal_adagrad_op.cc @@ -83,22 +83,26 @@ class ProximalAdagradOpMaker : public framework::OpProtoAndCheckerMaker { "L1 regularization strength.") .SetDefault(0.0f); AddAttr("l2", - "(float, default 0.0)" + "(float, default 0.0) " "L2 regularization strength.") .SetDefault(0.0f); AddComment(R"DOC( +Proximal Adagrad Optimizer. -Optimizer that implements the proximal adagrad algorithm. +Optimizer that implements the proximal adagrad algorithm: -moment = moment + grad * grad -prox_param = param - learning_rate * grad * (1 / sqrt(moment)) -param = sign(prox_param) / (1 + learning_rate * l2) * - max { |prox_param| - learning_rate * l1 , 0 } +$$ +moment = moment + grad * grad \\ +prox\_param = param - learning\_rate * grad * (1 / \sqrt{moment}) \\ +param = sign(prox\_param) / (1 + learning\_rate * l2) * + \max(|prox\_param| - learning\_rate * l1 , 0) +$$ The paper that proposed Proximal GD: (http://papers.nips.cc/paper/3793-efficient-learning-using-forward-backward-splitting.pdf) Here, we use the adagrad learning rate as specified here: (http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf) + )DOC"); } }; diff --git a/paddle/operators/proximal_gd_op.cc b/paddle/operators/proximal_gd_op.cc index e4b014b9f5866e..5693d0ec9ebf4c 100644 --- a/paddle/operators/proximal_gd_op.cc +++ b/paddle/operators/proximal_gd_op.cc @@ -67,19 +67,23 @@ class ProximalGDOpMaker : public framework::OpProtoAndCheckerMaker { "L1 regularization strength.") .SetDefault(0.0f); AddAttr("l2", - "(float, default 0.0)" + "(float, default 0.0) " "L2 regularization strength.") .SetDefault(0.0f); AddComment(R"DOC( +ProximalGD Operator. -Optimizer that implements the proximal gradient descent algorithm. +Optimizer that implements the proximal gradient descent algorithm: -prox_param = param - learning_rate * grad -param = sign(prox_param) / (1 + learning_rate * l2) * - max { |prox_param| - learning_rate * l1 , 0 } +$$ +prox\_param = param - learning\_rate * grad \\ +param = sign(prox\_param) / (1 + learning\_rate * l2) * + \max(|prox\_param| - learning\_rate * l1, 0) +$$ The paper that proposed Proximal Gradient Descent: (http://papers.nips.cc/paper/3793-efficient-learning-using-forward-backward-splitting.pdf) + )DOC"); } };