From 2ac3c8ee89e10718ea3515496233148e3b8a3039 Mon Sep 17 00:00:00 2001
From: Kavya Srinet
Date: Sat, 4 Nov 2017 11:14:27 -0700
Subject: [PATCH 1/5] Doc fix for smooth L1 loss

---
 paddle/operators/smooth_l1_loss_op.cc | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/paddle/operators/smooth_l1_loss_op.cc b/paddle/operators/smooth_l1_loss_op.cc
index 758481943d463..61dff8d271141 100644
--- a/paddle/operators/smooth_l1_loss_op.cc
+++ b/paddle/operators/smooth_l1_loss_op.cc
@@ -77,14 +77,17 @@ class SmoothL1LossOpMaker : public framework::OpProtoAndCheckerMaker {
                    "A float scalar with default value 3.0.")
         .SetDefault(3.0);
     AddComment(R"DOC(
-Compute smooth l1 loss for input and target. The operator take the 1st
-dimension of input as batch size. For each instance, it will compute
-smooth l1 loss element by element first and sum all losses to one value.
-So the output shape is [batch_size, 1].
+Smooth L1 Loss Operator.
+
+This operator computes the smooth l1 loss for input and target.
+The operator takes the first dimension of input as the batch size.
+For each instance, it computes the smooth l1 loss, element by element first
+and then sums all the losses to one value. So the resulting output shape
+is [batch_size, 1].
 
 The equation is:
 
-loss = 0.5 * (sigma * (x-y))^2    if abs(x - y) < 1 / sigma^2
-       abs(x - y) - 0.5 / sigma^2 otherwise
+loss = $$0.5 * (\sigma * (x-y))^2$$    if $$|x - y| < 1 / {\sigma}^2$$
+       $$|x - y| - \frac{0.5}{{\sigma}^2}$$ otherwise
 
 )DOC");
   }
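As a sanity check on the equation above, a minimal NumPy sketch of the smooth
L1 computation (the helper name and shapes are illustrative only, not part of
the patch or of PaddlePaddle's API):

import numpy as np

def smooth_l1_loss(x, y, sigma=3.0):
    # Elementwise smooth L1, per the doc string:
    #   0.5 * (sigma * (x - y))^2   if |x - y| < 1 / sigma^2
    #   |x - y| - 0.5 / sigma^2     otherwise
    d = np.abs(x - y)
    elementwise = np.where(d < 1.0 / sigma**2,
                           0.5 * (sigma * d)**2,
                           d - 0.5 / sigma**2)
    # Sum per instance so the result has shape [batch_size, 1].
    return elementwise.reshape(x.shape[0], -1).sum(axis=1, keepdims=True)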
From 6b1bac8335fdc36ed2061c7d5ba17acc7c7c252c Mon Sep 17 00:00:00 2001
From: Kavya Srinet
Date: Sat, 4 Nov 2017 11:18:12 -0700
Subject: [PATCH 2/5] Adding doc for softmax_op

---
 paddle/operators/softmax_op.cc | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/paddle/operators/softmax_op.cc b/paddle/operators/softmax_op.cc
index 00fd0b32a9b3c..93f89e33a73c5 100644
--- a/paddle/operators/softmax_op.cc
+++ b/paddle/operators/softmax_op.cc
@@ -44,20 +44,23 @@ class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker {
              "2-D with shape [batch_size, input_feature_dimensions].");
     AddOutput("Y", "The normalized values with the same shape as X.");
     AddComment(R"DOC(
-The input of softmax operator is a 2-D tensor with shape N x K (N is the
+Softmax Operator.
+
+The input of the softmax operator is a 2-D tensor with shape N x K (N is the
 batch_size, K is the dimension of input feature). The output tensor has the
 same shape as the input tensor.
 
 For each row of the input tensor, the softmax operator squashes the
 K-dimensional vector of arbitrary real values to a K-dimensional vector of real
-values in the range [0, 1] that add up to 1. Specifically, it computes the
-exponential of the given dimension and the sum of exponential values of all
-the other dimensions in the K-dimensional vector input. Then the ratio of the
-exponential of the given dimension and the sum of exponential values of all
-the other dimensions is the output of the softmax operator.
+values in the range [0, 1] that add up to 1.
+It computes the exponential of the given dimension and the sum of exponential
+values of all the other dimensions in the K-dimensional vector input.
+Then the ratio of the exponential of the given dimension and the sum of
+exponential values of all the other dimensions is the output of the softmax
+operator.
 
 For each row `i` and each column `j` in input X, we have:
-    Y[i, j] = exp(X[i, j]) / sum_j(exp(X[i, j]))
+    $$Y[i, j] = \frac{\exp(X[i, j])}{\sum_j \exp(X[i, j])}$$
 
 )DOC");
   }
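The softmax formula in this patch reads, as a small NumPy sketch (illustrative
only; the max subtraction is a standard numerical-stability trick, not part of
the documented equation):

import numpy as np

def softmax(x):
    # Row-wise: Y[i, j] = exp(X[i, j]) / sum_j exp(X[i, j]),
    # for a 2-D input of shape [batch_size, K].
    shifted = x - x.max(axis=1, keepdims=True)  # avoid overflow in exp
    e = np.exp(shifted)
    return e / e.sum(axis=1, keepdims=True)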
From 53c4288442dfbd01def5086f04903e1581c6b881 Mon Sep 17 00:00:00 2001
From: Kavya Srinet
Date: Sat, 4 Nov 2017 11:22:37 -0700
Subject: [PATCH 3/5] Added doc for softmax_with_cross_entropy

---
 .../softmax_with_cross_entropy_op.cc | 30 ++++++++++---------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/paddle/operators/softmax_with_cross_entropy_op.cc b/paddle/operators/softmax_with_cross_entropy_op.cc
index 50497da1b70d3..a006e0a595057 100644
--- a/paddle/operators/softmax_with_cross_entropy_op.cc
+++ b/paddle/operators/softmax_with_cross_entropy_op.cc
@@ -51,32 +51,34 @@ class SoftmaxWithCrossEntropyOpMaker
              "the given labels as soft labels.")
         .SetDefault(false);
     AddComment(R"DOC(
-Cross entropy loss with softmax are used as the output layer extensively. This
+Softmax With Cross Entropy Operator.
+
+Cross entropy loss with softmax is used as the output layer extensively. This
 operator computes the softmax normalized values for each row of the input
-tensor, after which cross-entropy loss is then computed. This provides a more
+tensor, after which cross-entropy loss is computed. This provides a more
 numerically stable gradient.
 
-Because this operators performs a softmax on logits internally, it expects
-unscaled logits. Please do not call this op with the output of softmax operator,
-which will produce incorrect results.
+Because this operator performs a softmax on logits internally, it expects
+unscaled logits. This operator should not be used with the output of the
+softmax operator since that would produce incorrect results.
 
 When the attribute softLabel is set false, this operators expects mutually
-exclusive hard labels, each sample in a batch is in exactly one class with
-probabilities 1. Each sample in the batch with one and only one label.
+exclusive hard labels, each sample in a batch is in exactly one class with a
+probability of 1.0. Each sample in the batch will have a single label.
 
-Equation:
+The equation is as follows:
 
-1) hard label (one-hot label)
+1) Hard label (one-hot label, so every sample has exactly one class)
 
-Loss_j = \f$ -\text{Logit}_{Label_j} +
+$$Loss_j = -\text{Logit}_{Label_j} +
 \log\left(\sum_{i=0}^{K}\exp(\text{Logit}_i)\right),
-j = 1, ..., K $\f
+j = 1, ..., K$$
 
-2) soft label (a distribution over all classes)
+2) Soft label (each sample can have a distribution over all classes)
 
-Loss_j = \f$ -\sum_{i=0}^{K}\text{Label}_i\left(\text{Logit}_i -
+$$Loss_j = -\sum_{i=0}^{K}\text{Label}_i\left(\text{Logit}_i -
 \log\left(\sum_{i=0}^{K}\exp(\text{Logit}_i)\right)\right),
-j = 1,...,K $\f
+j = 1,...,K$$
 
 )DOC");
   }

From 41413a67dbe97a0ca8195b18ca25ef673f433ef1 Mon Sep 17 00:00:00 2001
From: Kavya Srinet
Date: Sat, 4 Nov 2017 11:25:09 -0700
Subject: [PATCH 4/5] Adding documentation for transpose_op

---
 paddle/operators/transpose_op.cc | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/paddle/operators/transpose_op.cc b/paddle/operators/transpose_op.cc
index d785e57c83043..94de3d5069017 100644
--- a/paddle/operators/transpose_op.cc
+++ b/paddle/operators/transpose_op.cc
@@ -32,7 +32,7 @@ class TransposeOp : public framework::OperatorWithKernel {
     size_t axis_size = axis.size();
 
     PADDLE_ENFORCE_EQ(x_rank, axis_size,
-                      "the input tensor's rank(%d) "
+                      "The input tensor's rank(%d) "
                       "should be equal to the axis's size(%d)",
                       x_rank, axis_size);
 
@@ -64,12 +64,14 @@ class TransposeOpMaker : public framework::OpProtoAndCheckerMaker {
     AddOutput("Out", "(Tensor)The output tensor");
     AddAttr<std::vector<int>>(
         "axis",
-        "(vector)a list of values, and the size of the list should be "
+        "(vector)A list of values, and the size of the list should be "
         "the same with the input tensor rank, the tensor will "
         "permute the axes according the the values given");
     AddComment(R"DOC(
-The Tensor will be permuted according to the axis values given.
-The op is very much like the numpy.transpose function in python
+Transpose Operator.
+
+The input tensor will be permuted according to the axis values given.
+The op functions similarly to how numpy.transpose works in Python.
 For example:
 >> input = numpy.arange(6).reshape((2,3))
 >> input
@@ -83,6 +85,7 @@ For example:
        [2, 5]])
 So, given a input tensor of shape(N, C, H, W) and the axis is {0, 2, 3, 1},
 the output tensor shape will be (N, H, W, C)
+
 )DOC");
   }
 };

From 145cfb434a939ca7a318930a1055dba1ea8f279f Mon Sep 17 00:00:00 2001
From: Kavya Srinet
Date: Sat, 4 Nov 2017 15:52:00 -0700
Subject: [PATCH 5/5] small change to restart TeamCity CI

---
 paddle/operators/smooth_l1_loss_op.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/paddle/operators/smooth_l1_loss_op.cc b/paddle/operators/smooth_l1_loss_op.cc
index 61dff8d271141..ebf7b43700a74 100644
--- a/paddle/operators/smooth_l1_loss_op.cc
+++ b/paddle/operators/smooth_l1_loss_op.cc
@@ -81,8 +81,8 @@ Smooth L1 Loss Operator.
 
 This operator computes the smooth l1 loss for input and target.
 The operator takes the first dimension of input as the batch size.
-For each instance, it computes the smooth l1 loss, element by element first
-and then sums all the losses to one value. So the resulting output shape
+For each instance, it computes the smooth l1 loss element by element first
+and then sums all the losses. So the resulting output shape
 is [batch_size, 1].
 
 The equation is:
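For reference, the two loss equations documented in PATCH 3/5 can be checked
against a NumPy sketch like the one below (a hypothetical helper, not the
fused C++ kernel; hard labels are class indices, soft labels are per-class
distributions):

import numpy as np

def softmax_with_cross_entropy(logits, label, soft_label=False):
    # log softmax: Logit_i - log(sum_i exp(Logit_i)), shifted for stability.
    shifted = logits - logits.max(axis=1, keepdims=True)
    log_softmax = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
    if soft_label:
        # 2) Soft label: Loss = -sum_i Label_i * (Logit_i - log(sum_i exp(Logit_i)))
        return -(label * log_softmax).sum(axis=1, keepdims=True)
    # 1) Hard label: Loss = -Logit_{Label} + log(sum_i exp(Logit_i))
    rows = np.arange(logits.shape[0])
    return -log_softmax[rows, label.ravel()].reshape(-1, 1)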