diff --git a/python/paddle/fluid/tests/unittests/test_smooth_l1_loss.py b/python/paddle/fluid/tests/unittests/test_smooth_l1_loss.py
new file mode 100644
index 0000000000000..9a97f57aaae5f
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_smooth_l1_loss.py
@@ -0,0 +1,181 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import paddle
+import paddle.fluid as fluid
+import numpy as np
+import unittest
+
+
+def smooth_l1_loss_forward(val, delta):
+    abs_val = abs(val)
+    if abs_val <= delta:
+        return 0.5 * val * val
+    else:
+        return delta * (abs_val - 0.5 * delta)
+
+
+def smooth_l1_loss_np(input, label, reduction='mean', delta=1.0):
+    diff = input - label
+    out = np.vectorize(smooth_l1_loss_forward)(diff, delta)
+    if reduction == 'sum':
+        return np.sum(out)
+    elif reduction == 'mean':
+        return np.mean(out)
+    elif reduction == 'none':
+        return out
+
+
+class SmoothL1Loss(unittest.TestCase):
+    def setUp(self):
+        np.random.seed(123)
+
+    def test_smooth_l1_loss_mean(self):
+        input_np = np.random.random([100, 200]).astype(np.float32)
+        label_np = np.random.random([100, 200]).astype(np.float32)
+        prog = fluid.Program()
+        startup_prog = fluid.Program()
+        place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
+        ) else fluid.CPUPlace()
+        with fluid.program_guard(prog, startup_prog):
+            input = fluid.data(name='input', shape=[100, 200], dtype='float32')
+            label = fluid.data(name='label', shape=[100, 200], dtype='float32')
+            smooth_l1_loss = paddle.nn.loss.SmoothL1Loss()
+            ret = smooth_l1_loss(input, label)
+
+            exe = fluid.Executor(place)
+            static_ret = exe.run(prog,
+                                 feed={
+                                     'input': input_np,
+                                     'label': label_np,
+                                 },
+                                 fetch_list=[ret])
+            self.assertIsNotNone(static_ret)
+        with fluid.dygraph.guard():
+            smooth_l1_loss = paddle.nn.loss.SmoothL1Loss()
+            dy_ret = smooth_l1_loss(
+                fluid.dygraph.to_variable(input_np),
+                fluid.dygraph.to_variable(label_np))
+            dy_ret_value = dy_ret.numpy()
+            self.assertIsNotNone(dy_ret_value)
+        expected = smooth_l1_loss_np(input_np, label_np, reduction='mean')
+        self.assertTrue(np.allclose(static_ret, dy_ret_value))
+        self.assertTrue(np.allclose(static_ret, expected))
+        self.assertTrue(np.allclose(dy_ret_value, expected))
+
+    def test_smooth_l1_loss_sum(self):
+        input_np = np.random.random([100, 200]).astype(np.float32)
+        label_np = np.random.random([100, 200]).astype(np.float32)
+        prog = fluid.Program()
+        startup_prog = fluid.Program()
+        place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
+        ) else fluid.CPUPlace()
+        with fluid.program_guard(prog, startup_prog):
+            input = fluid.data(name='input', shape=[100, 200], dtype='float32')
+            label = fluid.data(name='label', shape=[100, 200], dtype='float32')
+            smooth_l1_loss = paddle.nn.loss.SmoothL1Loss(reduction='sum')
+            ret = smooth_l1_loss(input, label)
+
+            exe = fluid.Executor(place)
+            static_ret = exe.run(prog,
+                                 feed={
+                                     'input': input_np,
+                                     'label': label_np,
+                                 },
+                                 fetch_list=[ret])
+            self.assertIsNotNone(static_ret)
+        with fluid.dygraph.guard():
+            smooth_l1_loss = paddle.nn.loss.SmoothL1Loss(reduction='sum')
+            dy_ret = smooth_l1_loss(
+                fluid.dygraph.to_variable(input_np),
+                fluid.dygraph.to_variable(label_np))
+            dy_ret_value = dy_ret.numpy()
+            self.assertIsNotNone(dy_ret_value)
+        expected = smooth_l1_loss_np(input_np, label_np, reduction='sum')
+        self.assertTrue(np.allclose(static_ret, dy_ret_value))
+        self.assertTrue(np.allclose(static_ret, expected))
+        self.assertTrue(np.allclose(dy_ret_value, expected))
+
+    def test_smooth_l1_loss_none(self):
+        input_np = np.random.random([100, 200]).astype(np.float32)
+        label_np = np.random.random([100, 200]).astype(np.float32)
+        prog = fluid.Program()
+        startup_prog = fluid.Program()
+        place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
+        ) else fluid.CPUPlace()
+        with fluid.program_guard(prog, startup_prog):
+            input = fluid.data(name='input', shape=[100, 200], dtype='float32')
+            label = fluid.data(name='label', shape=[100, 200], dtype='float32')
+            smooth_l1_loss = paddle.nn.loss.SmoothL1Loss(reduction='none')
+            ret = smooth_l1_loss(input, label)
+
+            exe = fluid.Executor(place)
+            static_ret = exe.run(prog,
+                                 feed={
+                                     'input': input_np,
+                                     'label': label_np,
+                                 },
+                                 fetch_list=[ret])
+            self.assertIsNotNone(static_ret)
+        with fluid.dygraph.guard():
+            smooth_l1_loss = paddle.nn.loss.SmoothL1Loss(reduction='none')
+            dy_ret = smooth_l1_loss(
+                fluid.dygraph.to_variable(input_np),
+                fluid.dygraph.to_variable(label_np))
+            dy_ret_value = dy_ret.numpy()
+            self.assertIsNotNone(dy_ret_value)
+        expected = smooth_l1_loss_np(input_np, label_np, reduction='none')
+        self.assertTrue(np.allclose(static_ret, dy_ret_value))
+        self.assertTrue(np.allclose(static_ret, expected))
+        self.assertTrue(np.allclose(dy_ret_value, expected))
+
+    def test_smooth_l1_loss_delta(self):
+        input_np = np.random.random([100, 200]).astype(np.float32)
+        label_np = np.random.random([100, 200]).astype(np.float32)
+        delta = np.random.rand()
+        prog = fluid.Program()
+        startup_prog = fluid.Program()
+        place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
+        ) else fluid.CPUPlace()
+        with fluid.program_guard(prog, startup_prog):
+            input = fluid.data(name='input', shape=[100, 200], dtype='float32')
+            label = fluid.data(name='label', shape=[100, 200], dtype='float32')
+            smooth_l1_loss = paddle.nn.loss.SmoothL1Loss(delta=delta)
+            ret = smooth_l1_loss(input, label)
+
+            exe = fluid.Executor(place)
+            static_ret = exe.run(prog,
+                                 feed={
+                                     'input': input_np,
+                                     'label': label_np,
+                                 },
+                                 fetch_list=[ret])
+            self.assertIsNotNone(static_ret)
+        with fluid.dygraph.guard():
+            smooth_l1_loss = paddle.nn.loss.SmoothL1Loss(delta=delta)
+            dy_ret = smooth_l1_loss(
+                fluid.dygraph.to_variable(input_np),
+                fluid.dygraph.to_variable(label_np))
+            dy_ret_value = dy_ret.numpy()
+            self.assertIsNotNone(dy_ret_value)
+        expected = smooth_l1_loss_np(input_np, label_np, delta=delta)
+        self.assertTrue(np.allclose(static_ret, dy_ret_value))
+        self.assertTrue(np.allclose(static_ret, expected))
+        self.assertTrue(np.allclose(dy_ret_value, expected))
+
+
+if __name__ == "__main__":
+    unittest.main()
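Reviewer aside: the NumPy reference above is easy to sanity-check by hand. A minimal standalone sketch follows; it duplicates `smooth_l1_loss_forward` from the test file so it runs without Paddle, and the expected values are worked out directly from the Huber formula (this snippet is illustrative, not part of the patch):

    import numpy as np

    def smooth_l1_loss_forward(val, delta):
        # Same reference as the test file: squared branch inside the delta
        # band, delta-scaled L1 branch outside it.
        abs_val = abs(val)
        if abs_val <= delta:
            return 0.5 * val * val
        return delta * (abs_val - 0.5 * delta)

    # L1 branch: 1.0 * (2.0 - 0.5 * 1.0) = 1.5
    assert np.isclose(smooth_l1_loss_forward(2.0, 1.0), 1.5)
    # L2 branch: 0.5 * 0.5**2 = 0.125
    assert np.isclose(smooth_l1_loss_forward(0.5, 1.0), 0.125)
    # The loss depends only on |diff|, so it is symmetric in the sign.
    assert np.isclose(smooth_l1_loss_forward(-2.0, 1.0), 1.5)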
diff --git a/python/paddle/nn/__init__.py b/python/paddle/nn/__init__.py
index a52d45521fd1b..748fcf856aab2 100644
--- a/python/paddle/nn/__init__.py
+++ b/python/paddle/nn/__init__.py
@@ -88,6 +88,7 @@
 from .layer.loss import BCELoss  #DEFINE_ALIAS
 from .layer.loss import KLDivLoss  #DEFINE_ALIAS
 from .layer.loss import MarginRankingLoss  #DEFINE_ALIAS
+from .layer.loss import SmoothL1Loss  #DEFINE_ALIAS
 from .layer.norm import BatchNorm  #DEFINE_ALIAS
 from .layer.norm import GroupNorm  #DEFINE_ALIAS
 from .layer.norm import LayerNorm  #DEFINE_ALIAS
diff --git a/python/paddle/nn/functional/__init__.py b/python/paddle/nn/functional/__init__.py
index bc71b8bdf06d2..89480bf7eed37 100644
--- a/python/paddle/nn/functional/__init__.py
+++ b/python/paddle/nn/functional/__init__.py
@@ -139,6 +139,7 @@
 from .loss import sigmoid_cross_entropy_with_logits  #DEFINE_ALIAS
 from .loss import sigmoid_focal_loss  #DEFINE_ALIAS
 from .loss import smooth_l1  #DEFINE_ALIAS
+from .loss import smooth_l1_loss  #DEFINE_ALIAS
 from .loss import softmax_with_cross_entropy  #DEFINE_ALIAS
 from .loss import square_error_cost  #DEFINE_ALIAS
 from .loss import ssd_loss  #DEFINE_ALIAS
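With both registrations above in place, the new loss is reachable from either namespace. A quick smoke check, assuming a Paddle build that includes this patch:

    import paddle.nn as nn
    import paddle.nn.functional as F

    # Both aliases should resolve once the __init__ edits above are built in.
    assert hasattr(nn, 'SmoothL1Loss')
    assert hasattr(F, 'smooth_l1_loss')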
diff --git a/python/paddle/nn/functional/loss.py b/python/paddle/nn/functional/loss.py
index f8bc0b1b54e96..44dcd5458b7b2 100644
--- a/python/paddle/nn/functional/loss.py
+++ b/python/paddle/nn/functional/loss.py
@@ -65,6 +65,7 @@
     'sigmoid_cross_entropy_with_logits',
     'sigmoid_focal_loss',
     'smooth_l1',
+    'smooth_l1_loss',
     'softmax_with_cross_entropy',
     'square_error_cost',
     'ssd_loss',
@@ -72,6 +73,84 @@
 ]
 
 
+def smooth_l1_loss(input, label, reduction='mean', delta=1.0, name=None):
+    """
+    This operator calculates the smooth L1 loss. It creates a criterion that
+    uses a squared term if the absolute element-wise error falls below delta
+    and a delta-scaled L1 term otherwise. In some cases it can prevent
+    exploding gradients and it is more robust and less sensitive to outliers.
+    Also known as the Huber loss:
+
+    .. math::
+
+        loss(x, y) = \\frac{1}{n}\\sum_{i}z_i
+
+    where :math:`z_i` is given by:
+
+    .. math::
+
+        \\mathop{z_i} = \\left\\{\\begin{array}{rcl}
+            0.5(x_i - y_i)^2 & & {if |x_i - y_i| <= delta} \\\\
+            delta * |x_i - y_i| - 0.5 * delta^2 & & {otherwise}
+        \\end{array} \\right.
+
+    Parameters:
+        input (Tensor): Input tensor, the data type is float32 or float64.
+            Shape is (N, C), where C is the number of classes; if the shape is
+            more than 2D, it is (N, C, D1, D2, ..., Dk), k >= 1.
+        label (Tensor): Label tensor, the data type is float32 or float64.
+            The shape of label is the same as the shape of input.
+        reduction (str, optional): Indicate how to reduce the loss,
+            the candidates are ``'none'`` | ``'mean'`` | ``'sum'``.
+            If :attr:`reduction` is ``'mean'``, the reduced mean loss is returned;
+            If :attr:`reduction` is ``'sum'``, the reduced sum loss is returned;
+            If :attr:`reduction` is ``'none'``, the unreduced loss is returned.
+            Default is ``'mean'``.
+        delta (float, optional): The threshold at which the loss switches from
+            the squared term to the L1 term. Errors smaller than delta are
+            minimized with the squared (L2) term; larger errors use the L1
+            term. The parameter is ignored for negative/zero values.
+            Default is 1.0.
+        name (str, optional): Name for the operation (optional, default is
+            None). For more information, please refer to :ref:`api_guide_Name`.
+
+    Returns:
+        The tensor variable storing the smooth_l1_loss of input and label.
+
+    Return type: Tensor.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            import numpy as np
+
+            paddle.disable_static()
+            input_data = np.random.rand(3, 3).astype("float32")
+            label_data = np.random.rand(3, 3).astype("float32")
+            input = paddle.to_tensor(input_data)
+            label = paddle.to_tensor(label_data)
+            output = paddle.nn.functional.smooth_l1_loss(input, label)
+            print(output.numpy())
+    """
+    fluid.data_feeder.check_variable_and_dtype(
+        input, 'input', ['float32', 'float64'], 'smooth_l1_loss')
+    fluid.data_feeder.check_variable_and_dtype(
+        label, 'label', ['float32', 'float64'], 'smooth_l1_loss')
+
+    out = huber_loss(input=input, label=label, delta=delta)
+
+    if reduction not in ['sum', 'mean', 'none']:
+        raise ValueError(
+            "The value of 'reduction' in smooth_l1_loss should be 'sum', 'mean' or"
+            " 'none', but received %s, which is not allowed." % reduction)
+    if reduction == 'none':
+        return out
+    elif reduction == 'mean':
+        return fluid.layers.reduce_mean(out)
+    elif reduction == 'sum':
+        return fluid.layers.reduce_sum(out)
+
+
 def margin_ranking_loss(input,
                         other,
                         label,
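The reduction and delta semantics of the new functional can be exercised end to end. An illustrative dygraph sketch, assuming a build with this patch; the expected numbers follow from the Huber formula above rather than from the test fixtures:

    import numpy as np
    import paddle
    import paddle.nn.functional as F

    paddle.disable_static()
    x = paddle.to_tensor(np.array([[0.0, 3.0]], dtype="float32"))
    y = paddle.to_tensor(np.array([[0.5, 0.0]], dtype="float32"))

    # |x - y| = [0.5, 3.0]; with the default delta=1.0 the first element takes
    # the squared branch (0.5 * 0.5**2 = 0.125) and the second the L1 branch
    # (1.0 * (3.0 - 0.5) = 2.5).
    print(F.smooth_l1_loss(x, y, reduction='none').numpy())  # ~[[0.125, 2.5]]
    print(F.smooth_l1_loss(x, y).numpy())                    # mean: ~1.3125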
diff --git a/python/paddle/nn/layer/__init__.py b/python/paddle/nn/layer/__init__.py
index 9fb8ea78a16ab..10b1379c9f3ea 100644
--- a/python/paddle/nn/layer/__init__.py
+++ b/python/paddle/nn/layer/__init__.py
@@ -64,6 +64,7 @@
 from .loss import BCELoss  #DEFINE_ALIAS
 from .loss import KLDivLoss  #DEFINE_ALIAS
 from .loss import MarginRankingLoss  #DEFINE_ALIAS
+from .loss import SmoothL1Loss  #DEFINE_ALIAS
 from .norm import BatchNorm  #DEFINE_ALIAS
 from .norm import GroupNorm  #DEFINE_ALIAS
 from .norm import LayerNorm  #DEFINE_ALIAS
diff --git a/python/paddle/nn/layer/loss.py b/python/paddle/nn/layer/loss.py
index 5067264ee792d..8503cdce96726 100644
--- a/python/paddle/nn/layer/loss.py
+++ b/python/paddle/nn/layer/loss.py
@@ -27,7 +27,8 @@
     'NLLLoss',
     'BCELoss',
     'KLDivLoss',
-    'MarginRankingLoss'
+    'MarginRankingLoss',
+    'SmoothL1Loss',
 ]
 
 
@@ -700,7 +701,7 @@ class MarginRankingLoss(fluid.dygraph.Layer):
     def __init__(self, margin=0.0, reduction='mean', name=None):
         if reduction not in ['sum', 'mean', 'none']:
             raise ValueError(
-                "The value of 'reduction' in L1Loss should be 'sum', 'mean' or 'none', but "
+                "The value of 'reduction' in MarginRankingLoss should be 'sum', 'mean' or 'none', but "
                 "received %s, which is not allowed." % reduction)
         super(MarginRankingLoss, self).__init__()
         self.margin = margin
@@ -711,3 +712,79 @@ def forward(self, input, other, label):
         out = paddle.nn.functional.margin_ranking_loss(
             input, other, label, self.margin, self.reduction, self.name)
         return out
+
+
+class SmoothL1Loss(fluid.dygraph.Layer):
+    """
+    This operator calculates the smooth L1 loss. It creates a criterion that
+    uses a squared term if the absolute element-wise error falls below delta
+    and a delta-scaled L1 term otherwise. In some cases it can prevent
+    exploding gradients and it is more robust and less sensitive to outliers.
+    Also known as the Huber loss:
+
+    .. math::
+
+        loss(x, y) = \\frac{1}{n}\\sum_{i}z_i
+
+    where :math:`z_i` is given by:
+
+    .. math::
+
+        \\mathop{z_i} = \\left\\{\\begin{array}{rcl}
+            0.5(x_i - y_i)^2 & & {if |x_i - y_i| <= delta} \\\\
+            delta * |x_i - y_i| - 0.5 * delta^2 & & {otherwise}
+        \\end{array} \\right.
+
+    Parameters:
+        reduction (str, optional): Indicate how to reduce the loss,
+            the candidates are ``'none'`` | ``'mean'`` | ``'sum'``.
+            If :attr:`reduction` is ``'mean'``, the reduced mean loss is returned;
+            If :attr:`reduction` is ``'sum'``, the reduced sum loss is returned;
+            If :attr:`reduction` is ``'none'``, the unreduced loss is returned.
+            Default is ``'mean'``.
+        delta (float, optional): The threshold at which the loss switches from
+            the squared term to the L1 term. Errors smaller than delta are
+            minimized with the squared (L2) term; larger errors use the L1
+            term. The parameter is ignored for negative/zero values.
+            Default is 1.0.
+        name (str, optional): Name for the operation (optional, default is
+            None). For more information, please refer to :ref:`api_guide_Name`.
+
+    Call Parameters:
+        input (Tensor): Input tensor, the data type is float32 or float64.
+            Shape is (N, C), where C is the number of classes; if the shape is
+            more than 2D, it is (N, C, D1, D2, ..., Dk), k >= 1.
+        label (Tensor): Label tensor, the data type is float32 or float64.
+            The shape of label is the same as the shape of input.
+
+    Returns:
+        The tensor variable storing the smooth_l1_loss of input and label.
+
+    Return type: Tensor.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            import numpy as np
+
+            paddle.disable_static()
+            input_data = np.random.rand(3, 3).astype("float32")
+            label_data = np.random.rand(3, 3).astype("float32")
+            input = paddle.to_tensor(input_data)
+            label = paddle.to_tensor(label_data)
+            loss = paddle.nn.SmoothL1Loss()
+            output = loss(input, label)
+            print(output.numpy())
+    """
+
+    def __init__(self, reduction='mean', delta=1.0, name=None):
+        super(SmoothL1Loss, self).__init__()
+        self.reduction = reduction
+        self.delta = delta
+        self.name = name
+
+    def forward(self, input, label):
+        return F.smooth_l1_loss(
+            input,
+            label,
+            reduction=self.reduction,
+            delta=self.delta,
+            name=self.name)
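Since the layer simply forwards its constructor state to `F.smooth_l1_loss`, the two call paths should agree. A final illustrative cross-check, again assuming a patched build:

    import numpy as np
    import paddle
    import paddle.nn.functional as F

    paddle.disable_static()
    np.random.seed(0)
    x = paddle.to_tensor(np.random.rand(4, 5).astype("float32"))
    y = paddle.to_tensor(np.random.rand(4, 5).astype("float32"))

    # The layer holds reduction/delta from __init__ and passes them through in
    # forward, so it must match the direct functional call.
    via_layer = paddle.nn.SmoothL1Loss(reduction='sum', delta=0.5)(x, y).numpy()
    via_func = F.smooth_l1_loss(x, y, reduction='sum', delta=0.5).numpy()
    assert np.allclose(via_layer, via_func)

Keeping `reduction` and `delta` as constructor state mirrors the other loss layers in this file (e.g. `MarginRankingLoss`), so the call-site signature stays a plain `(input, label)`.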