From bcdab376bd86d0942bc763c64d4d53d4706998fc Mon Sep 17 00:00:00 2001
From: YibinLiu666 <2632839426@qq.com>
Date: Fri, 13 Oct 2023 06:15:30 +0000
Subject: [PATCH 01/16] support tensor.to and layer.astype

---
 .../base/dygraph/tensor_patch_methods.py      | 79 +++++++++++++++++++
 python/paddle/nn/layer/layers.py              | 10 ++-
 2 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/python/paddle/base/dygraph/tensor_patch_methods.py b/python/paddle/base/dygraph/tensor_patch_methods.py
index b01c7a70e44066..8dd23c2715e3ca 100644
--- a/python/paddle/base/dygraph/tensor_patch_methods.py
+++ b/python/paddle/base/dygraph/tensor_patch_methods.py
@@ -512,6 +512,84 @@ def transform(t, device, dtype, blocking):
             warnings.filterwarnings("ignore", category=UserWarning)
             return transform(self, device, dtype, blocking)
 
+    @framework.dygraph_only
+    def to(self, *args, **kwargs):
+        device = None
+        dtype = None
+        blocking = None
+        size_args = len(args)
+        size_kwargs = len(kwargs)
+
+        def get_device_dtype_from_tensor(other):
+            if other is not None:
+                device = str(other.place)[6:-1]
+                dtype = other.dtype
+                return device, dtype
+            else:
+                return None, None
+
+        if size_args + size_kwargs > 3 or size_args + size_kwargs == 0:
+            raise TypeError(
+                "to() received too mant arguments - expected one of:\n* (Union[str,paddle.fluid.libpaddle.Place] device, Union[str, paddle.dtype] dtype, bool blocking)\n* (Union[str, paddle.dtype] dtype, bool blocking)\n* (paddle.Tensor other, bool blocking) "
+            )
+        valid_keys = set(["device", "dtype", "non_blocking", "other"])
+        valid_dtypes = [
+            "bfloat16",
+            "float16",
+            "float32",
+            "float64",
+            "int8",
+            "int16",
+            "int32",
+            "int64",
+            "uint8",
+            "uint16",
+            "complex64",
+            "complex128",
+            "bool",
+        ]
+        invalid_keys = set(kwargs.keys()) - valid_keys
+        if len(invalid_keys) != 0:
+            raise TypeError(
+                "to() got an unexpected keyword argument "
+                + list(invalid_keys)[0]
+            )
+        if size_args > 0:
+            if isinstance(args[0], paddle.Tensor):
+                device, dtype = get_device_dtype_from_tensor(args[0])
+                if size_args == 2:
+                    blocking = args[1]
+                else:
+                    blocking = kwargs.get("blocking", None)
+            elif (
+                isinstance(args[0], paddle.dtype)
+                or isinstance(args[0], str)
+                and args[0].lower() in valid_dtypes
+            ):
+                dtype = args[0]
+                if size_args == 2:
+                    blocking = args[1]
+                else:
+                    blocking = kwargs.get("blocking", None)
+            else:
+                device = args[0]
+                if size_args == 2:
+                    dtype = args[1]
+                elif size_args == 3:
+                    dtype, blocking = args[1], args[2]
+                else:
+                    dtype = kwargs.get("dtype", None)
+                    blocking = kwargs.get("blocking", None)
+        else:
+            device = kwargs.get("device", None)
+            dtype = kwargs.get("dtype", None)
+            blocking = kwargs.get("blocking", None)
+            if device is None and dtype is None:
+                device, dtype = get_device_dtype_from_tensor(
+                    kwargs.get("other", None)
+                )
+        return self._to(device, dtype, blocking)
+
     @property
     def grad(self):
         """
@@ -1020,6 +1098,7 @@ def coalesce(self, name=None):
         ("item", item),
         ("__setitem__", __setitem__),
         ("_to", _to),
+        ("to", to),
         ("values", values),
         ("to_dense", to_dense),
         ("to_sparse_coo", to_sparse_coo),
diff --git a/python/paddle/nn/layer/layers.py b/python/paddle/nn/layer/layers.py
index 204023378b5d33..3dc67bdbd3a368 100644
--- a/python/paddle/nn/layer/layers.py
+++ b/python/paddle/nn/layer/layers.py
@@ -919,6 +919,14 @@ def parameters(self, include_sublayers=True):
         ]
         return ret
 
+    def astype(self, dtype=None):
+        self._dtype = dtype
+        for _, param in self.named_parameters(include_sublayers=True):
+            param._to(None, dtype)
+        for _, buffer in self.named_buffers(include_sublayers=True):
+            buffer.to(None, dtype)
+        return self
+
     def children(self):
         """
 
@@ -2139,7 +2147,7 @@ def _transform(self, t, device, dtype, blocking):
         if t.place.is_gpu_place():
             # for gpu, minimum memory allocation unit is 256 bytes.
             size_dtype = core.size_of_dtype(dtype)
-            # Note(zhangbo): Paddle GPU minimum memory allocation unit is 256 bytes, waiting_alloc_memory will comput ‘t’ occupied memory space.
+            # Note(zhangbo): Paddle GPU minimum memory allocation unit is 256 bytes, waiting_alloc_memory will comput ‘t�? occupied memory space.
             # Coefficient 1.2 is used to avoid OOM that may occur in this critical state when the memory is just enough.
             waiting_alloc_memory = (
                 ((np.prod(t.shape) * size_dtype) / 256 + 1) * 256 * 1.2

From 5b182622628bda511ce07e6fc4cb7c8c1652aecb Mon Sep 17 00:00:00 2001
From: YibinLiu666 <2632839426@qq.com>
Date: Thu, 19 Oct 2023 09:36:49 +0000
Subject: [PATCH 02/16] add UT and comments

---
 .../base/dygraph/tensor_patch_methods.py      |  44 +++++-
 python/paddle/nn/layer/layers.py              |  99 +++++++++++++-
 test/legacy_test/test_Tensor_to.py            | 128 ++++++++++++++++++
 test/legacy_test/test_layer_astype.py         |  51 +++++++
 4 files changed, 313 insertions(+), 9 deletions(-)
 create mode 100644 test/legacy_test/test_Tensor_to.py
 create mode 100644 test/legacy_test/test_layer_astype.py

diff --git a/python/paddle/base/dygraph/tensor_patch_methods.py b/python/paddle/base/dygraph/tensor_patch_methods.py
index 8dd23c2715e3ca..31ce8e8acc4504 100644
--- a/python/paddle/base/dygraph/tensor_patch_methods.py
+++ b/python/paddle/base/dygraph/tensor_patch_methods.py
@@ -514,6 +514,47 @@ def transform(t, device, dtype, blocking):
 
     @framework.dygraph_only
     def to(self, *args, **kwargs):
+        """
+        Performs Tensor dtype and/or device conversion. A torch.dtype and torch.device
+        are inferred from the arguments of self.to(*args, **kwargs).There are three ways
+        to call `to`
+            to(dtype, blocking=True)
+            to(device, dtype=None, blocking=True)
+            to(other, blocking=True)
+
+        Returns:
+            Tensor: self
+
+        Examples:
+            .. code-block:: python
+
+                >>> import paddle
+                >>> tensorx = paddle.to_tensor([1,2,3])
+                >>> print(tensorx)
+                Tensor(shape=[3], dtype=int64, place=Place(gpu:0), stop_gradient=True,
+                    [1, 2, 3])
+
+                >>> tensorx = tensorx.to("cpu")
+                >>> print(tensorx.place)
+                Place(cpu)
+
+                >>> tensorx = tensorx.to("float32")
+                >>> print(tensorx.dtype)
+                paddle.float32
+
+                >>> tensorx = tensorx.to("gpu", "int16")
+                >>> print(tensorx)
+                Tensor(shape=[3], dtype=int16, place=Place(gpu:0), stop_gradient=True,
+                    [1, 2, 3])
+                >>> tensor2 = paddle.to_tensor([4,5,6])
+                >>> tensor2
+                Tensor(shape=[3], dtype=int64, place=Place(gpu:0), stop_gradient=True,
+                    [4, 5, 6])
+                >>> tensor2 = tensor2.to(tensorx)
+                >>> print(tensor2)
+                Tensor(shape=[3], dtype=int16, place=Place(gpu:0), stop_gradient=True,
+                    [4, 5, 6])
+        """
         device = None
         dtype = None
         blocking = None
@@ -530,7 +571,7 @@ def get_device_dtype_from_tensor(other):
 
         if size_args + size_kwargs > 3 or size_args + size_kwargs == 0:
             raise TypeError(
-                "to() received too mant arguments - expected one of:\n* (Union[str,paddle.fluid.libpaddle.Place] device, Union[str, paddle.dtype] dtype, bool blocking)\n* (Union[str, paddle.dtype] dtype, bool blocking)\n* (paddle.Tensor other, bool blocking) "
+                "to() received too mant arguments - expected one of:\n* (Union[str, paddle.CPUPlace(), paddle.CUDAPlace(), paddle.CUDAPinnedPlace(), paddle.XPUPlace(), paddle.CustomPlace()] device, Union[str, paddle.dtype] dtype, bool blocking)\n* (Union[str, paddle.dtype] dtype, bool blocking)\n* (paddle.Tensor other, bool blocking) "
             )
         valid_keys = set(["device", "dtype", "non_blocking", "other"])
         valid_dtypes = [
@@ -543,7 +584,6 @@ def get_device_dtype_from_tensor(other):
             "int32",
             "int64",
             "uint8",
-            "uint16",
             "complex64",
             "complex128",
             "bool",
diff --git a/python/paddle/nn/layer/layers.py b/python/paddle/nn/layer/layers.py
index 3dc67bdbd3a368..a786f350f36417 100644
--- a/python/paddle/nn/layer/layers.py
+++ b/python/paddle/nn/layer/layers.py
@@ -920,12 +920,97 @@ def parameters(self, include_sublayers=True):
         return ret
 
     def astype(self, dtype=None):
-        self._dtype = dtype
-        for _, param in self.named_parameters(include_sublayers=True):
-            param._to(None, dtype)
-        for _, buffer in self.named_buffers(include_sublayers=True):
-            buffer.to(None, dtype)
-        return self
+        """
+
+        Casts all parameters and buffers to dtype and then return the Layer.
+
+        Parameters:
+            dtype(str|paddle.dtype): target data type of layer.
+                If set str, it can be "bool",  "float16", "float32", "float64",
+                "int8", "int16", "int32", "int64", "uint8", "complex64", "complex128".
+                Default: None
+
+        Returns:
+            Layer, self
+
+        Examples:
+            .. code-block:: python
+
+                >>> import paddle
+                >>> import paddle.nn as nn
+
+                >>> net = nn.Sequential(nn.Linear(2, 2), nn.Linear(2, 2))
+                >>> print(net)
+                Sequential(
+                (0): Linear(in_features=2, out_features=2, dtype=float32)
+                (1): Linear(in_features=2, out_features=2, dtype=float32)
+                )
+                >>> print(net.parameters())
+                [Parameter containing:
+                Tensor(shape=[2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=False,
+                    [[ 0.12119570,  0.71133953],
+                        [-0.46748328,  0.27421260]]), Parameter containing:
+                Tensor(shape=[2], dtype=float32, place=Place(gpu:0), stop_gradient=False,
+                    [0., 0.]), Parameter containing:
+                Tensor(shape=[2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=False,
+                    [[-0.28993332, -0.89435989],
+                        [ 0.12127877, -1.12000990]]), Parameter containing:
+                Tensor(shape=[2], dtype=float32, place=Place(gpu:0), stop_gradient=False,
+                    [0., 0.])]
+
+                >>> net.astype("int8")
+                >>> print(net)
+                Sequential(
+                (0): Linear(in_features=2, out_features=2, dtype=paddle.int8)
+                (1): Linear(in_features=2, out_features=2, dtype=paddle.int8)
+                )
+                >>> print(net.parameters())
+                [Parameter containing:
+                Tensor(shape=[2, 2], dtype=int8, place=Place(gpu:0), stop_gradient=False,
+                    [[0, 0],
+                        [0, 0]]), Parameter containing:
+                Tensor(shape=[2], dtype=int8, place=Place(gpu:0), stop_gradient=False,
+                    [0, 0]), Parameter containing:
+                Tensor(shape=[2, 2], dtype=int8, place=Place(gpu:0), stop_gradient=False,
+                    [[ 0,  0],
+                        [ 0, -1]]), Parameter containing:
+                Tensor(shape=[2], dtype=int8, place=Place(gpu:0), stop_gradient=False,
+                    [0, 0])]
+        """
+        valid_dtypes = [
+            "bfloat16",
+            "float16",
+            "float32",
+            "float64",
+            "int8",
+            "int16",
+            "int32",
+            "int64",
+            "uint8",
+            "complex64",
+            "complex128",
+            "bool",
+        ]
+        if (
+            type(dtype) is paddle.dtype
+            or type(dtype) is str
+            and dtype in valid_dtypes
+        ):
+            if type(dtype) is str:
+                dtype = framework.convert_np_dtype_to_dtype_(dtype)
+            self._dtype = dtype
+            for layer in self.sublayers():
+                layer._dtype = dtype
+            for _, param in self.named_parameters(include_sublayers=True):
+                param._to(None, dtype)
+            for _, buffer in self.named_buffers(include_sublayers=True):
+                buffer.to(None, dtype)
+            return self
+        else:
+            raise ValueError(
+                "dtype value error, must be 'bfloat16', 'float16', 'float32', 'float64', 'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'complex64', 'complex128', 'bool', or paddle.dtype, but recieve "
+                + str(dtype)
+            )
 
     def children(self):
         """
@@ -2147,7 +2232,7 @@ def _transform(self, t, device, dtype, blocking):
         if t.place.is_gpu_place():
             # for gpu, minimum memory allocation unit is 256 bytes.
             size_dtype = core.size_of_dtype(dtype)
-            # Note(zhangbo): Paddle GPU minimum memory allocation unit is 256 bytes, waiting_alloc_memory will comput ‘t�? occupied memory space.
+            # Note(zhangbo): Paddle GPU minimum memory allocation unit is 256 bytes, waiting_alloc_memory will comput ��t�� occupied memory space.
             # Coefficient 1.2 is used to avoid OOM that may occur in this critical state when the memory is just enough.
             waiting_alloc_memory = (
                 ((np.prod(t.shape) * size_dtype) / 256 + 1) * 256 * 1.2
diff --git a/test/legacy_test/test_Tensor_to.py b/test/legacy_test/test_Tensor_to.py
new file mode 100644
index 00000000000000..16f5df1369af3d
--- /dev/null
+++ b/test/legacy_test/test_Tensor_to.py
@@ -0,0 +1,128 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import paddle
+from paddle import base
+
+
+class TensorToTest(unittest.TestCase):
+    def test_Tensor_to_dtype(self):
+        tensorx = paddle.to_tensor([1, 2, 3])
+        valid_dtypes = [
+            "bfloat16",
+            "float16",
+            "float32",
+            "float64",
+            "int8",
+            "int16",
+            "int32",
+            "int64",
+            "uint8",
+            "complex64",
+            "complex128",
+            "bool",
+        ]
+        for dtype in valid_dtypes:
+            tensorx = tensorx.to(dtype)
+            typex_str = str(tensorx.dtype)
+            self.assertEqual((typex_str == "paddle." + dtype), True)
+
+    def test_Tensor_to_device(self):
+        tensorx = paddle.to_tensor([1, 2, 3])
+        places = ["cpu"]
+        if base.core.is_compiled_with_cuda():
+            places.append("gpu:0")
+            places.append("gpu")
+
+        for place in places:
+            tensorx = tensorx.to(place)
+            placex_str = str(tensorx.place)
+            if place == "gpu":
+                self.assertEqual((placex_str == "Place(" + place + ":0)"), True)
+            else:
+                self.assertEqual((placex_str == "Place(" + place + ")"), True)
+
+    def test_Tensor_to_device_dtype(self):
+        tensorx = paddle.to_tensor([1, 2, 3])
+        places = ["cpu"]
+        if base.core.is_compiled_with_cuda():
+            places.append("gpu:0")
+            places.append("gpu")
+        valid_dtypes = [
+            "bfloat16",
+            "float16",
+            "float32",
+            "float64",
+            "int8",
+            "int16",
+            "int32",
+            "int64",
+            "uint8",
+            "complex64",
+            "complex128",
+            "bool",
+        ]
+        for dtype in valid_dtypes:
+            for place in places:
+                tensorx = tensorx.to(place, dtype)
+                placex_str = str(tensorx.place)
+                if place == "gpu":
+                    self.assertEqual(
+                        (placex_str == "Place(" + place + ":0)"), True
+                    )
+                else:
+                    self.assertEqual(
+                        (placex_str == "Place(" + place + ")"), True
+                    )
+                typex_str = str(tensorx.dtype)
+                self.assertEqual((typex_str == "paddle." + dtype), True)
+
+    def test_Tensor_to_blocking(self):
+        tensorx = paddle.to_tensor([1, 2, 3])
+        tensorx = tensorx.to("cpu", "int32", False)
+        placex_str = str(tensorx.place)
+        self.assertEqual((placex_str == "Place(cpu)"), True)
+        typex_str = str(tensorx.dtype)
+        self.assertEqual((typex_str == "paddle.int32"), True)
+
+    def test_Tensor_to_other(self):
+        tensor1 = paddle.to_tensor([1, 2, 3], dtype="int8", place="cpu")
+        tensor2 = paddle.to_tensor([1, 2, 3])
+        tensor2 = tensor2.to(tensor1)
+        self.assertEqual((tensor2.dtype == tensor1.dtype), True)
+        self.assertEqual((type(tensor2.place) == type(tensor1.place)), True)
+
+    def test_error(self):
+        tensorx = paddle.to_tensor([1, 2, 3])
+        # device value error
+        try:
+            tensorx = tensorx.to("error_device")
+        except Exception as error:
+            self.assertIsInstance(error, ValueError)
+        # too many arguments
+        try:
+            tensorx = tensorx.to("cpu", "int32", False, "test_aug")
+        except Exception as error:
+            self.assertIsInstance(error, TypeError)
+        # invalid key
+        try:
+            tensorx = tensorx.to("cpu", "int32", test_key=False)
+        except Exception as error:
+            self.assertIsInstance(error, TypeError)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/legacy_test/test_layer_astype.py b/test/legacy_test/test_layer_astype.py
new file mode 100644
index 00000000000000..d47a88d05e6194
--- /dev/null
+++ b/test/legacy_test/test_layer_astype.py
@@ -0,0 +1,51 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import paddle
+
+
+class LayerAstypeTest(unittest.TestCase):
+    def test_layer_astype(self):
+        linear1 = paddle.nn.Linear(10, 3)
+        valid_dtypes = [
+            "bfloat16",
+            "float16",
+            "float32",
+            "float64",
+            "int8",
+            "int16",
+            "int32",
+            "int64",
+            "uint8",
+            "complex64",
+            "complex128",
+            "bool",
+        ]
+        for dtype in valid_dtypes:
+            linear1 = linear1.astype(dtype)
+            typex_str = str(linear1._dtype)
+            self.assertEqual((typex_str == "paddle." + dtype), True)
+
+    def test_error(self):
+        linear1 = paddle.nn.Linear(10, 3)
+        try:
+            linear1 = linear1.astype("invalid_type")
+        except Exception as error:
+            self.assertIsInstance(error, ValueError)
+
+
+if __name__ == '__main__':
+    unittest.main()

From da3397f7e9167f367833bb53149e16672f5e6dcd Mon Sep 17 00:00:00 2001
From: YibinLiu666 <2632839426@qq.com>
Date: Thu, 19 Oct 2023 09:42:43 +0000
Subject: [PATCH 03/16] update

---
 python/paddle/nn/layer/layers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/paddle/nn/layer/layers.py b/python/paddle/nn/layer/layers.py
index a786f350f36417..a1fc40335b1153 100644
--- a/python/paddle/nn/layer/layers.py
+++ b/python/paddle/nn/layer/layers.py
@@ -2232,7 +2232,7 @@ def _transform(self, t, device, dtype, blocking):
         if t.place.is_gpu_place():
             # for gpu, minimum memory allocation unit is 256 bytes.
             size_dtype = core.size_of_dtype(dtype)
-            # Note(zhangbo): Paddle GPU minimum memory allocation unit is 256 bytes, waiting_alloc_memory will comput ��t�� occupied memory space.
+            # Note(zhangbo): Paddle GPU minimum memory allocation unit is 256 bytes, waiting_alloc_memory will comput ‘t’ occupied memory space.
             # Coefficient 1.2 is used to avoid OOM that may occur in this critical state when the memory is just enough.
             waiting_alloc_memory = (
                 ((np.prod(t.shape) * size_dtype) / 256 + 1) * 256 * 1.2

From 7d3e3d3ae4bbbb3a8b86e8a9f5ae5c253dde0054 Mon Sep 17 00:00:00 2001
From: YibinLiu666 <2632839426@qq.com>
Date: Fri, 20 Oct 2023 06:00:26 +0000
Subject: [PATCH 04/16] update com

---
 python/paddle/base/dygraph/tensor_patch_methods.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/paddle/base/dygraph/tensor_patch_methods.py b/python/paddle/base/dygraph/tensor_patch_methods.py
index 31ce8e8acc4504..58d8d0dbdd9e83 100644
--- a/python/paddle/base/dygraph/tensor_patch_methods.py
+++ b/python/paddle/base/dygraph/tensor_patch_methods.py
@@ -515,7 +515,7 @@ def transform(t, device, dtype, blocking):
     @framework.dygraph_only
     def to(self, *args, **kwargs):
         """
-        Performs Tensor dtype and/or device conversion. A torch.dtype and torch.device
+        Performs Tensor dtype and/or device conversion. A paddle.dtype and place
         are inferred from the arguments of self.to(*args, **kwargs).There are three ways
         to call `to`
             to(dtype, blocking=True)
@@ -602,7 +602,7 @@ def get_device_dtype_from_tensor(other):
                 else:
                     blocking = kwargs.get("blocking", None)
             elif (
-                isinstance(args[0], paddle.dtype)
+                isinstance(args[0], paddle.dtype, np.dtype)
                 or isinstance(args[0], str)
                 and args[0].lower() in valid_dtypes
             ):

From f51b864772b1c2ea4dc8854b05357396c69e038a Mon Sep 17 00:00:00 2001
From: YibinLiu666 <2632839426@qq.com>
Date: Fri, 20 Oct 2023 06:07:28 +0000
Subject: [PATCH 05/16] update dtype

---
 python/paddle/base/dygraph/tensor_patch_methods.py | 2 +-
 python/paddle/nn/layer/layers.py                   | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/python/paddle/base/dygraph/tensor_patch_methods.py b/python/paddle/base/dygraph/tensor_patch_methods.py
index 58d8d0dbdd9e83..68c3a4e1b56ad2 100644
--- a/python/paddle/base/dygraph/tensor_patch_methods.py
+++ b/python/paddle/base/dygraph/tensor_patch_methods.py
@@ -602,7 +602,7 @@ def get_device_dtype_from_tensor(other):
                 else:
                     blocking = kwargs.get("blocking", None)
             elif (
-                isinstance(args[0], paddle.dtype, np.dtype)
+                isinstance(args[0], (paddle.dtype, np.dtype))
                 or isinstance(args[0], str)
                 and args[0].lower() in valid_dtypes
             ):
diff --git a/python/paddle/nn/layer/layers.py b/python/paddle/nn/layer/layers.py
index a1fc40335b1153..ddb35f80530a5a 100644
--- a/python/paddle/nn/layer/layers.py
+++ b/python/paddle/nn/layer/layers.py
@@ -925,7 +925,7 @@ def astype(self, dtype=None):
         Casts all parameters and buffers to dtype and then return the Layer.
 
         Parameters:
-            dtype(str|paddle.dtype): target data type of layer.
+            dtype(str|paddle.dtype|numpy.dtype): target data type of layer.
                 If set str, it can be "bool",  "float16", "float32", "float64",
                 "int8", "int16", "int32", "int64", "uint8", "complex64", "complex128".
                 Default: None
@@ -992,11 +992,11 @@ def astype(self, dtype=None):
             "bool",
         ]
         if (
-            type(dtype) is paddle.dtype
+            isinstance(dtype, (paddle.dtype, np.dtype))
             or type(dtype) is str
             and dtype in valid_dtypes
         ):
-            if type(dtype) is str:
+            if isinstance(dtype, (str, np.dtype)):
                 dtype = framework.convert_np_dtype_to_dtype_(dtype)
             self._dtype = dtype
             for layer in self.sublayers():

From d88abd8a7c89c18ac56fc2c2bad18798b6691861 Mon Sep 17 00:00:00 2001
From: YibinLiu666 <2632839426@qq.com>
Date: Mon, 23 Oct 2023 13:12:53 +0000
Subject: [PATCH 06/16] fix example test

---
 python/paddle/nn/layer/layers.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/python/paddle/nn/layer/layers.py b/python/paddle/nn/layer/layers.py
index ddb35f80530a5a..73579e80734475 100644
--- a/python/paddle/nn/layer/layers.py
+++ b/python/paddle/nn/layer/layers.py
@@ -938,6 +938,7 @@ def astype(self, dtype=None):
 
                 >>> import paddle
                 >>> import paddle.nn as nn
+                >>> paddle.seed(2023)
 
                 >>> net = nn.Sequential(nn.Linear(2, 2), nn.Linear(2, 2))
                 >>> print(net)
@@ -948,17 +949,17 @@ def astype(self, dtype=None):
                 >>> print(net.parameters())
                 [Parameter containing:
                 Tensor(shape=[2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=False,
-                    [[ 0.12119570,  0.71133953],
-                        [-0.46748328,  0.27421260]]), Parameter containing:
+                    [[ 0.76424706,  1.21572542],
+                        [ 0.02650531, -0.16404852]]), Parameter containing:
                 Tensor(shape=[2], dtype=float32, place=Place(gpu:0), stop_gradient=False,
                     [0., 0.]), Parameter containing:
                 Tensor(shape=[2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=False,
-                    [[-0.28993332, -0.89435989],
-                        [ 0.12127877, -1.12000990]]), Parameter containing:
+                    [[ 0.05151060, -1.02659082],
+                        [-1.20846939, -0.46195853]]), Parameter containing:
                 Tensor(shape=[2], dtype=float32, place=Place(gpu:0), stop_gradient=False,
                     [0., 0.])]
 
-                >>> net.astype("int8")
+                >>> net = net.astype("int8")
                 >>> print(net)
                 Sequential(
                 (0): Linear(in_features=2, out_features=2, dtype=paddle.int8)
@@ -967,13 +968,13 @@ def astype(self, dtype=None):
                 >>> print(net.parameters())
                 [Parameter containing:
                 Tensor(shape=[2, 2], dtype=int8, place=Place(gpu:0), stop_gradient=False,
-                    [[0, 0],
+                    [[0, 1],
                         [0, 0]]), Parameter containing:
                 Tensor(shape=[2], dtype=int8, place=Place(gpu:0), stop_gradient=False,
                     [0, 0]), Parameter containing:
                 Tensor(shape=[2, 2], dtype=int8, place=Place(gpu:0), stop_gradient=False,
-                    [[ 0,  0],
-                        [ 0, -1]]), Parameter containing:
+                    [[ 0, -1],
+                        [-1,  0]]), Parameter containing:
                 Tensor(shape=[2], dtype=int8, place=Place(gpu:0), stop_gradient=False,
                     [0, 0])]
         """
@@ -2232,7 +2233,7 @@ def _transform(self, t, device, dtype, blocking):
         if t.place.is_gpu_place():
             # for gpu, minimum memory allocation unit is 256 bytes.
             size_dtype = core.size_of_dtype(dtype)
-            # Note(zhangbo): Paddle GPU minimum memory allocation unit is 256 bytes, waiting_alloc_memory will comput ‘t’ occupied memory space.
+            # Note(zhangbo): Paddle GPU minimum memory allocation unit is 256 bytes, waiting_alloc_memory will comput ‘t’ occupied memory space.
             # Coefficient 1.2 is used to avoid OOM that may occur in this critical state when the memory is just enough.
             waiting_alloc_memory = (
                 ((np.prod(t.shape) * size_dtype) / 256 + 1) * 256 * 1.2

From 142a2cc8afc10f36a5532b6b48b139cb0c051512 Mon Sep 17 00:00:00 2001
From: YibinLiu666 <2632839426@qq.com>
Date: Thu, 26 Oct 2023 13:02:07 +0000
Subject: [PATCH 07/16] update example

---
 python/paddle/nn/layer/layers.py | 58 ++++++++++++--------------------
 1 file changed, 22 insertions(+), 36 deletions(-)

diff --git a/python/paddle/nn/layer/layers.py b/python/paddle/nn/layer/layers.py
index 73579e80734475..101ec9707d712a 100644
--- a/python/paddle/nn/layer/layers.py
+++ b/python/paddle/nn/layer/layers.py
@@ -938,45 +938,31 @@ def astype(self, dtype=None):
 
                 >>> import paddle
                 >>> import paddle.nn as nn
-                >>> paddle.seed(2023)
+                >>> weight_attr = paddle.ParamAttr(name="weight", initializer=paddle.nn.initializer.Constant(value=1.5))
+                >>> bias_attr = paddle.ParamAttr(name="bias", initializer=paddle.nn.initializer.Constant(value=2.5))
 
-                >>> net = nn.Sequential(nn.Linear(2, 2), nn.Linear(2, 2))
-                >>> print(net)
-                Sequential(
-                (0): Linear(in_features=2, out_features=2, dtype=float32)
-                (1): Linear(in_features=2, out_features=2, dtype=float32)
-                )
-                >>> print(net.parameters())
+                >>> linear = paddle.nn.Linear(2, 2, weight_attr=weight_attr, bias_attr=bias_attr).to(device="cpu", dtype="float32")
+                >>> print(linear)
+                Linear(in_features=2, out_features=2, dtype=float32)
+                >>> print(linear.parameters())
                 [Parameter containing:
-                Tensor(shape=[2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=False,
-                    [[ 0.76424706,  1.21572542],
-                        [ 0.02650531, -0.16404852]]), Parameter containing:
-                Tensor(shape=[2], dtype=float32, place=Place(gpu:0), stop_gradient=False,
-                    [0., 0.]), Parameter containing:
-                Tensor(shape=[2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=False,
-                    [[ 0.05151060, -1.02659082],
-                        [-1.20846939, -0.46195853]]), Parameter containing:
-                Tensor(shape=[2], dtype=float32, place=Place(gpu:0), stop_gradient=False,
-                    [0., 0.])]
-
-                >>> net = net.astype("int8")
-                >>> print(net)
-                Sequential(
-                (0): Linear(in_features=2, out_features=2, dtype=paddle.int8)
-                (1): Linear(in_features=2, out_features=2, dtype=paddle.int8)
-                )
-                >>> print(net.parameters())
+                Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
+                    [[1.50000000, 1.50000000],
+                        [1.50000000, 1.50000000]]), Parameter containing:
+                Tensor(shape=[2], dtype=float32, place=Place(cpu), stop_gradient=False,
+                    [2.50000000, 2.50000000])]
+
+                >>> linear = linear.astype("int8")
+                >>> print(linear)
+                Linear(in_features=2, out_features=2, dtype=paddle.int8)
+                >>> print(linear.parameters())
                 [Parameter containing:
-                Tensor(shape=[2, 2], dtype=int8, place=Place(gpu:0), stop_gradient=False,
-                    [[0, 1],
-                        [0, 0]]), Parameter containing:
-                Tensor(shape=[2], dtype=int8, place=Place(gpu:0), stop_gradient=False,
-                    [0, 0]), Parameter containing:
-                Tensor(shape=[2, 2], dtype=int8, place=Place(gpu:0), stop_gradient=False,
-                    [[ 0, -1],
-                        [-1,  0]]), Parameter containing:
-                Tensor(shape=[2], dtype=int8, place=Place(gpu:0), stop_gradient=False,
-                    [0, 0])]
+                Tensor(shape=[2, 2], dtype=int8, place=Place(cpu), stop_gradient=False,
+                    [[1, 1],
+                        [1, 1]]), Parameter containing:
+                Tensor(shape=[2], dtype=int8, place=Place(cpu), stop_gradient=False,
+                    [2, 2])]
+
         """
         valid_dtypes = [
             "bfloat16",

From 6a380e1b2ea2e107ceed627921cf9794c8dbb374 Mon Sep 17 00:00:00 2001
From: YibinLiu666 <2632839426@qq.com>
Date: Fri, 27 Oct 2023 02:21:38 +0000
Subject: [PATCH 08/16] add some ut to fix ci-coverage

---
 .../base/dygraph/tensor_patch_methods.py      |  2 +-
 test/legacy_test/test_Tensor_to.py            | 23 +++++++++++++++++++
 test/legacy_test/test_layer_astype.py         |  8 ++++---
 3 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/python/paddle/base/dygraph/tensor_patch_methods.py b/python/paddle/base/dygraph/tensor_patch_methods.py
index 68c3a4e1b56ad2..387a1ba161c946 100644
--- a/python/paddle/base/dygraph/tensor_patch_methods.py
+++ b/python/paddle/base/dygraph/tensor_patch_methods.py
@@ -573,7 +573,7 @@ def get_device_dtype_from_tensor(other):
             raise TypeError(
                 "to() received too mant arguments - expected one of:\n* (Union[str, paddle.CPUPlace(), paddle.CUDAPlace(), paddle.CUDAPinnedPlace(), paddle.XPUPlace(), paddle.CustomPlace()] device, Union[str, paddle.dtype] dtype, bool blocking)\n* (Union[str, paddle.dtype] dtype, bool blocking)\n* (paddle.Tensor other, bool blocking) "
             )
-        valid_keys = set(["device", "dtype", "non_blocking", "other"])
+        valid_keys = set(["device", "dtype", "blocking", "other"])
         valid_dtypes = [
             "bfloat16",
             "float16",
diff --git a/test/legacy_test/test_Tensor_to.py b/test/legacy_test/test_Tensor_to.py
index 16f5df1369af3d..626d7f6cfdb324 100644
--- a/test/legacy_test/test_Tensor_to.py
+++ b/test/legacy_test/test_Tensor_to.py
@@ -97,6 +97,15 @@ def test_Tensor_to_blocking(self):
         self.assertEqual((placex_str == "Place(cpu)"), True)
         typex_str = str(tensorx.dtype)
         self.assertEqual((typex_str == "paddle.int32"), True)
+        tensor2 = paddle.to_tensor([4, 5, 6])
+        tensor2 = tensor2.to(tensorx, False)
+        place2_str = str(tensor2.place)
+        self.assertEqual((place2_str == "Place(cpu)"), True)
+        type2_str = str(tensor2.dtype)
+        self.assertEqual((type2_str == "paddle.int32"), True)
+        tensor2 = tensor2.to("float16", False)
+        type2_str = str(tensor2.dtype)
+        self.assertEqual((type2_str == "paddle.float16"), True)
 
     def test_Tensor_to_other(self):
         tensor1 = paddle.to_tensor([1, 2, 3], dtype="int8", place="cpu")
@@ -105,6 +114,20 @@ def test_Tensor_to_other(self):
         self.assertEqual((tensor2.dtype == tensor1.dtype), True)
         self.assertEqual((type(tensor2.place) == type(tensor1.place)), True)
 
+    def test_kwargs(self):
+        tensorx = paddle.to_tensor([1, 2, 3])
+        tensorx = tensorx.to(device="cpu", dtype="int8", blocking=True)
+        placex_str = str(tensorx.place)
+        self.assertEqual((placex_str == "Place(cpu)"), True)
+        typex_str = str(tensorx.dtype)
+        self.assertEqual((typex_str == "paddle.int8"), True)
+        tensor2 = paddle.to_tensor([4, 5, 6])
+        tensor2 = tensor2.to(other=tensorx)
+        place2_str = str(tensor2.place)
+        self.assertEqual((place2_str == "Place(cpu)"), True)
+        type2_str = str(tensor2.dtype)
+        self.assertEqual((type2_str == "paddle.int8"), True)
+
     def test_error(self):
         tensorx = paddle.to_tensor([1, 2, 3])
         # device value error
diff --git a/test/legacy_test/test_layer_astype.py b/test/legacy_test/test_layer_astype.py
index d47a88d05e6194..19ad08a3cebc54 100644
--- a/test/legacy_test/test_layer_astype.py
+++ b/test/legacy_test/test_layer_astype.py
@@ -19,7 +19,9 @@
 
 class LayerAstypeTest(unittest.TestCase):
     def test_layer_astype(self):
-        linear1 = paddle.nn.Linear(10, 3)
+        net = paddle.nn.Sequential(
+            paddle.nn.Linear(2, 2), paddle.nn.Linear(2, 2)
+        )
         valid_dtypes = [
             "bfloat16",
             "float16",
@@ -35,8 +37,8 @@ def test_layer_astype(self):
             "bool",
         ]
         for dtype in valid_dtypes:
-            linear1 = linear1.astype(dtype)
-            typex_str = str(linear1._dtype)
+            net = net.astype(dtype)
+            typex_str = str(net._dtype)
             self.assertEqual((typex_str == "paddle." + dtype), True)
 
     def test_error(self):

From c36807c773fe77304fe2d59c87d8d7c742ce759e Mon Sep 17 00:00:00 2001
From: YibinLiu666 <2632839426@qq.com>
Date: Thu, 2 Nov 2023 16:46:07 +0800
Subject: [PATCH 09/16] fix codestyle

---
 python/paddle/base/dygraph/tensor_patch_methods.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/python/paddle/base/dygraph/tensor_patch_methods.py b/python/paddle/base/dygraph/tensor_patch_methods.py
index 387a1ba161c946..afe10dd1d79163 100644
--- a/python/paddle/base/dygraph/tensor_patch_methods.py
+++ b/python/paddle/base/dygraph/tensor_patch_methods.py
@@ -571,7 +571,11 @@ def get_device_dtype_from_tensor(other):
 
         if size_args + size_kwargs > 3 or size_args + size_kwargs == 0:
             raise TypeError(
-                "to() received too mant arguments - expected one of:\n* (Union[str, paddle.CPUPlace(), paddle.CUDAPlace(), paddle.CUDAPinnedPlace(), paddle.XPUPlace(), paddle.CustomPlace()] device, Union[str, paddle.dtype] dtype, bool blocking)\n* (Union[str, paddle.dtype] dtype, bool blocking)\n* (paddle.Tensor other, bool blocking) "
+                "to() received too mant arguments - expected one of:\n  \
+                * (Union[str, paddle.CPUPlace(), paddle.CUDAPlace(), paddle.CUDAPinnedPlace(), paddle.XPUPlace(), paddle.CustomPlace()] \
+                device, Union[str, paddle.dtype] dtype, bool blocking)\n \
+                * (Union[str, paddle.dtype] dtype, bool blocking)\n \
+                * (paddle.Tensor other, bool blocking) "
             )
         valid_keys = set(["device", "dtype", "blocking", "other"])
         valid_dtypes = [

From 2d76a16aa60bed624319ed883a4debc1ef41171e Mon Sep 17 00:00:00 2001
From: YibinLiu666 <2632839426@qq.com>
Date: Thu, 2 Nov 2023 18:09:59 +0800
Subject: [PATCH 10/16] fix codestyle

---
 python/paddle/base/dygraph/tensor_patch_methods.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/paddle/base/dygraph/tensor_patch_methods.py b/python/paddle/base/dygraph/tensor_patch_methods.py
index afe10dd1d79163..344d0230a79a8d 100644
--- a/python/paddle/base/dygraph/tensor_patch_methods.py
+++ b/python/paddle/base/dygraph/tensor_patch_methods.py
@@ -577,7 +577,7 @@ def get_device_dtype_from_tensor(other):
                 * (Union[str, paddle.dtype] dtype, bool blocking)\n \
                 * (paddle.Tensor other, bool blocking) "
             )
-        valid_keys = set(["device", "dtype", "blocking", "other"])
+        valid_keys = {"device", "dtype", "blocking", "other"}
         valid_dtypes = [
             "bfloat16",
             "float16",

From 7fb9f6581c4df50ef4a94c255df1c1ff05e5763e Mon Sep 17 00:00:00 2001
From: YibinLiu666 <2632839426@qq.com>
Date: Thu, 2 Nov 2023 19:46:56 +0800
Subject: [PATCH 11/16] update

---
 python/paddle/nn/layer/layers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/paddle/nn/layer/layers.py b/python/paddle/nn/layer/layers.py
index 101ec9707d712a..50352d36f2926c 100644
--- a/python/paddle/nn/layer/layers.py
+++ b/python/paddle/nn/layer/layers.py
@@ -2219,7 +2219,7 @@ def _transform(self, t, device, dtype, blocking):
         if t.place.is_gpu_place():
             # for gpu, minimum memory allocation unit is 256 bytes.
             size_dtype = core.size_of_dtype(dtype)
-            # Note(zhangbo): Paddle GPU minimum memory allocation unit is 256 bytes, waiting_alloc_memory will comput ‘t�? occupied memory space.
+            # Note(zhangbo): Paddle GPU minimum memory allocation unit is 256 bytes, waiting_alloc_memory will comput ‘t’ occupied memory space.
             # Coefficient 1.2 is used to avoid OOM that may occur in this critical state when the memory is just enough.
             waiting_alloc_memory = (
                 ((np.prod(t.shape) * size_dtype) / 256 + 1) * 256 * 1.2

From fe565a73ed53d3ec9e94b5b39e290993278cf21b Mon Sep 17 00:00:00 2001
From: YibinLiu666 <2632839426@qq.com>
Date: Sat, 4 Nov 2023 02:57:33 +0000
Subject: [PATCH 12/16] add ut to test layer params' and buffers' type

---
 python/paddle/base/dygraph/tensor_patch_methods.py | 4 ++--
 python/paddle/nn/layer/layers.py                   | 2 +-
 test/legacy_test/test_layer_astype.py              | 9 +++++++++
 3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/python/paddle/base/dygraph/tensor_patch_methods.py b/python/paddle/base/dygraph/tensor_patch_methods.py
index 344d0230a79a8d..b5877faea528d1 100644
--- a/python/paddle/base/dygraph/tensor_patch_methods.py
+++ b/python/paddle/base/dygraph/tensor_patch_methods.py
@@ -573,8 +573,8 @@ def get_device_dtype_from_tensor(other):
             raise TypeError(
                 "to() received too mant arguments - expected one of:\n  \
                 * (Union[str, paddle.CPUPlace(), paddle.CUDAPlace(), paddle.CUDAPinnedPlace(), paddle.XPUPlace(), paddle.CustomPlace()] \
-                device, Union[str, paddle.dtype] dtype, bool blocking)\n \
-                * (Union[str, paddle.dtype] dtype, bool blocking)\n \
+                device, Union[str, paddle.dtype, numpy.dtype] dtype, bool blocking)\n \
+                * (Union[str, paddle.dtype, numpy.dtype] dtype, bool blocking)\n \
                 * (paddle.Tensor other, bool blocking) "
             )
         valid_keys = {"device", "dtype", "blocking", "other"}
diff --git a/python/paddle/nn/layer/layers.py b/python/paddle/nn/layer/layers.py
index 50352d36f2926c..bb15d6b7296e3d 100644
--- a/python/paddle/nn/layer/layers.py
+++ b/python/paddle/nn/layer/layers.py
@@ -995,7 +995,7 @@ def astype(self, dtype=None):
             return self
         else:
             raise ValueError(
-                "dtype value error, must be 'bfloat16', 'float16', 'float32', 'float64', 'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'complex64', 'complex128', 'bool', or paddle.dtype, but recieve "
+                "dtype value error, must be 'bfloat16', 'float16', 'float32', 'float64', 'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'complex64', 'complex128', 'bool', or paddle.dtype, numpy.dtype, but recieve "
                 + str(dtype)
             )
 
diff --git a/test/legacy_test/test_layer_astype.py b/test/legacy_test/test_layer_astype.py
index 19ad08a3cebc54..f60f49f8d81a20 100644
--- a/test/legacy_test/test_layer_astype.py
+++ b/test/legacy_test/test_layer_astype.py
@@ -14,6 +14,8 @@
 
 import unittest
 
+import numpy as np
+
 import paddle
 
 
@@ -22,6 +24,9 @@ def test_layer_astype(self):
         net = paddle.nn.Sequential(
             paddle.nn.Linear(2, 2), paddle.nn.Linear(2, 2)
         )
+        value = np.array([0]).astype("float32")
+        buffer = paddle.to_tensor(value)
+        net.register_buffer("test_buffer", buffer, persistable=True)
         valid_dtypes = [
             "bfloat16",
             "float16",
@@ -40,6 +45,10 @@ def test_layer_astype(self):
             net = net.astype(dtype)
             typex_str = str(net._dtype)
             self.assertEqual((typex_str == "paddle." + dtype), True)
+            param_typex_str = str(net.parameters()[0].dtype)
+            self.assertEqual((param_typex_str == "paddle." + dtype), True)
+            buffer_typex_str = str(net.buffers()[0].dtype)
+            self.assertEqual((buffer_typex_str == "paddle." + dtype), True)
 
     def test_error(self):
         linear1 = paddle.nn.Linear(10, 3)

From dbd9d7fc0aa0722c37576e2343aadc89fedffd22 Mon Sep 17 00:00:00 2001
From: YibinLiu666 <2632839426@qq.com>
Date: Mon, 6 Nov 2023 12:33:14 +0000
Subject: [PATCH 13/16] update test

---
 test/legacy_test/test_Tensor_to.py    | 38 ++++++++++++---------------
 test/legacy_test/test_layer_astype.py |  6 ++---
 2 files changed, 20 insertions(+), 24 deletions(-)

diff --git a/test/legacy_test/test_Tensor_to.py b/test/legacy_test/test_Tensor_to.py
index 626d7f6cfdb324..c9901cb68d780e 100644
--- a/test/legacy_test/test_Tensor_to.py
+++ b/test/legacy_test/test_Tensor_to.py
@@ -38,7 +38,7 @@ def test_Tensor_to_dtype(self):
         for dtype in valid_dtypes:
             tensorx = tensorx.to(dtype)
             typex_str = str(tensorx.dtype)
-            self.assertEqual((typex_str == "paddle." + dtype), True)
+            self.assertEqual(typex_str, "paddle." + dtype)
 
     def test_Tensor_to_device(self):
         tensorx = paddle.to_tensor([1, 2, 3])
@@ -51,9 +51,9 @@ def test_Tensor_to_device(self):
             tensorx = tensorx.to(place)
             placex_str = str(tensorx.place)
             if place == "gpu":
-                self.assertEqual((placex_str == "Place(" + place + ":0)"), True)
+                self.assertEqual(placex_str, "Place(" + place + ":0)")
             else:
-                self.assertEqual((placex_str == "Place(" + place + ")"), True)
+                self.assertEqual(placex_str, "Place(" + place + ")")
 
     def test_Tensor_to_device_dtype(self):
         tensorx = paddle.to_tensor([1, 2, 3])
@@ -80,53 +80,49 @@ def test_Tensor_to_device_dtype(self):
                 tensorx = tensorx.to(place, dtype)
                 placex_str = str(tensorx.place)
                 if place == "gpu":
-                    self.assertEqual(
-                        (placex_str == "Place(" + place + ":0)"), True
-                    )
+                    self.assertEqual(placex_str, "Place(" + place + ":0)")
                 else:
-                    self.assertEqual(
-                        (placex_str == "Place(" + place + ")"), True
-                    )
+                    self.assertEqual(placex_str, "Place(" + place + ")")
                 typex_str = str(tensorx.dtype)
-                self.assertEqual((typex_str == "paddle." + dtype), True)
+                self.assertEqual(typex_str, "paddle." + dtype)
 
     def test_Tensor_to_blocking(self):
         tensorx = paddle.to_tensor([1, 2, 3])
         tensorx = tensorx.to("cpu", "int32", False)
         placex_str = str(tensorx.place)
-        self.assertEqual((placex_str == "Place(cpu)"), True)
+        self.assertEqual(placex_str, "Place(cpu)")
         typex_str = str(tensorx.dtype)
-        self.assertEqual((typex_str == "paddle.int32"), True)
+        self.assertEqual(typex_str, "paddle.int32")
         tensor2 = paddle.to_tensor([4, 5, 6])
         tensor2 = tensor2.to(tensorx, False)
         place2_str = str(tensor2.place)
-        self.assertEqual((place2_str == "Place(cpu)"), True)
+        self.assertEqual(place2_str, "Place(cpu)")
         type2_str = str(tensor2.dtype)
-        self.assertEqual((type2_str == "paddle.int32"), True)
+        self.assertEqual(type2_str, "paddle.int32")
         tensor2 = tensor2.to("float16", False)
         type2_str = str(tensor2.dtype)
-        self.assertEqual((type2_str == "paddle.float16"), True)
+        self.assertEqual(type2_str, "paddle.float16")
 
     def test_Tensor_to_other(self):
         tensor1 = paddle.to_tensor([1, 2, 3], dtype="int8", place="cpu")
         tensor2 = paddle.to_tensor([1, 2, 3])
         tensor2 = tensor2.to(tensor1)
-        self.assertEqual((tensor2.dtype == tensor1.dtype), True)
-        self.assertEqual((type(tensor2.place) == type(tensor1.place)), True)
+        self.assertEqual(tensor2.dtype, tensor1.dtype)
+        self.assertEqual(type(tensor2.place), type(tensor1.place))
 
     def test_kwargs(self):
         tensorx = paddle.to_tensor([1, 2, 3])
         tensorx = tensorx.to(device="cpu", dtype="int8", blocking=True)
         placex_str = str(tensorx.place)
-        self.assertEqual((placex_str == "Place(cpu)"), True)
+        self.assertEqual(placex_str, "Place(cpu)")
         typex_str = str(tensorx.dtype)
-        self.assertEqual((typex_str == "paddle.int8"), True)
+        self.assertEqual(typex_str, "paddle.int8")
         tensor2 = paddle.to_tensor([4, 5, 6])
         tensor2 = tensor2.to(other=tensorx)
         place2_str = str(tensor2.place)
-        self.assertEqual((place2_str == "Place(cpu)"), True)
+        self.assertEqual(place2_str, "Place(cpu)")
         type2_str = str(tensor2.dtype)
-        self.assertEqual((type2_str == "paddle.int8"), True)
+        self.assertEqual(type2_str, "paddle.int8")
 
     def test_error(self):
         tensorx = paddle.to_tensor([1, 2, 3])
diff --git a/test/legacy_test/test_layer_astype.py b/test/legacy_test/test_layer_astype.py
index f60f49f8d81a20..413b3ca6813f22 100644
--- a/test/legacy_test/test_layer_astype.py
+++ b/test/legacy_test/test_layer_astype.py
@@ -44,11 +44,11 @@ def test_layer_astype(self):
         for dtype in valid_dtypes:
             net = net.astype(dtype)
             typex_str = str(net._dtype)
-            self.assertEqual((typex_str == "paddle." + dtype), True)
+            self.assertEqual(typex_str, "paddle." + dtype)
             param_typex_str = str(net.parameters()[0].dtype)
-            self.assertEqual((param_typex_str == "paddle." + dtype), True)
+            self.assertEqual(param_typex_str, "paddle." + dtype)
             buffer_typex_str = str(net.buffers()[0].dtype)
-            self.assertEqual((buffer_typex_str == "paddle." + dtype), True)
+            self.assertEqual(buffer_typex_str, "paddle." + dtype)
 
     def test_error(self):
         linear1 = paddle.nn.Linear(10, 3)

From bf37ce24d02179711cecbf43158b85b5d64551d8 Mon Sep 17 00:00:00 2001
From: YibinLiu666 <2632839426@qq.com>
Date: Tue, 7 Nov 2023 07:40:34 +0000
Subject: [PATCH 14/16] fix doc

---
 python/paddle/nn/layer/layers.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/paddle/nn/layer/layers.py b/python/paddle/nn/layer/layers.py
index bb15d6b7296e3d..038c7e7053df55 100644
--- a/python/paddle/nn/layer/layers.py
+++ b/python/paddle/nn/layer/layers.py
@@ -926,7 +926,7 @@ def astype(self, dtype=None):
 
         Parameters:
             dtype(str|paddle.dtype|numpy.dtype): target data type of layer.
-                If set str, it can be "bool",  "float16", "float32", "float64",
+                If set str, it can be "bool", "bfloat16", "float16", "float32", "float64",
                 "int8", "int16", "int32", "int64", "uint8", "complex64", "complex128".
                 Default: None
 
@@ -995,7 +995,7 @@ def astype(self, dtype=None):
             return self
         else:
             raise ValueError(
-                "dtype value error, must be 'bfloat16', 'float16', 'float32', 'float64', 'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'complex64', 'complex128', 'bool', or paddle.dtype, numpy.dtype, but recieve "
+                "dtype value error, must be 'bfloat16', 'float16', 'float32', 'float64', 'int8', 'int16', 'int32', 'int64', 'uint8', 'complex64', 'complex128', 'bool', or paddle.dtype, numpy.dtype, but recieve "
                 + str(dtype)
             )
 

From 660f1d633d40ac387c67d3662117df7d9e65f704 Mon Sep 17 00:00:00 2001
From: YibLiu <68105073+YibinLiu666@users.noreply.github.com>
Date: Wed, 8 Nov 2023 12:34:53 +0800
Subject: [PATCH 15/16] Update
 python/paddle/base/dygraph/tensor_patch_methods.py

Co-authored-by: zachary sun <70642955+sunzhongkai588@users.noreply.github.com>
---
 python/paddle/base/dygraph/tensor_patch_methods.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/python/paddle/base/dygraph/tensor_patch_methods.py b/python/paddle/base/dygraph/tensor_patch_methods.py
index b5877faea528d1..7c4bf1207a410b 100644
--- a/python/paddle/base/dygraph/tensor_patch_methods.py
+++ b/python/paddle/base/dygraph/tensor_patch_methods.py
@@ -516,11 +516,12 @@ def transform(t, device, dtype, blocking):
     def to(self, *args, **kwargs):
         """
         Performs Tensor dtype and/or device conversion. A paddle.dtype and place
-        are inferred from the arguments of self.to(*args, **kwargs).There are three ways
-        to call `to`
-            to(dtype, blocking=True)
-            to(device, dtype=None, blocking=True)
-            to(other, blocking=True)
+        are inferred from the arguments of ``self.to(*args, **kwargs)``.There are 
+        three ways to call `to`:
+        
+            1. to(dtype, blocking=True)
+            2. to(device, dtype=None, blocking=True)
+            3. to(other, blocking=True)
 
         Returns:
             Tensor: self

From d40fe54cbdf4fb0c6284241846da8af344b27483 Mon Sep 17 00:00:00 2001
From: YibinLiu666 <2632839426@qq.com>
Date: Wed, 8 Nov 2023 16:45:17 +0800
Subject: [PATCH 16/16] update doc

---
 python/paddle/base/dygraph/tensor_patch_methods.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/paddle/base/dygraph/tensor_patch_methods.py b/python/paddle/base/dygraph/tensor_patch_methods.py
index 7c4bf1207a410b..4d221a5e888122 100644
--- a/python/paddle/base/dygraph/tensor_patch_methods.py
+++ b/python/paddle/base/dygraph/tensor_patch_methods.py
@@ -516,9 +516,9 @@ def transform(t, device, dtype, blocking):
     def to(self, *args, **kwargs):
         """
         Performs Tensor dtype and/or device conversion. A paddle.dtype and place
-        are inferred from the arguments of ``self.to(*args, **kwargs)``.There are 
+        are inferred from the arguments of ``self.to(*args, **kwargs)``. There are
         three ways to call `to`:
-        
+
             1. to(dtype, blocking=True)
             2. to(device, dtype=None, blocking=True)
             3. to(other, blocking=True)