From a9102907ef209f8e682f9b5394c0152b331c1a27 Mon Sep 17 00:00:00 2001 From: Sekiro-x <2095349370@qq.com> Date: Mon, 30 Oct 2023 22:25:41 +0800 Subject: [PATCH 1/8] 28 --- python/paddle/device/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/paddle/device/__init__.py b/python/paddle/device/__init__.py index 7ee16ffcf5464e..1b9a0fbaed8bcd 100644 --- a/python/paddle/device/__init__.py +++ b/python/paddle/device/__init__.py @@ -571,8 +571,9 @@ class Stream: A device stream wrapper around StreamBase. Parameters: device(str|paddle.CUDAPlace(n)|paddle.CustomPlace(n)): Which device the stream runn on. If device is None, the device is the current device. Default: None. - It can be ``gpu``, ``gpu:x``,``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevicec, - where ``x`` is the index of the GPUs, XPUs. And it can be paddle.CUDAPlace(n) or paddle.CustomPlace(n). + It can be ``gpu``, ``gpu:x``,``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevicec, + where ``x`` is the index of the GPUs, XPUs. And it can be paddle.CUDAPlace(n) or paddle.CustomPlace(n). + priority(int, optional): priority of the CUDA stream. Can be either 1 (high priority) or 2 (low priority). By default, streams have priority 2. From 4647f9d1c715c0302674b5e759617f2e01d40d4c Mon Sep 17 00:00:00 2001 From: Sekiro-x <2095349370@qq.com> Date: Tue, 31 Oct 2023 21:22:35 +0800 Subject: [PATCH 2/8] 28 --- python/paddle/device/__init__.py | 43 +++++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/python/paddle/device/__init__.py b/python/paddle/device/__init__.py index 1b9a0fbaed8bcd..6b84f6db0885a2 100644 --- a/python/paddle/device/__init__.py +++ b/python/paddle/device/__init__.py @@ -58,10 +58,12 @@ def is_compiled_with_custom_device(device_type): """ + Whether paddle was built with Paddle_CUSTOM_DEVICE . Args: std::string, the registered device type, like "npu". + Return: bool, ``True`` if CustomDevice is supported, otherwise ``False``. @@ -124,7 +126,7 @@ def XPUPlace(dev_id): """ Return a Baidu Kunlun Place - Parameters: + Args: dev_id(int): Baidu Kunlun device id Examples: @@ -568,17 +570,20 @@ def __repr__(self): class Stream: ''' + A device stream wrapper around StreamBase. - Parameters: - device(str|paddle.CUDAPlace(n)|paddle.CustomPlace(n)): Which device the stream runn on. If device is None, the device is the current device. Default: None. - It can be ``gpu``, ``gpu:x``,``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevicec, - where ``x`` is the index of the GPUs, XPUs. And it can be paddle.CUDAPlace(n) or paddle.CustomPlace(n). + Args: + device(str|paddle.CUDAPlace(n)|paddle.CustomPlace(n)): Which device the stream runn on. If device is None, the device is the current device. Default: None. + It can be ``gpu``, ``gpu:x``,``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevicec, + where ``x`` is the index of the GPUs, XPUs. And it can be paddle.CUDAPlace(n) or paddle.CustomPlace(n). priority(int, optional): priority of the CUDA stream. Can be either 1 (high priority) or 2 (low priority). By default, streams have priority 2. + Returns: Stream: The stream. + Examples: .. code-block:: python @@ -590,6 +595,7 @@ class Stream: >>> s2 = paddle.device.Stream('custom_cpu') >>> s3 = paddle.device.Stream('custom_cpu:0') >>> s4 = paddle.device.Stream(paddle.CustomPlace('custom_cpu', 0)) + ''' def __init__(self, device=None, priority=2, stream_base=None): @@ -634,11 +640,15 @@ def __init__(self, device=None, priority=2, stream_base=None): def wait_event(self, event): ''' + Makes all future work submitted to the stream wait for an event. - Parameters: + + Args: event (Event): an event to wait for. + Returns: None. + Examples: .. code-block:: python @@ -651,18 +661,23 @@ def wait_event(self, event): >>> e = paddle.device.Event() >>> e.record(s1) >>> s2.wait_event(e) + ''' self.stream_base.wait_event(event.event_base) def wait_stream(self, stream): ''' + Synchronizes with another stream. All future work submitted to this stream will wait until all kernels submitted to a given stream at the time of call complete. - Parameters: + + Args: stream (Stream): a stream to synchronize. + Returns: None. + Examples: .. code-block:: python @@ -673,17 +688,22 @@ def wait_stream(self, stream): >>> s1 = paddle.device.Stream() >>> s2 = paddle.device.Stream() >>> s1.wait_stream(s2) + ''' self.stream_base.wait_stream(stream.stream_base) def record_event(self, event=None): ''' + Records an event. - Parameters: + + Args: event (Event, optional): event to record. If not given, a new one - will be allocated. + will be allocated. + Returns: Event: Recorded event. + Examples: .. code-block:: python @@ -696,6 +716,7 @@ def record_event(self, event=None): >>> e2 = paddle.device.Event() >>> s.record_event(e2) + ''' if event is None: event = Event(self.device) @@ -704,9 +725,12 @@ def record_event(self, event=None): def query(self): ''' + Checks if all the work submitted has been completed. + Returns: bool: Whether all kernels in this stream are completed. + Examples: .. code-block:: python @@ -716,6 +740,7 @@ def query(self): >>> paddle.set_device('custom_cpu') >>> s = paddle.device.Stream() >>> s.query() + ''' return self.stream_base.query() From 91bc194b5777a75ba509d381727bf22947b5d23e Mon Sep 17 00:00:00 2001 From: Sekiro-x <76903040+Sekiro-x@users.noreply.github.com> Date: Thu, 2 Nov 2023 21:49:11 +0800 Subject: [PATCH 3/8] Apply suggestions from code review Co-authored-by: zachary sun <70642955+sunzhongkai588@users.noreply.github.com> --- python/paddle/device/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/paddle/device/__init__.py b/python/paddle/device/__init__.py index 6b84f6db0885a2..c46359a4848b72 100644 --- a/python/paddle/device/__init__.py +++ b/python/paddle/device/__init__.py @@ -62,7 +62,7 @@ def is_compiled_with_custom_device(device_type): Whether paddle was built with Paddle_CUSTOM_DEVICE . Args: - std::string, the registered device type, like "npu". + device_type (str): the registered device type, like "npu". Return: bool, ``True`` if CustomDevice is supported, otherwise ``False``. @@ -575,7 +575,7 @@ class Stream: Args: device(str|paddle.CUDAPlace(n)|paddle.CustomPlace(n)): Which device the stream runn on. If device is None, the device is the current device. Default: None. - It can be ``gpu``, ``gpu:x``,``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevicec, + It can be ``gpu``, ``gpu:x``, ``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevicec, where ``x`` is the index of the GPUs, XPUs. And it can be paddle.CUDAPlace(n) or paddle.CustomPlace(n). priority(int, optional): priority of the CUDA stream. Can be either 1 (high priority) or 2 (low priority). By default, streams have From 41ddc3c3f51344c7ea9c79c65ab38df7f7212c51 Mon Sep 17 00:00:00 2001 From: Sekiro-x <2095349370@qq.com> Date: Thu, 2 Nov 2023 22:00:53 +0800 Subject: [PATCH 4/8] Update --- python/paddle/device/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/paddle/device/__init__.py b/python/paddle/device/__init__.py index 6b84f6db0885a2..0452a3cbd92a2c 100644 --- a/python/paddle/device/__init__.py +++ b/python/paddle/device/__init__.py @@ -421,7 +421,8 @@ def get_available_custom_device(): class Event: ''' A device event wrapper around StreamBase. - Parameters: + + Args: device(str|paddle.CUDAPlace(n)|paddle.CustomPlace(n)): Which device the stream runn on. If device is None, the device is the current device. Default: None. It can be ``gpu``, ``gpu:x``,``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevicec, where ``x`` is the index of the GPUs, XPUs. And it can be paddle.CUDAPlace(n) or paddle.CustomPlace(n). @@ -575,7 +576,7 @@ class Stream: Args: device(str|paddle.CUDAPlace(n)|paddle.CustomPlace(n)): Which device the stream runn on. If device is None, the device is the current device. Default: None. - It can be ``gpu``, ``gpu:x``,``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevicec, + It can be ``gpu``, ``gpu:x``, ``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevicec, where ``x`` is the index of the GPUs, XPUs. And it can be paddle.CUDAPlace(n) or paddle.CustomPlace(n). priority(int, optional): priority of the CUDA stream. Can be either 1 (high priority) or 2 (low priority). By default, streams have From 673657546e940bc16eb25258fc4e6b55724bf5e8 Mon Sep 17 00:00:00 2001 From: Sekiro-x <2095349370@qq.com> Date: Thu, 2 Nov 2023 22:07:12 +0800 Subject: [PATCH 5/8] '28' --- python/paddle/device/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/paddle/device/__init__.py b/python/paddle/device/__init__.py index 0452a3cbd92a2c..72065133d7371f 100644 --- a/python/paddle/device/__init__.py +++ b/python/paddle/device/__init__.py @@ -62,7 +62,7 @@ def is_compiled_with_custom_device(device_type): Whether paddle was built with Paddle_CUSTOM_DEVICE . Args: - std::string, the registered device type, like "npu". + device_type (str): the registered device type, like "npu". Return: bool, ``True`` if CustomDevice is supported, otherwise ``False``. @@ -424,7 +424,7 @@ class Event: Args: device(str|paddle.CUDAPlace(n)|paddle.CustomPlace(n)): Which device the stream runn on. If device is None, the device is the current device. Default: None. - It can be ``gpu``, ``gpu:x``,``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevicec, + It can be ``gpu``, ``gpu:x``, ``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevicec, where ``x`` is the index of the GPUs, XPUs. And it can be paddle.CUDAPlace(n) or paddle.CustomPlace(n). enable_timing (bool, optional): indicates if the event should measure time, default is False blocking (bool, optional): if True, ``wait`` will be blocking, default is False @@ -788,7 +788,7 @@ def current_stream(device=None): Return the current stream by the device. Parameters: device(str|paddle.CUDAPlace(n)|paddle.CustomPlace(n)): The device which want to get stream from. If device is None, the device is the current device. Default: None. - It can be ``gpu``, ``gpu:x``,``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevicec, + It can be ``gpu``, ``gpu:x``, ``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevicec, where ``x`` is the index of the GPUs, CustomDevicecs. And it can be paddle.CUDAPlace(n) or paddle.CustomPlace(n). Returns: Stream: The stream to the device. From 4d8957849c65065f338abb149bcd7d21de8ce6b1 Mon Sep 17 00:00:00 2001 From: Sekiro-x <2095349370@qq.com> Date: Thu, 2 Nov 2023 22:30:18 +0800 Subject: [PATCH 6/8] 'all' --- python/paddle/device/__init__.py | 76 +++++++++++++++++++++++++++++--- 1 file changed, 69 insertions(+), 7 deletions(-) diff --git a/python/paddle/device/__init__.py b/python/paddle/device/__init__.py index 72065133d7371f..9c6a563f9ae175 100644 --- a/python/paddle/device/__init__.py +++ b/python/paddle/device/__init__.py @@ -72,12 +72,14 @@ def is_compiled_with_custom_device(device_type): >>> import paddle >>> support_npu = paddle.device.is_compiled_with_custom_device("npu") + """ return core.is_compiled_with_custom_device(device_type) def is_compiled_with_ipu(): """ + Whether paddle was built with WITH_IPU=ON to support Graphcore IPU. Returns (bool): `True` if IPU is supported, otherwise `False`. @@ -87,12 +89,14 @@ def is_compiled_with_ipu(): >>> import paddle >>> support_ipu = paddle.is_compiled_with_ipu() + """ return core.is_compiled_with_ipu() def IPUPlace(): """ + Return a Graphcore IPU Place Examples: @@ -103,12 +107,14 @@ def IPUPlace(): >>> import paddle >>> paddle.device.set_device('ipu') >>> place = paddle.device.IPUPlace() + """ return core.IPUPlace() def is_compiled_with_xpu(): """ + Whether paddle was built with WITH_XPU=ON to support Baidu Kunlun Returns (bool): whether paddle was built with WITH_XPU=ON @@ -118,12 +124,14 @@ def is_compiled_with_xpu(): >>> import paddle >>> support_xpu = paddle.device.is_compiled_with_xpu() + """ return core.is_compiled_with_xpu() def XPUPlace(dev_id): """ + Return a Baidu Kunlun Place Args: @@ -137,12 +145,14 @@ def XPUPlace(dev_id): >>> import paddle >>> paddle.device.set_device('xpu') >>> place = paddle.device.XPUPlace(0) + """ return core.XPUPlace(dev_id) def get_cudnn_version(): """ + This function return the version of cudnn. the retuen value is int which represents the cudnn version. For example, if it return 7600, it represents the version of cudnn is 7.6. @@ -251,11 +261,12 @@ def _convert_to_place(device): def set_device(device): """ + Paddle supports running calculations on various types of devices, including CPU, GPU, XPU, NPU and IPU. They are represented by string identifiers. This function can specify the global device which the OP will run. - Parameters: + Args: device(str): This parameter determines the specific running device. It can be ``cpu``, ``gpu``, ``xpu``, ``npu``, ``gpu:x``, ``xpu:x``, ``npu:x`` and ``ipu``, where ``x`` is the index of the GPUs, XPUs or NPUs. @@ -273,6 +284,7 @@ def set_device(device): >>> x1 = paddle.ones(name='x1', shape=[1, 2], dtype='int32') >>> x2 = paddle.zeros(name='x2', shape=[1, 2], dtype='int32') >>> data = paddle.stack([x1,x2], axis=1) + """ place = _convert_to_place(device) framework._set_expected_place(place) @@ -281,6 +293,7 @@ def set_device(device): def get_device(): """ + This function can get the current global device of the program is running. It's a string which is like 'cpu', 'gpu:x', 'xpu:x' and 'npu:x'. if the global device is not set, it will return a string which is 'gpu:x' when cuda is avaliable or it @@ -320,6 +333,7 @@ def get_device(): def get_all_device_type(): """ + Get all available device types. Returns: @@ -342,12 +356,14 @@ def get_all_device_type(): >>> # Case 4: paddlepaddle-gpu package installed, and custom deivce 'CustomCPU' and 'CustomGPU' is registerd. >>> # Output: ['cpu', 'gpu', 'CustomCPU', 'CustomGPU'] + """ return core.get_all_device_type() def get_all_custom_device_type(): """ + Get all available custom device types. Returns: @@ -364,12 +380,14 @@ def get_all_custom_device_type(): >>> # Case 2: paddlepaddle-gpu package installed, and custom deivce 'CustomCPU' and 'CustomGPU' is registerd. >>> # Output: ['CustomCPU', 'CustomGPU'] + """ return core.get_all_custom_device_type() def get_available_device(): """ + Get all available devices. Returns: @@ -392,12 +410,14 @@ def get_available_device(): >>> # Case 4: paddlepaddle-gpu package installed, and custom deivce 'CustomCPU' and 'CustomGPU' is registerd. >>> # Output: ['cpu', 'gpu:0', 'gpu:1', 'CustomCPU', 'CustomGPU:0', 'CustomGPU:1'] + """ return core.get_available_device() def get_available_custom_device(): """ + Get all available custom devices. Returns: @@ -414,12 +434,14 @@ def get_available_custom_device(): >>> # Case 2: paddlepaddle-gpu package installed, and custom deivce 'CustomCPU' and 'CustomGPU' is registerd. >>> # Output: ['CustomCPU', 'CustomGPU:0', 'CustomGPU:1'] + """ return core.get_available_custom_device() class Event: ''' + A device event wrapper around StreamBase. Args: @@ -429,8 +451,10 @@ class Event: enable_timing (bool, optional): indicates if the event should measure time, default is False blocking (bool, optional): if True, ``wait`` will be blocking, default is False interprocess (bool): if True, the event can be shared between processes, default is False + Returns: Event: The event. + Examples: .. code-block:: python @@ -442,6 +466,7 @@ class Event: >>> e2 = paddle.device.Event('custom_cpu') >>> e3 = paddle.device.Event('custom_cpu:0') >>> e4 = paddle.device.Event(paddle.CustomPlace('custom_cpu', 0)) + ''' def __init__( @@ -481,12 +506,16 @@ def __init__( def record(self, stream=None): ''' + Records the event in a given stream. - Parameters: + + Args: stream(Stream, optional): The given stream. By default, stream is None, event will be recorded in current_stream. + Returns: None. + Examples: .. code-block:: python @@ -499,6 +528,7 @@ def record(self, stream=None): >>> s = paddle.device.Stream() >>> e.record(s) + ''' if stream is None: stream = current_stream(self.device) @@ -507,9 +537,12 @@ def record(self, stream=None): def query(self): ''' + Checks if all work currently captured by event has completed. + Returns: bool: Whether all work currently captured by event has completed. + Examples: .. code-block:: python @@ -520,15 +553,19 @@ def query(self): >>> e = paddle.device.Event() >>> e.record() >>> e.query() + ''' return self.event_base.query() def elapsed_time(self, end_event): ''' + Returns the time elapsed in milliseconds after the event was recorded and before the end_event was recorded. + Returns: int: The time. + Examples: .. code-block:: python @@ -542,16 +579,20 @@ def elapsed_time(self, end_event): >>> e2 = paddle.device.Event() >>> e2.record() >>> e1.elapsed_time(e2) + ''' return 0 def synchronize(self): ''' + Waits for the event to complete. Waits until the completion of all work currently captured in this event. This prevents the CPU thread from proceeding until the event completes. + Returns: None. + Examples: .. code-block:: python @@ -562,6 +603,7 @@ def synchronize(self): >>> e = paddle.device.Event() >>> e.record() >>> e.synchronize() + ''' self.event_base.synchronize() @@ -747,9 +789,12 @@ def query(self): def synchronize(self): ''' + Wait for all the kernels in this stream to complete. + Returns: None. + Examples: .. code-block:: python @@ -759,6 +804,7 @@ def synchronize(self): >>> paddle.set_device('custom_cpu') >>> s = paddle.device.Stream() >>> s.synchronize() + ''' self.stream_base.synchronize() @@ -785,13 +831,16 @@ def __repr__(self): def current_stream(device=None): ''' + Return the current stream by the device. - Parameters: + Args: device(str|paddle.CUDAPlace(n)|paddle.CustomPlace(n)): The device which want to get stream from. If device is None, the device is the current device. Default: None. It can be ``gpu``, ``gpu:x``, ``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevicec, where ``x`` is the index of the GPUs, CustomDevicecs. And it can be paddle.CUDAPlace(n) or paddle.CustomPlace(n). + Returns: Stream: The stream to the device. + Examples: .. code-block:: python @@ -803,6 +852,7 @@ def current_stream(device=None): >>> s2 = paddle.device.current_stream("custom_cpu:0") >>> place = paddle.CustomPlace('custom_cpu', 0) >>> s3 = paddle.device.current_stream(place) + ''' if device is None: place = paddle.framework._current_expected_place() @@ -831,11 +881,14 @@ def current_stream(device=None): def set_stream(stream): ''' + Set the current stream. - Parameters: + Args: stream(Stream): The selected stream. + Returns: Stream: The previous stream. + Examples: .. code-block:: python @@ -845,6 +898,7 @@ def set_stream(stream): >>> paddle.set_device('custom_cpu') >>> s = paddle.device.Stream() >>> paddle.device.set_stream(s) + ''' prev_stream = current_stream(stream.stream_base.place) @@ -871,13 +925,17 @@ def set_stream(stream): class stream_guard: ''' + Notes: This API only supports dynamic graph mode currently. A context manager that specifies the current stream context by the given stream. - Parameters: + + Args: stream(Stream, optional): the selected stream. If stream is None, just yield. + Returns: None. + Examples: .. code-block:: python @@ -892,6 +950,7 @@ class stream_guard: >>> with paddle.device.stream_guard(s): ... s.wait_stream(paddle.device.default_stream()) ... data4 = data1 + data3 + ''' def __init__(self, stream=None): @@ -926,13 +985,15 @@ def __exit__(self, *args): def synchronize(device=None): """ + Wait for the compute on the given device to finish. - Parameters: + + Args: device(str|paddle.CUDAPlace(n)|paddle.XPUPlace(n)|paddle.CustomPlace(n)): The device which want to wait for. If device is None, the device is the current device. Default: None. It can be ``gpu``, ``gpu:x``, ``xpu``, ``xpu:x``, ``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevicec, where ``x`` is the index of the GPUs, XPUs. And it can be paddle.CUDAPlace(n) or paddle.XPUPlace(n) or paddle.CustomPlace(n). - Examples: + Examples: .. code-block:: python >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE) @@ -943,6 +1004,7 @@ def synchronize(device=None): >>> paddle.device.synchronize("custom_cpu:0") >>> place = paddle.CustomPlace('custom_cpu', 0) >>> paddle.device.synchronize(place) + """ if device is None: From 62ecd4d200342ea922eba8e7c23cfc42bdea6e1b Mon Sep 17 00:00:00 2001 From: Sekiro-x <2095349370@qq.com> Date: Thu, 2 Nov 2023 22:41:05 +0800 Subject: [PATCH 7/8] 'all' --- python/paddle/device/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/paddle/device/__init__.py b/python/paddle/device/__init__.py index 9c6a563f9ae175..eb4ee04c8319c6 100644 --- a/python/paddle/device/__init__.py +++ b/python/paddle/device/__init__.py @@ -833,6 +833,7 @@ def current_stream(device=None): ''' Return the current stream by the device. + Args: device(str|paddle.CUDAPlace(n)|paddle.CustomPlace(n)): The device which want to get stream from. If device is None, the device is the current device. Default: None. It can be ``gpu``, ``gpu:x``, ``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevicec, @@ -883,6 +884,7 @@ def set_stream(stream): ''' Set the current stream. + Args: stream(Stream): The selected stream. @@ -1004,7 +1006,7 @@ def synchronize(device=None): >>> paddle.device.synchronize("custom_cpu:0") >>> place = paddle.CustomPlace('custom_cpu', 0) >>> paddle.device.synchronize(place) - + """ if device is None: From 318c70b2ed4e5c53b9a9d6d19ad3a044049bbf4f Mon Sep 17 00:00:00 2001 From: Sekiro <2095349370@qq.com> Date: Sat, 4 Nov 2023 12:48:49 +0800 Subject: [PATCH 8/8] 'all' --- python/paddle/device/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/device/__init__.py b/python/paddle/device/__init__.py index eb4ee04c8319c6..f6c3bfc78a9a64 100644 --- a/python/paddle/device/__init__.py +++ b/python/paddle/device/__init__.py @@ -833,7 +833,7 @@ def current_stream(device=None): ''' Return the current stream by the device. - + Args: device(str|paddle.CUDAPlace(n)|paddle.CustomPlace(n)): The device which want to get stream from. If device is None, the device is the current device. Default: None. It can be ``gpu``, ``gpu:x``, ``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevicec,