[Fea] Support python inference #773

Merged

Changes from 19 of 20 commits:
1c8e025
[Doc] Add pretrained model for laplace2d & refine comments (#639)
HydrogenSulfate Nov 15, 2023
2ad63b8
add deploy module for aneurysm
HydrogenSulfate Nov 15, 2023
615799e
Merge branch 'PaddlePaddle:develop' into add_inference_module
HydrogenSulfate Nov 15, 2023
8d93283
Merge branch 'PaddlePaddle:develop' into add_inference_module
HydrogenSulfate Nov 15, 2023
b62d6b0
update code
HydrogenSulfate Nov 15, 2023
975fdf0
optimize inference config of aneurysm
HydrogenSulfate Jan 15, 2024
feb4a39
update aneurysm code
HydrogenSulfate Jan 15, 2024
6264c70
Merge branch 'develop' into add_py_infer_deploy
HydrogenSulfate Jan 16, 2024
594b6d8
Merge branch 'develop' into add_py_infer_deploy
HydrogenSulfate Jan 24, 2024
742b801
Merge branch 'PaddlePaddle:develop' into add_py_infer_deploy
HydrogenSulfate Jan 28, 2024
b1f1d24
Merge branch 'develop' into add_py_infer_deploy
HydrogenSulfate Feb 1, 2024
f072c0d
Merge branch 'add_py_infer_deploy' of https://github.com/HydrogenSulf…
HydrogenSulfate Feb 1, 2024
5a1793b
update code
HydrogenSulfate Feb 1, 2024
fc1d4d6
update code
HydrogenSulfate Feb 1, 2024
8c87875
update code
HydrogenSulfate Feb 2, 2024
0e96161
Merge branch 'develop' into add_py_infer_deploy
HydrogenSulfate Feb 2, 2024
5aa8a34
update aneurysm document
HydrogenSulfate Feb 3, 2024
b12f65b
update export and inference document
HydrogenSulfate Feb 3, 2024
790b2fc
Merge branch 'add_py_infer_deploy' of https://github.com/HydrogenSulf…
HydrogenSulfate Feb 3, 2024
1280136
fix docstring
HydrogenSulfate Feb 4, 2024
17 changes: 17 additions & 0 deletions deploy/__init__.py
@@ -0,0 +1,17 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
deploy module is designed for inference and deployment.
"""
13 changes: 13 additions & 0 deletions deploy/python_infer/__init__.py
@@ -0,0 +1,13 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
219 changes: 219 additions & 0 deletions deploy/python_infer/base.py
@@ -0,0 +1,219 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import platform
from os import path as osp
from typing import TYPE_CHECKING
from typing import Optional
from typing import Tuple

from paddle import inference as paddle_inference
from typing_extensions import Literal

from ppsci.utils import logger

if TYPE_CHECKING:
import onnxruntime


class Predictor:
"""
Initializes the inference engine with the given parameters.

Args:
pdmodel_path (Optional[str]): Path to the PaddlePaddle model file. Defaults to None.
pdpiparams_path (Optional[str]): Path to the PaddlePaddle model parameters file. Defaults to None.
device (Literal["gpu", "cpu", "npu", "xpu"], optional): Device to use for inference. Defaults to "cpu".
engine (Literal["native", "tensorrt", "onnx", "mkldnn"], optional): Inference engine to use. Defaults to "native".
precision (Literal["fp32", "fp16", "int8"], optional): Precision to use for inference. Defaults to "fp32".
onnx_path (Optional[str], optional): Path to the ONNX model file. Defaults to None.
ir_optim (bool, optional): Whether to use IR optimization. Defaults to True.
min_subgraph_size (int, optional): Minimum subgraph size for IR optimization. Defaults to 15.
gpu_mem (int, optional): Maximum GPU memory(MB) to use. Defaults to 500(MB).
[Review comment, Collaborator] This parameter is probably not the maximum GPU memory usage; please double-check.

[Reply, Collaborator/Author] Corrected according to the official documentation.
gpu_id (int, optional): GPU ID to use. Defaults to 0.
max_batch_size (int, optional): Maximum batch size during inference. Defaults to 10.
num_cpu_threads (int, optional): Number of CPU threads to use. Defaults to 10.
"""

def __init__(
self,
pdmodel_path: Optional[str] = None,
pdpiparams_path: Optional[str] = None,
*,
device: Literal["gpu", "cpu", "npu", "xpu"] = "cpu",
engine: Literal["native", "tensorrt", "onnx", "mkldnn"] = "native",
precision: Literal["fp32", "fp16", "int8"] = "fp32",
onnx_path: Optional[str] = None,
ir_optim: bool = True,
min_subgraph_size: int = 15,
gpu_mem: int = 500,
gpu_id: int = 0,
max_batch_size: int = 10,
num_cpu_threads: int = 10,
):
self.pdmodel_path = pdmodel_path
self.pdpiparams_path = pdpiparams_path

self._check_device(device)
self.device = device
self._check_engine(engine)
self.engine = engine
self._check_precision(precision)
self.precision = precision

self.onnx_path = onnx_path
self.ir_optim = ir_optim
self.min_subgraph_size = min_subgraph_size
self.gpu_mem = gpu_mem
self.gpu_id = gpu_id
self.max_batch_size = max_batch_size
self.num_cpu_threads = num_cpu_threads

if self.engine == "onnx":
self.predictor, self.config = self._create_onnx_predictor()
else:
self.predictor, self.config = self._create_paddle_predictor()

logger.message(
f"Inference with engine: {self.engine}, precision: {self.precision}, "
f"device: {self.device}."
)

def predict(self, image):
raise NotImplementedError

def _create_paddle_predictor(
self,
) -> Tuple[paddle_inference.Predictor, paddle_inference.Config]:
if not osp.exists(self.pdmodel_path):
raise FileNotFoundError(
f"Given 'pdmodel_path': {self.pdmodel_path} does not exist. "
"Please check if it is correct."
)
if not osp.exists(self.pdpiparams_path):
raise FileNotFoundError(
f"Given 'pdpiparams_path': {self.pdpiparams_path} does not exist. "
"Please check if it is correct."
)

config = paddle_inference.Config(self.pdmodel_path, self.pdpiparams_path)
if self.device == "gpu":
config.enable_use_gpu(self.gpu_mem, self.gpu_id)
if self.engine == "tensorrt":
if self.precision == "fp16":
precision = paddle_inference.Config.Precision.Half
elif self.precision == "int8":
precision = paddle_inference.Config.Precision.Int8
else:
precision = paddle_inference.Config.Precision.Float32
config.enable_tensorrt_engine(
workspace_size=1 << 30,
precision_mode=precision,
max_batch_size=self.max_batch_size,
min_subgraph_size=self.min_subgraph_size,
use_calib_mode=False,
)
# collect shape
pdmodel_dir = osp.dirname(self.pdmodel_path)
trt_shape_path = osp.join(pdmodel_dir, "trt_dynamic_shape.txt")

if not osp.exists(trt_shape_path):
config.collect_shape_range_info(trt_shape_path)
logger.info(
f"Save collected dynamic shape info to: {trt_shape_path}"
)
try:
config.enable_tuned_tensorrt_dynamic_shape(trt_shape_path, True)
except Exception as e:
logger.warning(e)
logger.warning(
"TRT dynamic shape is disabled for your paddlepaddle < 2.3.0"
)

elif self.device == "npu":
config.enable_custom_device("npu")
elif self.device == "xpu":
config.enable_xpu(10 * 1024 * 1024)
else:
config.disable_gpu()
if self.engine == "mkldnn":
# 'set_mkldnn_cache_capacity' is not available on macOS
if platform.system() != "Darwin":
...
# cache 10 different shapes for mkldnn to avoid memory leak
# config.set_mkldnn_cache_capacity(10)
config.enable_mkldnn()

if self.precision == "fp16":
config.enable_mkldnn_bfloat16()

config.set_cpu_math_library_num_threads(self.num_cpu_threads)

# enable memory optim
config.enable_memory_optim()
config.disable_glog_info()
# enable zero copy
config.switch_use_feed_fetch_ops(False)
config.switch_ir_optim(self.ir_optim)

predictor = paddle_inference.create_predictor(config)
return predictor, config

def _create_onnx_predictor(
self,
) -> Tuple["onnxruntime.InferenceSession", "onnxruntime.SessionOptions"]:
if not osp.exists(self.onnx_path):
raise FileNotFoundError(
f"Given 'onnx_path' {self.onnx_path} does not exist. "
"Please check if it is correct."
)

try:
import onnxruntime as ort
except ModuleNotFoundError:
raise ModuleNotFoundError(
"Please install onnxruntime with `pip install onnxruntime`."
)

# set config for onnx predictor
config = ort.SessionOptions()
config.intra_op_num_threads = self.num_cpu_threads
if self.ir_optim:
config.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL

# instantiate onnx predictor
predictor = ort.InferenceSession(self.onnx_path, sess_options=config)
return predictor, config

def _check_device(self, device: str):
if device not in ["gpu", "cpu", "npu", "xpu"]:
raise ValueError(
"Inference only supports 'gpu', 'cpu', 'npu' and 'xpu' devices, "
f"but got {device}."
)

def _check_engine(self, engine: str):
if engine not in ["native", "tensorrt", "onnx", "mkldnn"]:
raise ValueError(
"Inference only supports 'native', 'tensorrt', 'onnx' and 'mkldnn' "
f"engines, but got {engine}."
)

def _check_precision(self, precision: str):
if precision not in ["fp32", "fp16", "int8"]:
raise ValueError(
"Inference only supports 'fp32', 'fp16' and 'int8' "
f"precision, but got {precision}."
)
120 changes: 120 additions & 0 deletions deploy/python_infer/pinn_predictor.py
@@ -0,0 +1,120 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Dict
from typing import Union

import numpy as np
import paddle
from omegaconf import DictConfig

from deploy.python_infer import base
from ppsci.utils import logger
from ppsci.utils import misc


class PINNPredictor(base.Predictor):
"""General predictor for PINN-based models.

Args:
cfg (DictConfig): Running configuration.
"""

def __init__(
self,
cfg: DictConfig,
):
super().__init__(
cfg.INFER.pdmodel_path,
cfg.INFER.pdpiparams_path,
device=cfg.INFER.device,
engine=cfg.INFER.engine,
precision=cfg.INFER.precision,
onnx_path=cfg.INFER.onnx_path,
ir_optim=cfg.INFER.ir_optim,
min_subgraph_size=cfg.INFER.min_subgraph_size,
gpu_mem=cfg.INFER.gpu_mem,
gpu_id=cfg.INFER.gpu_id,
max_batch_size=cfg.INFER.max_batch_size,
num_cpu_threads=cfg.INFER.num_cpu_threads,
)
self.log_freq = cfg.log_freq

def predict(
self,
input_dict: Dict[str, Union[np.ndarray, paddle.Tensor]],
batch_size: int = 64,
) -> Dict[str, np.ndarray]:
"""
Predicts the output of the model for the given input.

Args:
input_dict (Dict[str, Union[np.ndarray, paddle.Tensor]]):
A dictionary containing the input data.
batch_size (int, optional): The batch size to use for prediction.
Defaults to 64.

Returns:
Dict[str, np.ndarray]: A dictionary containing the predicted output.
"""
if batch_size > self.max_batch_size:
logger.warning(
f"batch_size({batch_size}) is larger than "
f"max_batch_size({self.max_batch_size}), which may occur error."
)

# prepare input handle(s)
input_handles = {
name: self.predictor.get_input_handle(name) for name in input_dict
}
# prepare output handle(s)
output_handles = {
name: self.predictor.get_output_handle(name)
for name in self.predictor.get_output_names()
}

num_samples = len(next(iter(input_dict.values())))
batch_num = (num_samples + (batch_size - 1)) // batch_size
pred_dict = misc.Prettydefaultdict(list)

# inference by batch
for batch_id in range(1, batch_num + 1):
if batch_id % self.log_freq == 0 or batch_id == batch_num:
logger.info(f"Predicting batch {batch_id}/{batch_num}")

# prepare batch input dict
st = (batch_id - 1) * batch_size
ed = min(num_samples, batch_id * batch_size)
batch_input_dict = {key: input_dict[key][st:ed] for key in input_dict}

# send batch input data to input handle(s)
for name, handle in input_handles.items():
handle.copy_from_cpu(batch_input_dict[name])

# run predictor
self.predictor.run()

# receive batch output data from output handle(s)
batch_output_dict = {
name: output_handles[name].copy_to_cpu() for name in output_handles
}

# collect batch output data
for key, batch_output in batch_output_dict.items():
pred_dict[key].append(batch_output)

# concatenate local predictions
pred_dict = {key: np.concatenate(value) for key, value in pred_dict.items()}

return pred_dict
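
End-to-end usage sketch for PINNPredictor (illustration only, not part of the diff). The config below mirrors the cfg.INFER.* keys consumed in __init__; the model paths and the input names "x", "y", "z" are hypothetical and must match the actually exported model.

import numpy as np
from omegaconf import OmegaConf

from deploy.python_infer import pinn_predictor

# Hypothetical config providing the keys read by PINNPredictor.__init__.
cfg = OmegaConf.create(
    {
        "log_freq": 20,
        "INFER": {
            "pdmodel_path": "./inference/model.pdmodel",  # placeholder path
            "pdpiparams_path": "./inference/model.pdiparams",  # placeholder path
            "onnx_path": None,
            "device": "cpu",
            "engine": "native",
            "precision": "fp32",
            "ir_optim": True,
            "min_subgraph_size": 15,
            "gpu_mem": 500,
            "gpu_id": 0,
            "max_batch_size": 64,
            "num_cpu_threads": 4,
        },
    }
)

predictor = pinn_predictor.PINNPredictor(cfg)

# Input keys must match the exported model's input names ("x", "y", "z" is an assumption).
input_dict = {
    "x": np.random.rand(1000, 1).astype(np.float32),
    "y": np.random.rand(1000, 1).astype(np.float32),
    "z": np.random.rand(1000, 1).astype(np.float32),
}
output_dict = predictor.predict(input_dict, batch_size=64)  # Dict[str, np.ndarray]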