forked from PaddlePaddle/Paddle2ONNX
Merge pull request PaddlePaddle#24 from OliverLPH/infer_py_test
demo commit for adding inference testing case
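This commit adds an InferenceTest helper class and a ResNet50 test that drives it. As a rough orientation, here is a minimal usage sketch of the new class, assuming the combined ResNet50 model files used by the test below have already been downloaded (the paths and the "inputs" feed name are taken from that test):

import numpy as np
from test_case import InferenceTest

suite = InferenceTest()
suite.load_config(model_file="./resnet50/inference.pdmodel", params_file="./resnet50/inference.pdiparams")
fake_input = np.random.randn(1, 3, 224, 224).astype("float32")
# reference outputs are computed with IR optimization switched off
truth = suite.get_truth_val({"inputs": fake_input}, device="cpu")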
Showing 4 changed files with 309 additions and 0 deletions.
One of the four new files is empty.
@@ -0,0 +1,6 @@
# -*- coding: utf-8 -*-
# encoding=utf-8 vi:ts=4:sw=4:expandtab:ft=python
"""
init
"""
from .infer_test import InferenceTest

@@ -0,0 +1,225 @@
# -*- coding: utf-8 -*-
# encoding=utf-8 vi:ts=4:sw=4:expandtab:ft=python
"""
infer test
"""
import time
import os
import sys
import logging
import threading
from multiprocessing import Process

import psutil
import yaml
import pytest
import pynvml
import numpy as np
import paddle.inference as paddle_infer

from pynvml.smi import nvidia_smi

_gpu_mem_lists = []

FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
logging.basicConfig(level=logging.INFO, format=FORMAT)
logger = logging.getLogger(__name__)

class InferenceTest(object):
    """
    python inference test base class
    """

    def __init__(self):
        """
        __init__
        """
        pass

    def load_config(self, **kwargs):
        """
        load model to create config
        Args:
            model_path(str) : uncombined model path
            model_file(str) : combined model's model file
            params_file(str) : combined model's params file
        Returns:
            None
        """
        model_path = kwargs.get("model_path", None)
        model_file = kwargs.get("model_file", None)
        params_file = kwargs.get("params_file", None)

        if model_path:
            assert os.path.exists(model_path)
            self.pd_config = paddle_infer.Config(model_path)
        elif model_file:
            assert os.path.exists(model_file)
            assert os.path.exists(params_file)
            self.pd_config = paddle_infer.Config(model_file, params_file)
        else:
            raise Exception(f"model file path does not exist, [{model_path}] or [{model_file}] is invalid!")

    def get_truth_val(self, input_data_dict: dict, device: str) -> dict:
        """
        get truth value calculated by target device kernel
        Args:
            input_data_dict(dict) : input data constructed as dictionary
            device(str) : target device, "cpu" or "gpu"
        Returns:
            output_data_dict(dict) : flattened outputs keyed by output name
        """
        if device == "cpu":
            self.pd_config.disable_gpu()
        elif device == "gpu":
            self.pd_config.enable_use_gpu(1000, 0)
        else:
            raise Exception(f"{device} not supported in current test codes")
        # disable IR optimization so these outputs can serve as reference values
        self.pd_config.switch_ir_optim(False)
        predictor = paddle_infer.create_predictor(self.pd_config)

        input_names = predictor.get_input_names()
        for _, input_data_name in enumerate(input_names):
            input_handle = predictor.get_input_handle(input_data_name)
            input_handle.copy_from_cpu(input_data_dict[input_data_name])

        predictor.run()

        output_data_dict = {}
        output_names = predictor.get_output_names()
        for _, output_data_name in enumerate(output_names):
            output_handle = predictor.get_output_handle(output_data_name)
            output_data = output_handle.copy_to_cpu()
            output_data = output_data.flatten()
            output_data_dict[output_data_name] = output_data
        return output_data_dict

    def config_test(self):
        """
        test config instance
        """
        assert isinstance(self.pd_config, paddle_infer.Config), "Paddle Inference Config creation failed"

    def disable_gpu_test(self, input_data_dict: dict, repeat=20):
        """
        test disable_gpu() api
        Args:
            input_data_dict(dict) : input data constructed as dictionary
            repeat(int) : inference repeat times, gives the monitor thread time to catch gpu mem
        Returns:
            None
        """
        self.pd_config.disable_gpu()
        predictor = paddle_infer.create_predictor(self.pd_config)

        cuda_visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES")
        if cuda_visible_devices:
            cuda_visible_device = int(cuda_visible_devices.split(",")[0])
        else:
            cuda_visible_device = 0

        ori_gpu_mem = float(get_gpu_mem(cuda_visible_device)["used(MB)"])

        # monitor gpu memory of this process in a daemon thread while inference runs
        record_thread = threading.Thread(target=record_by_pid, args=(os.getpid(), cuda_visible_device))
        record_thread.daemon = True
        record_thread.start()

        input_names = predictor.get_input_names()
        for _, input_data_name in enumerate(input_names):
            input_handle = predictor.get_input_handle(input_data_name)
            input_handle.copy_from_cpu(input_data_dict[input_data_name])

        for _ in range(repeat):
            predictor.run()

        output_names = predictor.get_output_names()
        output_handle = predictor.get_output_handle(output_names[0])
        output_data = output_handle.copy_to_cpu()

        gpu_max_mem = max([float(i["used(MB)"]) for i in _gpu_mem_lists])
        assert abs(gpu_max_mem - ori_gpu_mem) < 1, "set disable_gpu(), but gpu activity found"

    def trt_fp32_bz1_test(self, input_data_dict: dict, output_data_dict: dict, repeat=5, delta=1e-5):
        """
        test enable_tensorrt_engine()
        batch_size = 1
        trt max_batch_size = 1
        precision_mode = paddle_infer.PrecisionType.Float32
        Args:
            input_data_dict(dict) : input data constructed as dictionary
            output_data_dict(dict) : truth output data constructed as dictionary
            repeat(int) : inference repeat times
            delta(float) : allowed difference between inference outputs and truth values
        Returns:
            None
        """
        self.pd_config.enable_use_gpu(1000, 0)
        self.pd_config.enable_tensorrt_engine(
            workspace_size=1 << 30,
            max_batch_size=1,
            min_subgraph_size=3,
            precision_mode=paddle_infer.PrecisionType.Float32,
            use_static=False,
            use_calib_mode=False,
        )
        predictor = paddle_infer.create_predictor(self.pd_config)

        input_names = predictor.get_input_names()
        for _, input_data_name in enumerate(input_names):
            input_handle = predictor.get_input_handle(input_data_name)
            input_handle.copy_from_cpu(input_data_dict[input_data_name])

        for _ in range(repeat):
            predictor.run()

        output_names = predictor.get_output_names()
        for _, output_data_name in enumerate(output_names):
            output_handle = predictor.get_output_handle(output_data_name)
            output_data = output_handle.copy_to_cpu()
            output_data = output_data.flatten()
            output_data_truth_val = output_data_dict[output_data_name].flatten()
            # compare element-wise against the truth values within delta
            for j, out_data in enumerate(output_data):
                assert (
                    abs(out_data - output_data_truth_val[j]) <= delta
                ), f"{out_data} - {output_data_truth_val[j]} > {delta}"

def record_by_pid(pid: int, cuda_visible_device: int):
    """
    record gpu memory usage while the given process is alive
    Args:
        pid(int) : pid of the process
        cuda_visible_device(int) : first gpu card of CUDA_VISIBLE_DEVICES
    Returns:
        gpu_max_mem(float): recorded max gpu mem
    """
    global _gpu_mem_lists

    while psutil.pid_exists(pid):
        gpu_mem = get_gpu_mem(cuda_visible_device)
        _gpu_mem_lists.append(gpu_mem)
        time.sleep(0.1)  # avoid busy-polling pynvml

    gpu_max_mem = max([float(i["used(MB)"]) for i in _gpu_mem_lists])
    return gpu_max_mem

def get_gpu_mem(gpu_id=0):
    """
    get gpu mem from gpu id
    Args:
        gpu_id(int): gpu id
    Returns:
        gpu_mem(dict): gpu memory and utilization information
    """
    pynvml.nvmlInit()
    gpu_handle = pynvml.nvmlDeviceGetHandleByIndex(gpu_id)
    gpu_mem_info = pynvml.nvmlDeviceGetMemoryInfo(gpu_handle)
    gpu_utilization_info = pynvml.nvmlDeviceGetUtilizationRates(gpu_handle)
    gpu_mem = {}
    gpu_mem["total(MB)"] = gpu_mem_info.total / 1024.0 ** 2
    gpu_mem["free(MB)"] = gpu_mem_info.free / 1024.0 ** 2
    gpu_mem["used(MB)"] = gpu_mem_info.used / 1024.0 ** 2
    gpu_mem["gpu_utilization_rate(%)"] = gpu_utilization_info.gpu
    gpu_mem["gpu_mem_utilization_rate(%)"] = gpu_utilization_info.memory
    pynvml.nvmlShutdown()
    return gpu_mem
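A quick usage sketch of the helper above; the dictionary keys are the ones disable_gpu_test and record_by_pid read back:

# poll card 0 once and print the fields the tests rely on
mem = get_gpu_mem(0)
print(mem["used(MB)"], mem["total(MB)"], mem["gpu_utilization_rate(%)"])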
@@ -0,0 +1,78 @@
# -*- coding: utf-8 -*-
# encoding=utf-8 vi:ts=4:sw=4:expandtab:ft=python
"""
test resnet50 model
"""
import os
import sys
import logging
import tarfile
import six

import wget
import pytest
import numpy as np

# pylint: disable=wrong-import-position
sys.path.append("..")
from test_case import InferenceTest

# pylint: enable=wrong-import-position

def check_model_exist():
    """
    download and extract the resnet50 demo model if it is not already present
    """
    resnet50_url = "https://paddle-inference-dist.bj.bcebos.com/Paddle-Inference-Demo/resnet50.tgz"
    if not os.path.exists("./resnet50/inference.pdiparams"):
        wget.download(resnet50_url, out="./")
        # the archive is expected to contain the resnet50/ directory with the
        # combined inference.pdmodel / inference.pdiparams files used below
        with tarfile.open("./resnet50.tgz") as tar:
            tar.extractall(path="./")

@pytest.mark.p0
@pytest.mark.config_init_combined_model
def test_config():
    """
    test combined model config
    """
    check_model_exist()
    test_suite = InferenceTest()
    test_suite.load_config(model_file="./resnet50/inference.pdmodel", params_file="./resnet50/inference.pdiparams")
    test_suite.config_test()

@pytest.mark.p0
@pytest.mark.config_disablegpu_memory
def test_disable_gpu():
    """
    test that no gpu resources are occupied after disable_gpu()
    """
    check_model_exist()
    test_suite = InferenceTest()
    test_suite.load_config(model_file="./resnet50/inference.pdmodel", params_file="./resnet50/inference.pdiparams")
    batch_size = 1
    fake_input = np.random.randn(batch_size, 3, 224, 224).astype("float32")
    input_data_dict = {"inputs": fake_input}
    test_suite.disable_gpu_test(input_data_dict)

@pytest.mark.p1
@pytest.mark.trt_fp32_bz1_precision
def test_trtfp32_bz1():
    """
    compare trt fp32 batch_size=1 resnet50 outputs against the no-ir-optim truth values
    """
    check_model_exist()

    batch_size = 1
    fake_input = np.random.randn(batch_size, 3, 224, 224).astype("float32")
    input_data_dict = {"inputs": fake_input}

    test_suite = InferenceTest()
    test_suite.load_config(model_file="./resnet50/inference.pdmodel", params_file="./resnet50/inference.pdiparams")
    output_data_dict = test_suite.get_truth_val(input_data_dict, device="gpu")

    del test_suite  # destroy the first predictor to release memory

    test_suite2 = InferenceTest()
    test_suite2.load_config(model_file="./resnet50/inference.pdmodel", params_file="./resnet50/inference.pdiparams")
    test_suite2.trt_fp32_bz1_test(input_data_dict, output_data_dict)
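The tests above use custom pytest markers (p0, p1, config_init_combined_model, config_disablegpu_memory, trt_fp32_bz1_precision). A hypothetical conftest.py sketch, not part of this commit, that registers them so pytest does not warn about unknown marks:

# conftest.py (hypothetical): register the custom markers used by the tests
def pytest_configure(config):
    markers = [
        "p0: high priority inference tests",
        "p1: secondary inference tests",
        "config_init_combined_model: combined model Config creation",
        "config_disablegpu_memory: gpu memory check after disable_gpu()",
        "trt_fp32_bz1_precision: TensorRT fp32 batch_size=1 precision check",
    ]
    for marker in markers:
        config.addinivalue_line("markers", marker)

With the markers registered, a priority level can be selected on the command line, e.g. pytest -m p0.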