From 2797cf8294361c75dd868f62a3e327546155c413 Mon Sep 17 00:00:00 2001
From: zhangyue66
Date: Fri, 14 Feb 2025 15:58:58 +0800
Subject: [PATCH 1/5] ort upgrade from 1.12.0 to 1.20.1

---
 libs/ultra-infer/CMakeLists.txt | 4 ++--
 libs/ultra-infer/cmake/onnxruntime.cmake | 4 ++--
 .../runtime/backends/ort/ops/adaptive_pool2d.cc | 5 +++++
 .../runtime/backends/ort/ops/adaptive_pool2d.h | 8 ++++++++
 .../runtime/backends/ort/ops/multiclass_nms.cc | 5 +++++
 .../ultra_infer/runtime/backends/ort/ops/multiclass_nms.h | 7 +++++++
 6 files changed, 29 insertions(+), 4 deletions(-)
 mode change 100755 => 100644 libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/adaptive_pool2d.cc
 mode change 100755 => 100644 libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/adaptive_pool2d.h
 mode change 100755 => 100644 libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/multiclass_nms.cc
 mode change 100755 => 100644 libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/multiclass_nms.h

diff --git a/libs/ultra-infer/CMakeLists.txt b/libs/ultra-infer/CMakeLists.txt
index 7c59827ecc..11a598f75e 100755
--- a/libs/ultra-infer/CMakeLists.txt
+++ b/libs/ultra-infer/CMakeLists.txt
@@ -36,10 +36,10 @@ set(THIRD_PARTY_DIR ${PROJECT_SOURCE_DIR}/third_party)
 add_subdirectory(${CSRCS_DIR_NAME}/ultra_infer)
 include(${PROJECT_SOURCE_DIR}/cmake/utils.cmake)
 
-# Set C++11 as standard for the whole project
+# Set C++17 as standard for the whole project
 if(NOT MSVC)
   if(NOT DEFINED CMAKE_CXX_STANDARD)
-    set(CMAKE_CXX_STANDARD 11)
+    set(CMAKE_CXX_STANDARD 17)
   endif()
   set(CMAKE_CXX_FLAGS "-Wno-format -g0 -O3")
   if(NEED_ABI0)
diff --git a/libs/ultra-infer/cmake/onnxruntime.cmake b/libs/ultra-infer/cmake/onnxruntime.cmake
index 8672c8c7ef..da0e740743 100755
--- a/libs/ultra-infer/cmake/onnxruntime.cmake
+++ b/libs/ultra-infer/cmake/onnxruntime.cmake
@@ -41,8 +41,8 @@ else()
 endif()
 set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}" "${ONNXRUNTIME_LIB_DIR}")
 
-set(ONNXRUNTIME_VERSION "1.12.0")
-set(ONNXRUNTIME_URL_PREFIX "https://bj.bcebos.com/paddle2onnx/libs/")
+set(ONNXRUNTIME_VERSION "1.20.1")
+set(ONNXRUNTIME_URL_PREFIX "https://fastdeploy.bj.bcebos.com/third_libs/")
 
 if(WIN32)
   if(WITH_GPU)
diff --git a/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/adaptive_pool2d.cc b/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/adaptive_pool2d.cc
old mode 100755
new mode 100644
index 983217fde5..c0eddc00f2
--- a/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/adaptive_pool2d.cc
+++ b/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/adaptive_pool2d.cc
@@ -60,6 +60,11 @@ void AdaptivePool2dKernel::CpuAdaptivePool(
   }
 }
 
+OrtStatusPtr AdaptivePool2dKernel::ComputeV2(OrtKernelContext *context) {
+  Compute(context);
+  return nullptr;
+}
+
 void AdaptivePool2dKernel::Compute(OrtKernelContext *context) {
 #if ORT_API_VERSION >= 14
   Ort::KernelContext ort_context{context};
diff --git a/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/adaptive_pool2d.h b/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/adaptive_pool2d.h
old mode 100755
new mode 100644
index f105a2e8b6..b4de2bb0fd
--- a/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/adaptive_pool2d.h
+++ b/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/adaptive_pool2d.h
@@ -49,6 +49,8 @@ struct AdaptivePool2dKernel {
 
   void Compute(OrtKernelContext *context);
 
+  OrtStatusPtr ComputeV2(OrtKernelContext *context);
+
   void CpuAdaptivePool(const std::vector<int64_t> &input_size,
                        const std::vector<int64_t> &output_size,
                        const float *input_data, float *output_data);
@@ -61,6 +63,12 @@ struct AdaptivePool2dOp
     return new AdaptivePool2dKernel(api, info, provider_);
   }
 
+  OrtStatusPtr CreateKernelV2(OrtApi api, const OrtKernelInfo *info,
+                              void **op_kernel) const {
+    *op_kernel = new AdaptivePool2dKernel(api, info, provider_);
+    return nullptr;
+  }
+
   const char *GetName() const { return "AdaptivePool2d"; }
 
   size_t GetInputTypeCount() const { return 1; }
diff --git a/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/multiclass_nms.cc b/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/multiclass_nms.cc
old mode 100755
new mode 100644
index c7c5bf3724..34165d8b4c
--- a/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/multiclass_nms.cc
+++ b/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/multiclass_nms.cc
@@ -156,6 +156,11 @@ int MultiClassNmsKernel::NMSForEachSample(
   return num_det;
 }
 
+OrtStatusPtr MultiClassNmsKernel::ComputeV2(OrtKernelContext *context) {
+  Compute(context);
+  return nullptr;
+}
+
 void MultiClassNmsKernel::Compute(OrtKernelContext *context) {
 #if ORT_API_VERSION >= 14
   Ort::KernelContext ort_context{context};
diff --git a/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/multiclass_nms.h b/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/multiclass_nms.h
old mode 100755
new mode 100644
index 4f71b14fc9..4c6e95863e
--- a/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/multiclass_nms.h
+++ b/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/multiclass_nms.h
@@ -40,6 +40,7 @@ struct MultiClassNmsKernel {
   void GetAttribute(const OrtKernelInfo *info);
 
   void Compute(OrtKernelContext *context);
+  OrtStatusPtr ComputeV2(OrtKernelContext *context);
   void FastNMS(const float *boxes, const float *scores, const int &num_boxes,
                std::vector<int> *keep_indices);
   int NMSForEachSample(const float *boxes, const float *scores, int num_boxes,
@@ -53,6 +54,12 @@ struct MultiClassNmsOp
     return new MultiClassNmsKernel(api, info);
   }
 
+  OrtStatusPtr CreateKernelV2(OrtApi api, const OrtKernelInfo *info,
+                              void **op_kernel) const {
+    *op_kernel = new MultiClassNmsKernel(api, info);
+    return nullptr;
+  }
+
   const char *GetName() const { return "MultiClassNMS"; }
 
   size_t GetInputTypeCount() const { return 2; }
From 2fe75e6818362271fa4e441c9d6b5ee1b8770ca3 Mon Sep 17 00:00:00 2001
From: zhangyue66
Date: Mon, 17 Feb 2025 11:27:02 +0800
Subject: [PATCH 2/5] update

---
 libs/ultra-infer/CMakeLists.txt | 15 +-
 libs/ultra-infer/UltraInfer.cmake.in | 27 +
 libs/ultra-infer/cmake/om.cmake | 29 +
 libs/ultra-infer/cmake/onnxruntime.cmake | 4 +-
 libs/ultra-infer/python/setup.py | 1 +
 .../python/ultra_infer/__init__.py | 2 +
 .../python/ultra_infer/c_lib_wrap.py.in | 4 +
 libs/ultra-infer/python/ultra_infer/runtime.py | 4 +
 libs/ultra-infer/ultra_infer/core/config.h.in | 4 +
 libs/ultra-infer/ultra_infer/pybind/runtime.cc | 6 +-
 .../runtime/backends/om/om_backend.cc | 556 ++++++++++++++++++
 .../runtime/backends/om/om_backend.h | 80 +++
 .../backends/ort/ops/adaptive_pool2d.cc | 5 -
 .../backends/ort/ops/adaptive_pool2d.h | 8 -
 .../backends/ort/ops/multiclass_nms.cc | 5 -
 .../runtime/backends/ort/ops/multiclass_nms.h | 7 -
 .../ultra_infer/runtime/enum_variables.cc | 7 +
 .../ultra_infer/runtime/enum_variables.h | 8 +-
 .../ultra_infer/runtime/option_pybind.cc | 1 +
 .../ultra_infer/runtime/runtime.cc | 18 +
 libs/ultra-infer/ultra_infer/runtime/runtime.h | 1 +
 .../ultra_infer/runtime/runtime_option.cc | 8 +
 .../ultra_infer/runtime/runtime_option.h | 1 +
 23 files changed, 767 insertions(+), 34 deletions(-)
 create mode 100644 libs/ultra-infer/cmake/om.cmake
 create mode 100644 libs/ultra-infer/ultra_infer/runtime/backends/om/om_backend.cc
 create mode 100644 libs/ultra-infer/ultra_infer/runtime/backends/om/om_backend.h
 mode change 100644 => 100755 libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/adaptive_pool2d.cc
 mode change 100644 => 100755 libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/adaptive_pool2d.h
 mode change 100644 => 100755 libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/multiclass_nms.cc
 mode change 100644 => 100755 libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/multiclass_nms.h

diff --git a/libs/ultra-infer/CMakeLists.txt b/libs/ultra-infer/CMakeLists.txt
index 11a598f75e..3d7ccfff6f 100755
--- a/libs/ultra-infer/CMakeLists.txt
+++ b/libs/ultra-infer/CMakeLists.txt
@@ -36,10 +36,10 @@ set(THIRD_PARTY_DIR ${PROJECT_SOURCE_DIR}/third_party)
 add_subdirectory(${CSRCS_DIR_NAME}/ultra_infer)
 include(${PROJECT_SOURCE_DIR}/cmake/utils.cmake)
 
-# Set C++17 as standard for the whole project
+# Set C++11 as standard for the whole project
 if(NOT MSVC)
   if(NOT DEFINED CMAKE_CXX_STANDARD)
-    set(CMAKE_CXX_STANDARD 17)
+    set(CMAKE_CXX_STANDARD 11)
   endif()
   set(CMAKE_CXX_FLAGS "-Wno-format -g0 -O3")
   if(NEED_ABI0)
@@ -70,6 +70,7 @@ option(ENABLE_SOPHGO_BACKEND "Whether to enable SOPHON backend." OFF)
 option(ENABLE_TVM_BACKEND "Whether to enable TVM backend." OFF)
 option(ENABLE_LITE_BACKEND "Whether to enable paddle lite backend." OFF)
 option(ENABLE_HORIZON_BACKEND "Whether to enable HORIZON backend." OFF)
+option(ENABLE_OM_BACKEND "Whether to enable OM backend." OFF)
 option(ENABLE_VISION "Whether to enable vision models usage." OFF)
 option(ENABLE_TEXT "Whether to enable text models usage." OFF)
 option(ENABLE_FLYCV "Whether to enable flycv to boost image preprocess." OFF)
@@ -172,6 +173,7 @@ file(GLOB_RECURSE DEPLOY_SOPHGO_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultra_infer/runtime/backends/sophgo/*.cc)
 file(GLOB_RECURSE DEPLOY_TVM_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultra_infer/runtime/backends/tvm/*.cc)
 file(GLOB_RECURSE DEPLOY_LITE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultra_infer/runtime/backends/lite/*.cc)
 file(GLOB_RECURSE DEPLOY_PIPELINE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultra_infer/pipeline/*.cc)
+file(GLOB_RECURSE DEPLOY_OM_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultra_infer/runtime/backends/om/*.cc)
 file(GLOB_RECURSE DEPLOY_VISION_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultra_infer/vision/*.cc)
 file(GLOB_RECURSE DEPLOY_TEXT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultra_infer/text/*.cc)
 file(GLOB_RECURSE DEPLOY_PYBIND_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultra_infer/pybind/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/ultra_infer/*_pybind.cc)
@@ -194,7 +196,7 @@ list(REMOVE_ITEM ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS} ${DEPLOY_PADDLE_SRCS}
                  ${DEPLOY_PIPELINE_SRCS} ${DEPLOY_RKNPU2_SRCS}
                  ${DEPLOY_SOPHGO_SRCS} ${DEPLOY_HORIZON_SRCS} ${DEPLOY_TVM_SRCS}
-                 ${DEPLOY_PADDLE_CUSTOM_OP_SRCS})
+                 ${DEPLOY_PADDLE_CUSTOM_OP_SRCS} ${DEPLOY_OM_SRCS})
 
 set(DEPEND_LIBS "")
 
@@ -328,6 +330,13 @@ if(ENABLE_POROS_BACKEND)
   execute_process(COMMAND ${Python_EXECUTABLE} ${PROJECT_SOURCE_DIR}/scripts/copy_directory.py ${TRT_DIRECTORY}/lib ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib)
 endif()
 
+if(ENABLE_OM_BACKEND)
+  add_definitions(-DENABLE_OM_BACKEND)
+  list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_OM_SRCS})
+  include(${PROJECT_SOURCE_DIR}/cmake/om.cmake)
+  list(APPEND DEPEND_LIBS ${NPU_libs})
+endif()
+
 if(WITH_GPU)
   add_definitions(-DWITH_GPU)
   include_directories(${CUDA_DIRECTORY}/include)
diff --git a/libs/ultra-infer/UltraInfer.cmake.in b/libs/ultra-infer/UltraInfer.cmake.in
index d7d46adfe3..9358f21c37 100755
--- a/libs/ultra-infer/UltraInfer.cmake.in
+++ b/libs/ultra-infer/UltraInfer.cmake.in
@@ -32,6 +32,7 @@ set(ENABLE_PADDLE_BACKEND @ENABLE_PADDLE_BACKEND@)
 set(ENABLE_OPENVINO_BACKEND @ENABLE_OPENVINO_BACKEND@)
 set(ENABLE_POROS_BACKEND @ENABLE_POROS_BACKEND@)
 set(ENABLE_TRT_BACKEND @ENABLE_TRT_BACKEND@)
+set(ENABLE_OM_BACKEND @ENABLE_OM_BACKEND@)
 set(ENABLE_PADDLE2ONNX @ENABLE_PADDLE2ONNX@)
 set(BUILD_PADDLE2ONNX @BUILD_PADDLE2ONNX@)
 
@@ -179,6 +180,32 @@ if(ENABLE_POROS_BACKEND)
   list(APPEND ULTRAINFER_INCS ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/poros/include ${TORCH_INCLUDE})
 endif()
 
+if(ENABLE_OM_BACKEND)
+  set(LIB_PATH $ENV{NPU_HOST_LIB})
+  if (NOT DEFINED ENV{NPU_HOST_LIB})
+    set(LIB_PATH "/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/lib64")
+    message(STATUS "set default LIB_PATH: ${LIB_PATH}")
+  else()
+    message(STATUS "set LIB_PATH: ${LIB_PATH}")
+  endif ()
+
+  set(INC_PATH $ENV{DDK_PATH})
+  if (NOT DEFINED ENV{DDK_PATH})
+    set(INC_PATH "/usr/local/Ascend/ascend-toolkit/latest")
+    message(STATUS "set default INC_PATH: ${INC_PATH}")
+  else()
+    message(STATUS "set INC_PATH: ${INC_PATH}")
+  endif ()
+
+  set(NPU_libs ascendcl stdc++)
+
+  link_directories(${LIB_PATH})
+
+  list(APPEND ULTRAINFER_LIBS ${NPU_libs})
+
+  list(APPEND ULTRAINFER_INCS ${INC_PATH}/runtime/include/)
+endif()
+
 if(WITH_GPU)
   if(NOT CUDA_DIRECTORY)
     set(CUDA_DIRECTORY "/usr/local/cuda")
diff --git a/libs/ultra-infer/cmake/om.cmake b/libs/ultra-infer/cmake/om.cmake
new file mode 100644
index 0000000000..df5066aaf4
--- /dev/null
+++ b/libs/ultra-infer/cmake/om.cmake
@@ -0,0 +1,29 @@
+set(CMAKE_CXX_FLAGS_DEBUG "-fPIC -O0 -g -Wall")
+set(CMAKE_CXX_FLAGS_RELEASE "-fPIC -O2 -Wall")
+
+set(INC_PATH $ENV{DDK_PATH})
+if (NOT DEFINED ENV{DDK_PATH})
+  set(INC_PATH "/usr/local/Ascend/ascend-toolkit/latest")
+  message(STATUS "set default INC_PATH: ${INC_PATH}")
+else()
+  message(STATUS "set INC_PATH: ${INC_PATH}")
+endif ()
+
+set(LIB_PATH $ENV{NPU_HOST_LIB})
+if (NOT DEFINED ENV{NPU_HOST_LIB})
+  set(LIB_PATH "/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/lib64")
+  message(STATUS "set default LIB_PATH: ${LIB_PATH}")
+else()
+  message(STATUS "set LIB_PATH: ${LIB_PATH}")
+endif ()
+
+
+set(NPU_libs ascendcl stdc++)
+
+include_directories(
+  ${INC_PATH}/runtime/include/
+)
+
+link_directories(
+  ${LIB_PATH}
+)
diff --git a/libs/ultra-infer/cmake/onnxruntime.cmake b/libs/ultra-infer/cmake/onnxruntime.cmake
index da0e740743..8672c8c7ef 100755
--- a/libs/ultra-infer/cmake/onnxruntime.cmake
+++ b/libs/ultra-infer/cmake/onnxruntime.cmake
@@ -41,8 +41,8 @@ else()
 endif()
 set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}" "${ONNXRUNTIME_LIB_DIR}")
 
-set(ONNXRUNTIME_VERSION "1.20.1")
-set(ONNXRUNTIME_URL_PREFIX "https://fastdeploy.bj.bcebos.com/third_libs/")
+set(ONNXRUNTIME_VERSION "1.12.0")
+set(ONNXRUNTIME_URL_PREFIX "https://bj.bcebos.com/paddle2onnx/libs/")
 
 if(WIN32)
   if(WITH_GPU)
diff --git a/libs/ultra-infer/python/setup.py b/libs/ultra-infer/python/setup.py
index 1ef4bf7d48..7459ebf084 100755
--- a/libs/ultra-infer/python/setup.py
+++ b/libs/ultra-infer/python/setup.py
@@ -69,6 +69,7 @@
 setup_configs["ENABLE_POROS_BACKEND"] = os.getenv("ENABLE_POROS_BACKEND", "OFF")
 setup_configs["ENABLE_TRT_BACKEND"] = os.getenv("ENABLE_TRT_BACKEND", "OFF")
 setup_configs["ENABLE_LITE_BACKEND"] = os.getenv("ENABLE_LITE_BACKEND", "OFF")
+setup_configs["ENABLE_OM_BACKEND"] = os.getenv("ENABLE_OM_BACKEND", "OFF")
 setup_configs["ENABLE_PADDLE2ONNX"] = os.getenv("ENABLE_PADDLE2ONNX", "OFF")
 setup_configs["ENABLE_VISION"] = os.getenv("ENABLE_VISION", "OFF")
 setup_configs["ENABLE_FLYCV"] = os.getenv("ENABLE_FLYCV", "OFF")
diff --git a/libs/ultra-infer/python/ultra_infer/__init__.py b/libs/ultra-infer/python/ultra_infer/__init__.py
index 597572c9d9..9286b07f0b 100755
--- a/libs/ultra-infer/python/ultra_infer/__init__.py
+++ b/libs/ultra-infer/python/ultra_infer/__init__.py
@@ -152,6 +152,8 @@ def tensorrt_is_avaliable():
     is_built_with_paddle,
     is_built_with_trt,
     get_default_cuda_directory,
+    is_built_with_openvino,
+    is_built_with_om,
 )
diff --git a/libs/ultra-infer/python/ultra_infer/c_lib_wrap.py.in b/libs/ultra-infer/python/ultra_infer/c_lib_wrap.py.in
index 248f2b66af..262270161b 100755
--- a/libs/ultra-infer/python/ultra_infer/c_lib_wrap.py.in
+++ b/libs/ultra-infer/python/ultra_infer/c_lib_wrap.py.in
@@ -43,6 +43,10 @@
 def is_built_with_openvino() ->bool:
     return True if "@ENABLE_OPENVINO_BACKEND@" == "ON" else False
 
 
+def is_built_with_om() ->bool:
+    return True if "@ENABLE_OM_BACKEND@" == "ON" else False
+
+
 def get_default_cuda_directory() -> str:
     if not is_built_with_gpu():
         return ""
diff --git a/libs/ultra-infer/python/ultra_infer/runtime.py b/libs/ultra-infer/python/ultra_infer/runtime.py
index fcb67f8570..8cd4cae33f 100755
--- a/libs/ultra-infer/python/ultra_infer/runtime.py
+++ b/libs/ultra-infer/python/ultra_infer/runtime.py
@@ -321,6 +321,10 @@ def use_paddle_lite_backend(self):
         """Wrapper function of use_lite_backend(), use Paddle Lite backend, support inference Paddle model on ARM CPU."""
         return self.use_lite_backend()
 
+    def use_om_backend(self):
+        """Use OM backend, support inference of OM model on Ascend NPU."""
+        return self._option.use_om_backend()
+
     def set_lite_context_properties(self, context_properties):
         """Set nnadapter context properties for Paddle Lite backend."""
         logging.warning(
diff --git a/libs/ultra-infer/ultra_infer/core/config.h.in b/libs/ultra-infer/ultra_infer/core/config.h.in
index f51e321cb1..f309783d83 100755
--- a/libs/ultra-infer/ultra_infer/core/config.h.in
+++ b/libs/ultra-infer/ultra_infer/core/config.h.in
@@ -84,3 +84,7 @@
 #ifndef ENABLE_TVM_BACKEND
 #cmakedefine ENABLE_TVM_BACKEND
 #endif
+
+#ifndef ENABLE_OM_BACKEND
+#cmakedefine ENABLE_OM_BACKEND
+#endif
diff --git a/libs/ultra-infer/ultra_infer/pybind/runtime.cc b/libs/ultra-infer/ultra_infer/pybind/runtime.cc
index fe34f240e6..1d5225049d 100755
--- a/libs/ultra-infer/ultra_infer/pybind/runtime.cc
+++ b/libs/ultra-infer/ultra_infer/pybind/runtime.cc
@@ -137,7 +137,8 @@ void BindRuntime(pybind11::module &m) {
       .value("RKNPU2", Backend::RKNPU2)
       .value("SOPHGOTPU", Backend::SOPHGOTPU)
       .value("TVM", Backend::TVM)
-      .value("LITE", Backend::LITE);
+      .value("LITE", Backend::LITE)
+      .value("OMONNPU", Backend::OMONNPU);
   pybind11::enum_<ModelFormat>(m, "ModelFormat", pybind11::arithmetic(),
                                "ModelFormat for inference.")
       .value("PADDLE", ModelFormat::PADDLE)
@@ -145,7 +146,8 @@
       .value("RKNN", ModelFormat::RKNN)
       .value("SOPHGO", ModelFormat::SOPHGO)
      .value("ONNX", ModelFormat::ONNX)
-      .value("TVMFormat", ModelFormat::TVMFormat);
+      .value("TVMFormat", ModelFormat::TVMFormat)
+      .value("OM", ModelFormat::OM);
   pybind11::enum_<Device>(m, "Device", pybind11::arithmetic(),
                           "Device for inference.")
       .value("CPU", Device::CPU)
diff --git a/libs/ultra-infer/ultra_infer/runtime/backends/om/om_backend.cc b/libs/ultra-infer/ultra_infer/runtime/backends/om/om_backend.cc
new file mode 100644
index 0000000000..c08c2ffd02
--- /dev/null
+++ b/libs/ultra-infer/ultra_infer/runtime/backends/om/om_backend.cc
@@ -0,0 +1,556 @@
+// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "ultra_infer/runtime/backends/om/om_backend.h"
+
+#include "acl/acl.h"
+#include <iostream>
+#include <vector>
+
+namespace ultra_infer {
+
+OmBackend::~OmBackend() {
+  FreeInputBuffer();
+  FreeOutputBuffer();
+  DestroyInput();
+  DestroyOutput();
+  DestroyResource();
+}
+
+TensorInfo OmBackend::GetInputInfo(int index) {
+  FDASSERT(index < NumInputs(),
+           "The index: %d should less than the number of inputs: %d.", index,
+           NumInputs());
+  return inputs_desc_[index];
+}
+
+std::vector<TensorInfo> OmBackend::GetInputInfos() { return inputs_desc_; }
+
+TensorInfo OmBackend::GetOutputInfo(int index) {
+  FDASSERT(index < NumOutputs(),
+           "The index: %d should less than the number of outputs %d.", index,
+           NumOutputs());
+
+  return outputs_desc_[index];
+}
+
+std::vector<TensorInfo> OmBackend::GetOutputInfos() { return outputs_desc_; }
+
+bool OmBackend::Init(const RuntimeOption &runtime_option) {
+  // ACL init
+  aclError ret = aclInit(NULL);
+  if (ret != ACL_SUCCESS) {
+    FDERROR << "acl init failed, errorCode = " << static_cast<int32_t>(ret);
+    return false;
+  }
+
+  // set device
+  ret = aclrtSetDevice(deviceId_);
+  if (ret != ACL_SUCCESS) {
+    FDERROR << "acl set device " << deviceId_
+            << " failed, errorCode = " << static_cast<int32_t>(ret);
+    return false;
+  }
+
+  // create context (set current)
+  ret = aclrtCreateContext(&context_, deviceId_);
+  if (ret != ACL_SUCCESS) {
+    FDERROR << "acl create context failed, deviceId = " << deviceId_
+            << ", errorCode = " << static_cast<int32_t>(ret);
+    return false;
+  }
+
+  // create stream
+  ret = aclrtCreateStream(&stream_);
+  if (ret != ACL_SUCCESS) {
+    FDERROR << "acl create stream failed, deviceId = " << deviceId_
+            << ", errorCode = " << static_cast<int32_t>(ret);
+    return false;
+  }
+
+  // get run mode
+  // runMode is ACL_HOST which represents app is running in host
+  // runMode is ACL_DEVICE which represents app is running in device
+  aclrtRunMode runMode;
+  ret = aclrtGetRunMode(&runMode);
+  if (ret != ACL_SUCCESS) {
+    FDERROR << "acl get run mode failed, errorCode = "
+            << static_cast<int32_t>(ret);
+    return false;
+  }
+
+  // model init
+  const char *omModelPath = runtime_option.model_file.c_str();
+  FDINFO << "omModelPath = " << omModelPath;
+  if (!LoadModel(omModelPath)) {
+    FDERROR << "execute LoadModel failed";
+    return false;
+  }
+
+  // build input/output info
+  if (!CreateModelDesc()) {
+    FDERROR << "execute CreateModelDesc failed";
+    return false;
+  }
+  if (!CreateInput()) {
+    FDERROR << "execute CreateInput failed";
+    FreeInputBuffer();
+    return false;
+  }
+  if (!CreateOutput()) {
+    FDERROR << "execute CreateOutput failed";
+    FreeInputBuffer();
+    return false;
+  }
+
+  return true;
+}
+
+bool OmBackend::Infer(std::vector<FDTensor> &inputs,
+                      std::vector<FDTensor> *outputs, bool copy_to_fd) {
+  // check that the number of inputs matches the model
+  if (inputs.size() != inputs_desc_.size()) {
+    FDERROR << "[OmBackend] Size of the inputs(" << inputs.size()
+            << ") should keep same with the inputs of this model("
+            << inputs_desc_.size() << ")." << std::endl;
+    FreeInputBuffer();
+    return false;
+  }
+
+  // copy input tensors to inputBuffer
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    if (inputs[i].Data() == nullptr) {
+      FDERROR << "inputs[i].Data is NULL." << std::endl;
+      return false;
+    }
+    size_t modelInputSize = aclmdlGetInputSizeByIndex(modelDesc_, i);
+    aclError aclRet =
+        aclrtMemcpy(inputBuffer[i], modelInputSize, inputs[i].Data(),
+                    inputs[i].Nbytes(), ACL_MEMCPY_DEVICE_TO_DEVICE);
+    if (aclRet != ACL_SUCCESS) {
+      FDERROR << "memcpy d2d failed. buffer size is " << modelInputSize
+              << ", inputs[i].Nbytes() is " << inputs[i].Nbytes()
+              << ", errorCode is " << static_cast<int32_t>(aclRet);
+      return false;
+    }
+  }
+
+  bool ret = Execute();
+  if (ret != true) {
+    FDERROR << "execute inference failed";
+    FreeInputBuffer();
+    DestroyInput();
+    DestroyOutput();
+    return false;
+  }
+
+  // copy outputBuffer back to the output tensors
+  outputs->resize(outputs_desc_.size());
+  std::vector<int64_t> temp_shape(4);
+  for (size_t i = 0; i < outputs_desc_.size(); ++i) {
+    temp_shape.resize(outputs_desc_[i].shape.size());
+    for (size_t j = 0; j < outputs_desc_[i].shape.size(); ++j) {
+      temp_shape[j] = outputs_desc_[i].shape[j];
+    }
+    (*outputs)[i].Resize(temp_shape, outputs_desc_[i].dtype,
+                         outputs_desc_[i].name);
+    size_t modelOutputSize = aclmdlGetOutputSizeByIndex(modelDesc_, i);
+    if (modelOutputSize != (*outputs)[i].Nbytes()) {
+      FDERROR << "output size is not match, index: " << i
+              << ", modelOutputSize:" << modelOutputSize
+              << ", (*outputs)[i].Nbytes():" << (*outputs)[i].Nbytes();
+      return false;
+    }
+    aclError aclRet = aclrtMemcpy(
+        (*outputs)[i].MutableData(), (*outputs)[i].Nbytes(), outputBuffer[i],
+        (*outputs)[i].Nbytes(), ACL_MEMCPY_DEVICE_TO_HOST);
+    if (aclRet != ACL_SUCCESS) {
+      FDERROR << "memcpy d2h failed. buffer size is " << (*outputs)[i].Nbytes()
+              << ", errorCode is " << static_cast<int32_t>(aclRet);
+      return false;
+    }
+  }
+
+  return true;
+}
+
+bool OmBackend::LoadModel(const char *modelPath) {
+  if (loadFlag_) {
+    FDERROR << "model has already been loaded";
+    return false;
+  }
+  aclError ret = aclmdlQuerySize(modelPath, &modelWorkSize_, &modelWeightSize_);
+  if (ret != ACL_SUCCESS) {
+    FDERROR << "query model failed, model file is " << modelPath
+            << ", errorCode is " << static_cast<int32_t>(ret);
+    return false;
+  }
+  // using ACL_MEM_MALLOC_HUGE_FIRST to malloc memory, huge memory is preferred
+  // to use and huge memory can improve performance.
+  ret = aclrtMalloc(&modelWorkPtr_, modelWorkSize_, ACL_MEM_MALLOC_HUGE_FIRST);
+  if (ret != ACL_SUCCESS) {
+    FDERROR << "malloc buffer for work failed, require size is "
+            << modelWorkSize_ << ", errorCode is " << static_cast<int32_t>(ret);
+    return false;
+  }
+
+  // using ACL_MEM_MALLOC_HUGE_FIRST to malloc memory, huge memory is preferred
+  // to use and huge memory can improve performance.
+  ret = aclrtMalloc(&modelWeightPtr_, modelWeightSize_,
+                    ACL_MEM_MALLOC_HUGE_FIRST);
+  if (ret != ACL_SUCCESS) {
+    FDERROR << "malloc buffer for weight failed, require size is "
+            << modelWeightSize_ << ", errorCode is "
+            << static_cast<int32_t>(ret);
+    return false;
+  }
+
+  ret = aclmdlLoadFromFileWithMem(modelPath, &modelId_, modelWorkPtr_,
+                                  modelWorkSize_, modelWeightPtr_,
+                                  modelWeightSize_);
+  if (ret != ACL_SUCCESS) {
+    FDERROR << "load model from file failed, model file is " << modelPath
+            << ", errorCode is " << static_cast<int32_t>(ret);
+    return false;
+  }
+
+  loadFlag_ = true;
+  FDINFO << "load model " << modelPath << " success";
+  return true;
+}
+
+bool OmBackend::Execute() {
+  aclError ret = aclmdlExecute(modelId_, input_, output_);
+  if (ret != ACL_SUCCESS) {
+    FDERROR << "execute model failed, modelId is " << modelId_
+            << ", errorCode is " << static_cast<int32_t>(ret);
+    return false;
+  }
+  FDINFO << "model execute success";
+  return true;
+}
+
+bool OmBackend::CreateModelDesc() {
+  modelDesc_ = aclmdlCreateDesc();
+  if (modelDesc_ == nullptr) {
+    FDERROR << "create model description failed";
+    return false;
+  }
+
+  aclError ret = aclmdlGetDesc(modelDesc_, modelId_);
+  if (ret != ACL_SUCCESS) {
+    FDERROR << "get model description failed, modelId is " << modelId_
+            << ", errorCode is " << static_cast<int32_t>(ret);
+    return false;
+  }
+
+  FDINFO << "create model description success";
+
+  return true;
+}
+
+bool OmBackend::CreateInput() {
+  // one aclDataBuffer is created per model input
+  if (modelDesc_ == nullptr) {
+    FDERROR << "no model description, create input failed";
+    return false;
+  }
+
+  // input_: aclmdlDataset
+  input_ = aclmdlCreateDataset();
+  if (input_ == nullptr) {
+    FDERROR << "can't create dataset, create input failed";
+    return false;
+  }
+
+  // get input nums
+  size_t inputNum = aclmdlGetNumInputs(modelDesc_);
+  inputs_desc_.resize(inputNum);
+  inputBuffer.resize(inputNum, nullptr);
+  for (size_t i = 0; i < inputNum; ++i) {
+    // get input size
+    size_t modelInputSize = aclmdlGetInputSizeByIndex(modelDesc_, i);
+    aclError ret =
+        aclrtMalloc(&inputBuffer[i], modelInputSize, ACL_MEM_MALLOC_HUGE_FIRST);
+    if (ret != ACL_SUCCESS) {
+      FDERROR << "can't malloc buffer, size is " << modelInputSize
+              << ", errorCode is " << static_cast<int32_t>(ret);
+      return false;
+    }
+    // inputData: aclDataBuffer
+    aclDataBuffer *inputData =
+        aclCreateDataBuffer(inputBuffer[i], modelInputSize);
+    if (inputData == nullptr) {
+      FDERROR << "can't create data buffer, create input failed";
+      return false;
+    }
+
+    // add aclDataBuffer to input
+    ret = aclmdlAddDatasetBuffer(input_, inputData);
+    if (ret != ACL_SUCCESS) {
+      FDERROR << "add input dataset buffer failed, errorCode is "
+              << static_cast<int32_t>(ret);
+      (void)aclDestroyDataBuffer(inputData);
+      inputData = nullptr;
+      return false;
+    }
+
+    // get name/shape/dtype of input to build inputs_desc_
+    const char *name = aclmdlGetInputNameByIndex(modelDesc_, i);
+    std::string temp_name = name;
+
+    std::vector<int> temp_shape{};
+    aclmdlIODims dims;
+    ret = aclmdlGetInputDims(modelDesc_, i, &dims);
+    if (ret != ACL_SUCCESS) {
+      FDERROR << "get input tensor dims fail! ret=" << ret << std::endl;
+      return false;
+    }
+    int n_dims = (int)dims.dimCount;
+    temp_shape.resize(n_dims);
+    for (int j = 0; j < n_dims; j++) {
+      temp_shape[j] = (int)dims.dims[j];
+    }
+
+    aclDataType dtype = aclmdlGetInputDataType(modelDesc_, i);
+    FDDataType temp_dtype;
+    switch (dtype) {
+    case ACL_BOOL:
+      temp_dtype = FDDataType::BOOL;
+      break;
+    case ACL_UINT8:
+      temp_dtype = FDDataType::UINT8;
+      break;
+    case ACL_INT8:
+      temp_dtype = FDDataType::INT8;
+      break;
+    case ACL_INT16:
+      temp_dtype = FDDataType::INT16;
+      break;
+    case ACL_INT32:
+      temp_dtype = FDDataType::INT32;
+      break;
+    case ACL_INT64:
+      temp_dtype = FDDataType::INT64;
+      break;
+    case ACL_FLOAT16:
+      temp_dtype = FDDataType::FP16;
+      break;
+    case ACL_FLOAT:
+      temp_dtype = FDDataType::FP32;
+      break;
+    case ACL_DOUBLE:
+      temp_dtype = FDDataType::FP64;
+      break;
+    default:
+      FDERROR << "unsupported input tensor dtype: " << (int)dtype;
+      return false;
+    }
+    TensorInfo temp_input_info = {temp_name, temp_shape, temp_dtype};
+    inputs_desc_[i] = temp_input_info;
+  }
+  FDINFO << "create model input success";
+
+  return true;
+}
+
+bool OmBackend::CreateOutput() {
+  if (modelDesc_ == nullptr) {
+    FDERROR << "no model description, create output failed";
+    return false;
+  }
+
+  output_ = aclmdlCreateDataset();
+  if (output_ == nullptr) {
+    FDERROR << "can't create dataset, create output failed";
+    return false;
+  }
+
+  size_t outputSize = aclmdlGetNumOutputs(modelDesc_);
+  outputs_desc_.resize(outputSize);
+  outputBuffer.resize(outputSize, nullptr);
+  for (size_t i = 0; i < outputSize; ++i) {
+    size_t modelOutputSize = aclmdlGetOutputSizeByIndex(modelDesc_, i);
+    aclError ret = aclrtMalloc(&outputBuffer[i], modelOutputSize,
+                               ACL_MEM_MALLOC_HUGE_FIRST);
+    if (ret != ACL_SUCCESS) {
+      FDERROR << "can't malloc buffer, size is " << modelOutputSize
+              << ", errorCode is " << static_cast<int32_t>(ret);
+      return false;
+    }
+
+    aclDataBuffer *outputData =
+        aclCreateDataBuffer(outputBuffer[i], modelOutputSize);
+    if (outputData == nullptr) {
+      FDERROR << "can't create data buffer, create output failed";
+      return false;
+    }
+
+    ret = aclmdlAddDatasetBuffer(output_, outputData);
+    if (ret != ACL_SUCCESS) {
+      FDERROR << "add output dataset buffer failed, errorCode is "
+              << static_cast<int32_t>(ret);
+      (void)aclDestroyDataBuffer(outputData);
+      return false;
+    }
+
+    const char *name = aclmdlGetOutputNameByIndex(modelDesc_, i);
+    std::string temp_name = name;
+
+    std::vector<int> temp_shape{};
+    aclmdlIODims dims;
+    ret = aclmdlGetOutputDims(modelDesc_, i, &dims);
+    if (ret != ACL_SUCCESS) {
+      FDERROR << "get output tensor dims fail! ret=" << ret << std::endl;
+      return false;
+    }
ret=" << ret << std::endl; + return false; + } + int n_dims = (int)dims.dimCount; + temp_shape.resize(n_dims); + for (int j = 0; j < n_dims; j++) { + temp_shape[j] = (int)dims.dims[j]; + } + + aclDataType dtype = aclmdlGetOutputDataType(modelDesc_, i); + FDDataType temp_dtype; + switch (dtype) { + case ACL_BOOL: + temp_dtype = FDDataType::BOOL; + break; + case ACL_UINT8: + temp_dtype = FDDataType::UINT8; + break; + case ACL_INT8: + temp_dtype = FDDataType::INT8; + break; + case ACL_INT16: + temp_dtype = FDDataType::INT16; + break; + case ACL_INT32: + temp_dtype = FDDataType::INT32; + break; + case ACL_INT64: + temp_dtype = FDDataType::INT64; + break; + case ACL_FLOAT16: + temp_dtype = FDDataType::FP16; + break; + case ACL_FLOAT: + temp_dtype = FDDataType::FP32; + break; + case ACL_DOUBLE: + temp_dtype = FDDataType::FP64; + break; + default: + FDERROR << "unsupported output tensor dtype: " << (int)dtype; + return false; + } + TensorInfo temp_output_info = {temp_name, temp_shape, temp_dtype}; + outputs_desc_[i] = temp_output_info; + } + + FDINFO << "create model output success"; + + return true; +} + +void OmBackend::FreeInputBuffer() { + for (int i = 0; i < (int)inputs_desc_.size(); ++i) { + if (inputBuffer[i] != nullptr) { + (void)aclrtFree(inputBuffer[i]); + inputBuffer[i] = nullptr; + } + } +} + +void OmBackend::FreeOutputBuffer() { + for (int i = 0; i < (int)outputs_desc_.size(); ++i) { + if (outputBuffer[i] != nullptr) { + (void)aclrtFree(outputBuffer[i]); + outputBuffer[i] = nullptr; + } + } +} + +void OmBackend::DestroyInput() { + if (input_ == nullptr) { + return; + } + + for (size_t i = 0; i < aclmdlGetDatasetNumBuffers(input_); ++i) { + aclDataBuffer *dataBuffer = aclmdlGetDatasetBuffer(input_, i); + (void)aclDestroyDataBuffer(dataBuffer); + } + (void)aclmdlDestroyDataset(input_); + input_ = nullptr; + FDINFO << "destroy model input success"; +} + +void OmBackend::DestroyOutput() { + if (output_ == nullptr) { + return; + } + + for (size_t i = 0; i < aclmdlGetDatasetNumBuffers(output_); ++i) { + aclDataBuffer *dataBuffer = aclmdlGetDatasetBuffer(output_, i); + void *data = aclGetDataBufferAddr(dataBuffer); + (void)aclrtFree(data); + (void)aclDestroyDataBuffer(dataBuffer); + } + + (void)aclmdlDestroyDataset(output_); + output_ = nullptr; + FDINFO << "destroy model output success"; +} + +void OmBackend::DestroyResource() { + aclError ret; + if (stream_ != nullptr) { + ret = aclrtDestroyStream(stream_); + if (ret != ACL_SUCCESS) { + FDERROR << "destroy stream failed, errorCode = " + << static_cast(ret); + } + stream_ = nullptr; + } + + if (context_ != nullptr) { + ret = aclrtDestroyContext(context_); + if (ret != ACL_SUCCESS) { + FDERROR << "destroy context failed, errorCode = " + << static_cast(ret); + } + context_ = nullptr; + } + + ret = aclrtResetDevice(deviceId_); + if (ret != ACL_SUCCESS) { + FDERROR << "reset device " << deviceId_ + << " failed, errorCode = " << static_cast(ret); + } + + ret = aclFinalize(); + if (ret != ACL_SUCCESS) { + FDERROR << "finalize acl failed, errorCode = " << static_cast(ret); + } + FDINFO << "end to destroy acl resource"; +} + +} // namespace ultra_infer diff --git a/libs/ultra-infer/ultra_infer/runtime/backends/om/om_backend.h b/libs/ultra-infer/ultra_infer/runtime/backends/om/om_backend.h new file mode 100644 index 0000000000..bd75641d43 --- /dev/null +++ b/libs/ultra-infer/ultra_infer/runtime/backends/om/om_backend.h @@ -0,0 +1,80 @@ +// Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include +#include + +#include "acl/acl.h" +#include "ultra_infer/core/fd_tensor.h" +#include "ultra_infer/runtime/backends/backend.h" + +namespace ultra_infer { +class OmBackend : public BaseBackend { +public: + OmBackend() = default; + virtual ~OmBackend(); + + // OM Backend implementation. + bool Init(const RuntimeOption &runtime_option) override; + + int NumInputs() const override { + return static_cast(inputs_desc_.size()); + } + int NumOutputs() const override { + return static_cast(outputs_desc_.size()); + } + + TensorInfo GetInputInfo(int index) override; + TensorInfo GetOutputInfo(int index) override; + std::vector GetInputInfos() override; + std::vector GetOutputInfos() override; + + bool Infer(std::vector &inputs, std::vector *outputs, + bool copy_to_fd = true) override; + +private: + std::vector inputs_desc_; + std::vector outputs_desc_; + std::vector inputBuffer; + std::vector outputBuffer; + bool loadFlag_ = false; // model load flag + int32_t deviceId_; + uint32_t modelId_; + size_t modelWorkSize_; // model work memory buffer size + size_t modelWeightSize_; // model weight memory buffer size + void *modelWorkPtr_; // model work memory buffer + void *modelWeightPtr_; // model weight memory buffer + aclmdlDesc *modelDesc_; + aclmdlDataset *input_; + aclmdlDataset *output_; + aclrtContext context_; + aclrtStream stream_; + + bool LoadModel(const char *modelPath); + bool Execute(); + bool CreateInput(); + void DestroyInput(); + bool CreateOutput(); + void DestroyOutput(); + void DestroyResource(); + bool CreateModelDesc(); + void FreeInputBuffer(); + void FreeOutputBuffer(); +}; +} // namespace ultra_infer diff --git a/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/adaptive_pool2d.cc b/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/adaptive_pool2d.cc old mode 100644 new mode 100755 index c0eddc00f2..983217fde5 --- a/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/adaptive_pool2d.cc +++ b/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/adaptive_pool2d.cc @@ -60,11 +60,6 @@ void AdaptivePool2dKernel::CpuAdaptivePool( } } -OrtStatusPtr AdaptivePool2dKernel::ComputeV2(OrtKernelContext *context) { - Compute(context); - return nullptr; -} - void AdaptivePool2dKernel::Compute(OrtKernelContext *context) { #if ORT_API_VERSION >= 14 Ort::KernelContext ort_context{context}; diff --git a/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/adaptive_pool2d.h b/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/adaptive_pool2d.h old mode 100644 new mode 100755 index b4de2bb0fd..f105a2e8b6 --- a/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/adaptive_pool2d.h +++ b/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/adaptive_pool2d.h @@ -49,8 +49,6 @@ struct AdaptivePool2dKernel { void Compute(OrtKernelContext *context); - OrtStatusPtr ComputeV2(OrtKernelContext *context); - void CpuAdaptivePool(const std::vector &input_size, const std::vector 
&output_size, const float *input_data, float *output_data); @@ -63,12 +61,6 @@ struct AdaptivePool2dOp return new AdaptivePool2dKernel(api, info, provider_); } - OrtStatusPtr CreateKernelV2(OrtApi api, const OrtKernelInfo *info, - void **op_kernel) const { - *op_kernel = new AdaptivePool2dKernel(api, info, provider_); - return nullptr; - } - const char *GetName() const { return "AdaptivePool2d"; } size_t GetInputTypeCount() const { return 1; } diff --git a/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/multiclass_nms.cc b/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/multiclass_nms.cc old mode 100644 new mode 100755 index 34165d8b4c..c7c5bf3724 --- a/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/multiclass_nms.cc +++ b/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/multiclass_nms.cc @@ -156,11 +156,6 @@ int MultiClassNmsKernel::NMSForEachSample( return num_det; } -OrtStatusPtr MultiClassNmsKernel::ComputeV2(OrtKernelContext *context) { - Compute(context); - return nullptr; -} - void MultiClassNmsKernel::Compute(OrtKernelContext *context) { #if ORT_API_VERSION >= 14 Ort::KernelContext ort_context{context}; diff --git a/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/multiclass_nms.h b/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/multiclass_nms.h old mode 100644 new mode 100755 index 4c6e95863e..4f71b14fc9 --- a/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/multiclass_nms.h +++ b/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/multiclass_nms.h @@ -40,7 +40,6 @@ struct MultiClassNmsKernel { void GetAttribute(const OrtKernelInfo *info); void Compute(OrtKernelContext *context); - OrtStatusPtr ComputeV2(OrtKernelContext *context); void FastNMS(const float *boxes, const float *scores, const int &num_boxes, std::vector *keep_indices); int NMSForEachSample(const float *boxes, const float *scores, int num_boxes, @@ -54,12 +53,6 @@ struct MultiClassNmsOp return new MultiClassNmsKernel(api, info); } - OrtStatusPtr CreateKernelV2(OrtApi api, const OrtKernelInfo *info, - void **op_kernel) const { - *op_kernel = new MultiClassNmsKernel(api, info); - return nullptr; - } - const char *GetName() const { return "MultiClassNMS"; } size_t GetInputTypeCount() const { return 2; } diff --git a/libs/ultra-infer/ultra_infer/runtime/enum_variables.cc b/libs/ultra-infer/ultra_infer/runtime/enum_variables.cc index 01f9c8152b..16c8910698 100755 --- a/libs/ultra-infer/ultra_infer/runtime/enum_variables.cc +++ b/libs/ultra-infer/ultra_infer/runtime/enum_variables.cc @@ -36,6 +36,8 @@ std::ostream &operator<<(std::ostream &out, const Backend &backend) { out << "Backend::HORIZONNPU"; } else if (backend == Backend::TVM) { out << "Backend::TVM"; + } else if (backend == Backend::OMONNPU) { + out << "Backend::OMONNPU"; } else { out << "UNKNOWN-Backend"; } @@ -92,6 +94,8 @@ std::ostream &operator<<(std::ostream &out, const ModelFormat &format) { out << "ModelFormat::HORIZON"; } else if (format == ModelFormat::TVMFormat) { out << "ModelFormat::TVMFormat"; + } else if (format == ModelFormat::OM) { + out << "ModelFormat::OM"; } else { out << "UNKNOWN-ModelFormat"; } @@ -129,6 +133,9 @@ std::vector GetAvailableBackends() { #endif #ifdef ENABLE_TVM_BACKEND backends.push_back(Backend::TVM); +#endif +#ifdef ENABLE_OM_BACKEND + backends.push_back(Backend::OMONNPU); #endif return backends; } diff --git a/libs/ultra-infer/ultra_infer/runtime/enum_variables.h b/libs/ultra-infer/ultra_infer/runtime/enum_variables.h index d0eefd8c9f..304e2cbbc4 100755 --- 
a/libs/ultra-infer/ultra_infer/runtime/enum_variables.h +++ b/libs/ultra-infer/ultra_infer/runtime/enum_variables.h @@ -39,6 +39,7 @@ enum Backend { SOPHGOTPU, ///< SOPHGOTPU, support SOPHGO format model, Sophgo TPU only HORIZONNPU, ///< HORIZONNPU, support Horizon format model, Horizon NPU TVM, ///< TVMBackend, support TVM format model, CPU / Nvidia GPU + OMONNPU, ///< OMONNPU, support OM format model, OM NPU }; /** @@ -74,6 +75,7 @@ enum ModelFormat { SOPHGO, ///< Model with SOPHGO format HORIZON, ///< Model with HORIZON format TVMFormat, ///< Model with TVM format + OM, ///< Model with OM format }; /// Describle all the supported backends for specified model format @@ -87,7 +89,8 @@ static std::map> {ModelFormat::HORIZON, {Backend::HORIZONNPU}}, {ModelFormat::TORCHSCRIPT, {Backend::POROS}}, {ModelFormat::SOPHGO, {Backend::SOPHGOTPU}}, - {ModelFormat::TVMFormat, {Backend::TVM}}}; + {ModelFormat::TVMFormat, {Backend::TVM}}, + {ModelFormat::OM, {Backend::OMONNPU}}}; /// Describle all the supported backends for specified device static std::map> s_default_backends_by_device = { @@ -104,7 +107,8 @@ static std::map> s_default_backends_by_device = { {Device::KUNLUNXIN, {Backend::LITE, Backend::PDINFER}}, {Device::ASCEND, {Backend::LITE}}, {Device::SOPHGOTPUD, {Backend::SOPHGOTPU}}, - {Device::DIRECTML, {Backend::ORT}}}; + {Device::DIRECTML, {Backend::ORT}}, + {Device::ASCEND, {Backend::OMONNPU}}}; inline bool Supported(ModelFormat format, Backend backend) { auto iter = s_default_backends_by_format.find(format); diff --git a/libs/ultra-infer/ultra_infer/runtime/option_pybind.cc b/libs/ultra-infer/ultra_infer/runtime/option_pybind.cc index aaf098d477..73a9b77b79 100755 --- a/libs/ultra-infer/ultra_infer/runtime/option_pybind.cc +++ b/libs/ultra-infer/ultra_infer/runtime/option_pybind.cc @@ -65,6 +65,7 @@ void BindOption(pybind11::module &m) { .def("use_trt_backend", &RuntimeOption::UseTrtBackend) .def("use_openvino_backend", &RuntimeOption::UseOpenVINOBackend) .def("use_lite_backend", &RuntimeOption::UseLiteBackend) + .def("use_om_backend", &RuntimeOption::UseOMBackend) .def("enable_pinned_memory", &RuntimeOption::EnablePinnedMemory) .def("disable_pinned_memory", &RuntimeOption::DisablePinnedMemory) .def("use_ipu", &RuntimeOption::UseIpu) diff --git a/libs/ultra-infer/ultra_infer/runtime/runtime.cc b/libs/ultra-infer/ultra_infer/runtime/runtime.cc index a0c1ace8ec..38954e94fc 100755 --- a/libs/ultra-infer/ultra_infer/runtime/runtime.cc +++ b/libs/ultra-infer/ultra_infer/runtime/runtime.cc @@ -63,6 +63,10 @@ #include "ultra_infer/runtime/backends/tvm/tvm_backend.h" #endif +#ifdef ENABLE_OM_BACKEND +#include "ultra_infer/runtime/backends/om/om_backend.h" +#endif + namespace ultra_infer { bool AutoSelectBackend(RuntimeOption &option) { @@ -147,6 +151,8 @@ bool Runtime::Init(const RuntimeOption &_option) { CreateHorizonBackend(); } else if (option.backend == Backend::TVM) { CreateTVMBackend(); + } else if (option.backend == Backend::OMONNPU) { + CreateOMBackend(); } else { std::string msg = Str(GetAvailableBackends()); FDERROR << "The compiled UltraInfer only supports " << msg << ", " @@ -397,6 +403,18 @@ void Runtime::CreatePorosBackend() { << "." 
 
+void Runtime::CreateOMBackend() {
+#ifdef ENABLE_OM_BACKEND
+  backend_ = utils::make_unique<OmBackend>();
+  FDASSERT(backend_->Init(option), "Failed to initialize om backend.");
+#else
+  FDASSERT(false, "OMBackend is not available, please compile with "
+                  "ENABLE_OM_BACKEND=ON.");
+#endif
+  FDINFO << "Runtime initialized with Backend::OMONNPU in " << option.device
+         << "." << std::endl;
+}
+
 // only for poros backend
 bool Runtime::Compile(std::vector<std::vector<FDTensor>> &prewarm_tensors) {
 #ifdef ENABLE_POROS_BACKEND
diff --git a/libs/ultra-infer/ultra_infer/runtime/runtime.h b/libs/ultra-infer/ultra_infer/runtime/runtime.h
index c465f39988..16094d60f3 100755
--- a/libs/ultra-infer/ultra_infer/runtime/runtime.h
+++ b/libs/ultra-infer/ultra_infer/runtime/runtime.h
@@ -119,6 +119,7 @@ struct ULTRAINFER_DECL Runtime {
   void CreateSophgoNPUBackend();
   void CreatePorosBackend();
   void CreateTVMBackend();
+  void CreateOMBackend();
   std::unique_ptr<BaseBackend> backend_;
   std::vector<FDTensor> input_tensors_;
   std::vector<FDTensor> output_tensors_;
diff --git a/libs/ultra-infer/ultra_infer/runtime/runtime_option.cc b/libs/ultra-infer/ultra_infer/runtime/runtime_option.cc
index ea1a6f8488..13119d5f34 100755
--- a/libs/ultra-infer/ultra_infer/runtime/runtime_option.cc
+++ b/libs/ultra-infer/ultra_infer/runtime/runtime_option.cc
@@ -234,6 +234,14 @@ void RuntimeOption::UseHorizonNPUBackend() {
 #endif
 }
 
+void RuntimeOption::UseOMBackend() {
+#ifdef ENABLE_OM_BACKEND
+  backend = Backend::OMONNPU;
+#else
+  FDASSERT(false, "UltraInfer was not compiled with the OM backend for NPU.");
+#endif
+}
+
 void RuntimeOption::SetPaddleMKLDNN(bool pd_mkldnn) {
   FDWARNING << "`RuntimeOption::SetPaddleMKLDNN` will be removed in v1.2.0, "
                "please modify its member variable directly, e.g "
diff --git a/libs/ultra-infer/ultra_infer/runtime/runtime_option.h b/libs/ultra-infer/ultra_infer/runtime/runtime_option.h
index 1afda680b0..1a90ccb854 100755
--- a/libs/ultra-infer/ultra_infer/runtime/runtime_option.h
+++ b/libs/ultra-infer/ultra_infer/runtime/runtime_option.h
@@ -277,6 +277,7 @@ struct ULTRAINFER_DECL RuntimeOption {
   void UseLiteBackend();
   void UseHorizonNPUBackend();
   void UseTVMBackend();
+  void UseOMBackend();
 };
 } // namespace ultra_infer

From f3777810bc1c9ddb6ea6db9c3a4e06ed6c0af192 Mon Sep 17 00:00:00 2001
From: zhangyue66
Date: Mon, 17 Feb 2025 11:31:01 +0800
Subject: [PATCH 3/5] update

---
 libs/ultra-infer/CMakeLists.txt | 4 ++--
 libs/ultra-infer/cmake/onnxruntime.cmake | 4 ++--
 .../runtime/backends/ort/ops/adaptive_pool2d.cc | 5 +++++
 .../runtime/backends/ort/ops/adaptive_pool2d.h | 8 ++++++++
 .../runtime/backends/ort/ops/multiclass_nms.cc | 5 +++++
 .../ultra_infer/runtime/backends/ort/ops/multiclass_nms.h | 7 +++++++
 6 files changed, 29 insertions(+), 4 deletions(-)
 mode change 100755 => 100644 libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/adaptive_pool2d.cc
 mode change 100755 => 100644 libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/adaptive_pool2d.h
 mode change 100755 => 100644 libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/multiclass_nms.cc
 mode change 100755 => 100644 libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/multiclass_nms.h

diff --git a/libs/ultra-infer/CMakeLists.txt b/libs/ultra-infer/CMakeLists.txt
index 3d7ccfff6f..04cdbf36cc 100755
--- a/libs/ultra-infer/CMakeLists.txt
+++ b/libs/ultra-infer/CMakeLists.txt
@@ -36,10 +36,10 @@ set(THIRD_PARTY_DIR ${PROJECT_SOURCE_DIR}/third_party)
 add_subdirectory(${CSRCS_DIR_NAME}/ultra_infer)
 include(${PROJECT_SOURCE_DIR}/cmake/utils.cmake)
 
-# Set C++11 as standard for the whole project
+# Set C++17 as standard for the whole project
 if(NOT MSVC)
   if(NOT DEFINED CMAKE_CXX_STANDARD)
-    set(CMAKE_CXX_STANDARD 11)
+    set(CMAKE_CXX_STANDARD 17)
   endif()
   set(CMAKE_CXX_FLAGS "-Wno-format -g0 -O3")
   if(NEED_ABI0)
diff --git a/libs/ultra-infer/cmake/onnxruntime.cmake b/libs/ultra-infer/cmake/onnxruntime.cmake
index 8672c8c7ef..da0e740743 100755
--- a/libs/ultra-infer/cmake/onnxruntime.cmake
+++ b/libs/ultra-infer/cmake/onnxruntime.cmake
@@ -41,8 +41,8 @@ else()
 endif()
 set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}" "${ONNXRUNTIME_LIB_DIR}")
 
-set(ONNXRUNTIME_VERSION "1.12.0")
-set(ONNXRUNTIME_URL_PREFIX "https://bj.bcebos.com/paddle2onnx/libs/")
+set(ONNXRUNTIME_VERSION "1.20.1")
+set(ONNXRUNTIME_URL_PREFIX "https://fastdeploy.bj.bcebos.com/third_libs/")
 
 if(WIN32)
   if(WITH_GPU)
diff --git a/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/adaptive_pool2d.cc b/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/adaptive_pool2d.cc
old mode 100755
new mode 100644
index 983217fde5..c0eddc00f2
--- a/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/adaptive_pool2d.cc
+++ b/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/adaptive_pool2d.cc
@@ -60,6 +60,11 @@ void AdaptivePool2dKernel::CpuAdaptivePool(
   }
 }
 
+OrtStatusPtr AdaptivePool2dKernel::ComputeV2(OrtKernelContext *context) {
+  Compute(context);
+  return nullptr;
+}
+
 void AdaptivePool2dKernel::Compute(OrtKernelContext *context) {
 #if ORT_API_VERSION >= 14
   Ort::KernelContext ort_context{context};
diff --git a/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/adaptive_pool2d.h b/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/adaptive_pool2d.h
old mode 100755
new mode 100644
index f105a2e8b6..b4de2bb0fd
--- a/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/adaptive_pool2d.h
+++ b/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/adaptive_pool2d.h
@@ -49,6 +49,8 @@ struct AdaptivePool2dKernel {
 
   void Compute(OrtKernelContext *context);
 
+  OrtStatusPtr ComputeV2(OrtKernelContext *context);
+
   void CpuAdaptivePool(const std::vector<int64_t> &input_size,
                        const std::vector<int64_t> &output_size,
                        const float *input_data, float *output_data);
@@ -61,6 +63,12 @@ struct AdaptivePool2dOp
     return new AdaptivePool2dKernel(api, info, provider_);
   }
 
+  OrtStatusPtr CreateKernelV2(OrtApi api, const OrtKernelInfo *info,
+                              void **op_kernel) const {
+    *op_kernel = new AdaptivePool2dKernel(api, info, provider_);
+    return nullptr;
+  }
+
   const char *GetName() const { return "AdaptivePool2d"; }
 
   size_t GetInputTypeCount() const { return 1; }
diff --git a/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/multiclass_nms.cc b/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/multiclass_nms.cc
old mode 100755
new mode 100644
index c7c5bf3724..34165d8b4c
--- a/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/multiclass_nms.cc
+++ b/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/multiclass_nms.cc
@@ -156,6 +156,11 @@ int MultiClassNmsKernel::NMSForEachSample(
   return num_det;
 }
 
+OrtStatusPtr MultiClassNmsKernel::ComputeV2(OrtKernelContext *context) {
+  Compute(context);
+  return nullptr;
+}
+
 void MultiClassNmsKernel::Compute(OrtKernelContext *context) {
 #if ORT_API_VERSION >= 14
   Ort::KernelContext ort_context{context};
diff --git a/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/multiclass_nms.h b/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/multiclass_nms.h
old mode 100755
new mode 100644
index 4f71b14fc9..4c6e95863e
--- a/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/multiclass_nms.h
+++ b/libs/ultra-infer/ultra_infer/runtime/backends/ort/ops/multiclass_nms.h
@@ -40,6 +40,7 @@ struct MultiClassNmsKernel {
   void GetAttribute(const OrtKernelInfo *info);
 
   void Compute(OrtKernelContext *context);
+  OrtStatusPtr ComputeV2(OrtKernelContext *context);
   void FastNMS(const float *boxes, const float *scores, const int &num_boxes,
                std::vector<int> *keep_indices);
   int NMSForEachSample(const float *boxes, const float *scores, int num_boxes,
@@ -53,6 +54,12 @@ struct MultiClassNmsOp
     return new MultiClassNmsKernel(api, info);
   }
 
+  OrtStatusPtr CreateKernelV2(OrtApi api, const OrtKernelInfo *info,
+                              void **op_kernel) const {
+    *op_kernel = new MultiClassNmsKernel(api, info);
+    return nullptr;
+  }
+
   const char *GetName() const { return "MultiClassNMS"; }
 
   size_t GetInputTypeCount() const { return 2; }

From 1602b31118b869607fc3fcf45c63cd73eaffea72 Mon Sep 17 00:00:00 2001
From: zhangyue66
Date: Fri, 21 Feb 2025 11:35:25 +0800
Subject: [PATCH 4/5] fix

---
 .../ultra_infer/runtime/backends/ort/ort_backend.cc | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/libs/ultra-infer/ultra_infer/runtime/backends/ort/ort_backend.cc b/libs/ultra-infer/ultra_infer/runtime/backends/ort/ort_backend.cc
index ff068140af..9cb80865be 100755
--- a/libs/ultra-infer/ultra_infer/runtime/backends/ort/ort_backend.cc
+++ b/libs/ultra-infer/ultra_infer/runtime/backends/ort/ort_backend.cc
@@ -328,6 +328,9 @@ void OrtBackend::OrtValueToFDTensor(const Ort::Value &value, FDTensor *tensor,
   } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8) {
     dtype = FDDataType::INT8;
     numel *= sizeof(int8_t);
+  } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL) {
+    dtype = FDDataType::BOOL;
+    numel *= sizeof(bool);
   } else {
     FDASSERT(
         false,

From 28a94fd28996b323e9044595963690d565a12e0a Mon Sep 17 00:00:00 2001
From: zhangyue66
Date: Fri, 21 Feb 2025 14:05:23 +0800
Subject: [PATCH 5/5] fix

---
 libs/ultra-infer/ultra_infer/pybind/main.cc.in | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libs/ultra-infer/ultra_infer/pybind/main.cc.in b/libs/ultra-infer/ultra_infer/pybind/main.cc.in
index dcdaf3c25e..0ada742315 100755
--- a/libs/ultra-infer/ultra_infer/pybind/main.cc.in
+++ b/libs/ultra-infer/ultra_infer/pybind/main.cc.in
@@ -41,6 +41,8 @@ pybind11::dtype FDDataTypeToNumpyDataType(const FDDataType& fd_dtype) {
     dt = pybind11::dtype::of<int8_t>();
   } else if (fd_dtype == FDDataType::FP16) {
     dt = pybind11::dtype::of<float16>();
+  } else if (fd_dtype == FDDataType::BOOL) {
+    dt = pybind11::dtype::of<bool>();
   } else {
     FDASSERT(false, "The function doesn't support data type of %s.",
              Str(fd_dtype).c_str());
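
---

Usage note, not part of the applied diffs: with the series applied and the wheel built with ENABLE_OM_BACKEND=ON on an Ascend host, the new backend should be reachable from Python roughly as sketched below. This is a minimal sketch under assumptions: the "model.om" path is a placeholder for an offline-converted Ascend OM model, and RuntimeOption.set_model_path is assumed to keep its existing FastDeploy-style signature; only is_built_with_om, ModelFormat.OM, and use_om_backend are introduced by this series.

    import ultra_infer as ui

    # is_built_with_om() is exported by c_lib_wrap.py.in in patch 2
    assert ui.is_built_with_om()

    option = ui.RuntimeOption()
    # "model.om" is a hypothetical offline-converted OM model file
    option.set_model_path("model.om", model_format=ui.ModelFormat.OM)
    option.use_om_backend()  # selects Backend::OMONNPU

    runtime = ui.Runtime(option)  # Runtime::Init routes to CreateOMBackend()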