From 8d344ee1642decced07b8e112e9702c6021f3303 Mon Sep 17 00:00:00 2001 From: Chen Xin Date: Fri, 13 Jan 2023 16:08:29 +0800 Subject: [PATCH 1/3] [Enhancement] Speedup TopDownAffine by CropResizePad (#1486) * "use 'CropResizePad' to speed up topdownaffine" * add missing header (cherry picked from commit c458e2a5244c299f4a76e0d41fa63e8bdde86d20) --- .../apis/c/mmdeploy/pose_detector.cpp | 2 +- csrc/mmdeploy/codebase/mmpose/CMakeLists.txt | 8 +- .../mmpose/keypoints_from_heatmap.cpp | 1 - .../mmpose/keypoints_from_regression.cpp | 1 - .../codebase/mmpose/topdown_affine.cpp | 163 ++++++++---------- csrc/mmdeploy/operation/cpu/CMakeLists.txt | 3 +- .../operation/cpu/crop_resize_pad.cpp | 25 +++ csrc/mmdeploy/operation/cuda/CMakeLists.txt | 3 +- .../operation/cuda/crop_resize_pad.cpp | 88 ++++++++++ csrc/mmdeploy/operation/vision.cpp | 1 + csrc/mmdeploy/operation/vision.h | 9 + csrc/mmdeploy/utils/opencv/opencv_utils.cpp | 12 ++ csrc/mmdeploy/utils/opencv/opencv_utils.h | 10 ++ demo/csrc/c/det_pose.cpp | 3 +- 14 files changed, 229 insertions(+), 100 deletions(-) create mode 100644 csrc/mmdeploy/operation/cpu/crop_resize_pad.cpp create mode 100644 csrc/mmdeploy/operation/cuda/crop_resize_pad.cpp diff --git a/csrc/mmdeploy/apis/c/mmdeploy/pose_detector.cpp b/csrc/mmdeploy/apis/c/mmdeploy/pose_detector.cpp index 5db936bde5..e4fffd4655 100644 --- a/csrc/mmdeploy/apis/c/mmdeploy/pose_detector.cpp +++ b/csrc/mmdeploy/apis/c/mmdeploy/pose_detector.cpp @@ -117,7 +117,7 @@ int mmdeploy_pose_detector_create_input(const mmdeploy_mat_t* mats, int mat_coun } else { b = {0, 0, img.width(), img.height(), 1.0}; } - input_images.push_back({{"ori_img", img}, {"bbox", std::move(b)}, {"rotation", 0.f}}); + input_images.push_back({{"ori_img", img}, {"bbox", std::move(b)}}); }; for (int i = 0; i < mat_count; ++i) { diff --git a/csrc/mmdeploy/codebase/mmpose/CMakeLists.txt b/csrc/mmdeploy/codebase/mmpose/CMakeLists.txt index 968b6e956c..86b244a662 100644 --- a/csrc/mmdeploy/codebase/mmpose/CMakeLists.txt +++ b/csrc/mmdeploy/codebase/mmpose/CMakeLists.txt @@ -2,13 +2,15 @@ project(mmdeploy_mmpose) +find_package(OpenCV REQUIRED) -file(GLOB_RECURSE SRCS ${CMAKE_CURRENT_SOURCE_DIR} "*.cpp") -mmdeploy_add_module(${PROJECT_NAME} "${SRCS}") +aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR} MMPOSE_SRCS) + +mmdeploy_add_module(${PROJECT_NAME} ${MMPOSE_SRCS}) target_link_libraries(${PROJECT_NAME} PRIVATE mmdeploy::transform mmdeploy_operation - mmdeploy_opencv_utils) + ${OpenCV_LIBS}) add_library(mmdeploy::mmpose ALIAS ${PROJECT_NAME}) set(MMDEPLOY_TASKS ${MMDEPLOY_TASKS} pose_detector CACHE INTERNAL "") diff --git a/csrc/mmdeploy/codebase/mmpose/keypoints_from_heatmap.cpp b/csrc/mmdeploy/codebase/mmpose/keypoints_from_heatmap.cpp index cf20173e69..a199b752e0 100644 --- a/csrc/mmdeploy/codebase/mmpose/keypoints_from_heatmap.cpp +++ b/csrc/mmdeploy/codebase/mmpose/keypoints_from_heatmap.cpp @@ -12,7 +12,6 @@ #include "mmdeploy/core/value.h" #include "mmdeploy/experimental/module_adapter.h" #include "mmpose.h" -#include "opencv_utils.h" namespace mmdeploy::mmpose { diff --git a/csrc/mmdeploy/codebase/mmpose/keypoints_from_regression.cpp b/csrc/mmdeploy/codebase/mmpose/keypoints_from_regression.cpp index 6810519575..06f372c788 100644 --- a/csrc/mmdeploy/codebase/mmpose/keypoints_from_regression.cpp +++ b/csrc/mmdeploy/codebase/mmpose/keypoints_from_regression.cpp @@ -11,7 +11,6 @@ #include "mmdeploy/core/value.h" #include "mmdeploy/experimental/module_adapter.h" #include "mmpose.h" -#include "opencv_utils.h" namespace mmdeploy::mmpose { diff --git a/csrc/mmdeploy/codebase/mmpose/topdown_affine.cpp b/csrc/mmdeploy/codebase/mmpose/topdown_affine.cpp index 75b93fa847..f04ab0f38e 100644 --- a/csrc/mmdeploy/codebase/mmpose/topdown_affine.cpp +++ b/csrc/mmdeploy/codebase/mmpose/topdown_affine.cpp @@ -1,5 +1,6 @@ // Copyright (c) OpenMMLab. All rights reserved. +#include #include #include "mmdeploy/archive/value_archive.h" @@ -10,31 +11,18 @@ #include "mmdeploy/operation/managed.h" #include "mmdeploy/operation/vision.h" #include "mmdeploy/preprocess/transform/transform.h" -#include "opencv2/imgproc.hpp" -#include "opencv_utils.h" using namespace std; namespace mmdeploy::mmpose { -cv::Point2f operator*(const cv::Point2f& a, const cv::Point2f& b) { - cv::Point2f c; - c.x = a.x * b.x; - c.y = a.y * b.y; - return c; -} - class TopDownAffine : public transform::Transform { public: explicit TopDownAffine(const Value& args) noexcept { - use_udp_ = args.value("use_udp", use_udp_); - backend_ = args.contains("backend") && args["backend"].is_string() - ? args["backend"].get() - : backend_; - stream_ = args["context"]["stream"].get(); assert(args.contains("image_size")); from_value(args["image_size"], image_size_); - warp_affine_ = operation::Managed::Create("bilinear"); + crop_resize_pad_ = + ::mmdeploy::operation::Managed<::mmdeploy::operation::CropResizePad>::Create(); } ~TopDownAffine() override = default; @@ -52,6 +40,7 @@ class TopDownAffine : public transform::Transform { // after mmpose v0.26.0 from_value(data["center"], c); from_value(data["scale"], s); + from_value(data["bbox"], bbox); } else { // before mmpose v0.26.0 from_value(data["bbox"], bbox); @@ -59,28 +48,84 @@ class TopDownAffine : public transform::Transform { } // end prepare data - auto r = data["rotation"].get(); - Tensor dst; - if (use_udp_) { - cv::Mat trans = - GetWarpMatrix(r, {c[0] * 2.f, c[1] * 2.f}, {image_size_[0] - 1.f, image_size_[1] - 1.f}, - {s[0] * 200.f, s[1] * 200.f}); - OUTCOME_TRY(warp_affine_.Apply(img, dst, trans.ptr(), image_size_[1], image_size_[0])); - } else { - cv::Mat trans = - GetAffineTransform({c[0], c[1]}, {s[0], s[1]}, r, {image_size_[0], image_size_[1]}); - OUTCOME_TRY(warp_affine_.Apply(img, dst, trans.ptr(), image_size_[1], image_size_[0])); + { + s[0] *= 200; + s[1] *= 200; + const std::array img_roi{0, 0, (int)img.shape(2), (int)img.shape(1)}; + const std::array tmp_roi{0, 0, (int)image_size_[0], (int)image_size_[1]}; + auto roi = round({c[0] - s[0] / 2.f, c[1] - s[1] / 2.f, s[0], s[1]}); + auto src_roi = intersect(roi, img_roi); + // prior scale factor + auto factor = (float)image_size_[0] / s[0]; + // rounded dst roi + auto dst_roi = round({(src_roi[0] - roi[0]) * factor, // + (src_roi[1] - roi[1]) * factor, // + src_roi[2] * factor, // + src_roi[3] * factor}); + dst_roi = intersect(dst_roi, tmp_roi); + // exact scale factors + auto factor_x = (float)dst_roi[2] / src_roi[2]; + auto factor_y = (float)dst_roi[3] / src_roi[3]; + // center of src roi + auto c_src_x = src_roi[0] + (src_roi[2] - 1) / 2.f; + auto c_src_y = src_roi[1] + (src_roi[3] - 1) / 2.f; + // center of dst roi + auto c_dst_x = dst_roi[0] + (dst_roi[2] - 1) / 2.f; + auto c_dst_y = dst_roi[1] + (dst_roi[3] - 1) / 2.f; + // vector from c_dst to (w/2, h/2) + auto v_dst_x = image_size_[0] / 2.f - c_dst_x; + auto v_dst_y = image_size_[1] / 2.f - c_dst_y; + // vector from c_src to corrected center + auto v_src_x = v_dst_x / factor_x; + auto v_src_y = v_dst_y / factor_y; + // corrected center + c[0] = c_src_x + v_src_x; + c[1] = c_src_y + v_src_y; + // corrected scale + s[0] = image_size_[0] / factor_x / 200.f; + s[1] = image_size_[1] / factor_y / 200.f; + + vector crop_rect = {src_roi[1], src_roi[0], src_roi[1] + src_roi[3] - 1, + src_roi[0] + src_roi[2] - 1}; + vector target_size = {dst_roi[2], dst_roi[3]}; + vector pad_rect = {dst_roi[1], dst_roi[0], image_size_[1] - dst_roi[3] - dst_roi[1], + image_size_[0] - dst_roi[2] - dst_roi[0]}; + crop_resize_pad_.Apply(img, crop_rect, target_size, pad_rect, dst); } - data["img_shape"] = {1, image_size_[1], image_size_[0], dst.shape(3)}; data["img"] = std::move(dst); + data["img_shape"] = {1, image_size_[1], image_size_[0], img.shape(3)}; data["center"] = to_value(c); data["scale"] = to_value(s); MMDEPLOY_DEBUG("output: {}", data); return success(); } + static std::array round(const std::array& a) { + return { + static_cast(std::round(a[0])), + static_cast(std::round(a[1])), + static_cast(std::round(a[2])), + static_cast(std::round(a[3])), + }; + } + + // xywh + template + static std::array intersect(std::array a, std::array b) { + auto x1 = std::max(a[0], b[0]); + auto y1 = std::max(a[1], b[1]); + a[2] = std::min(a[0] + a[2], b[0] + b[2]) - x1; + a[3] = std::min(a[1] + a[3], b[1] + b[3]) - y1; + a[0] = x1; + a[1] = y1; + if (a[2] <= 0 || a[3] <= 0) { + a = {}; + } + return a; + } + void Box2cs(vector& box, vector& center, vector& scale) { // bbox_xywh2cs float x = box[0]; @@ -99,71 +144,9 @@ class TopDownAffine : public transform::Transform { scale.push_back(h / 200 * 1.25); } - cv::Mat GetWarpMatrix(float theta, cv::Size2f size_input, cv::Size2f size_dst, - cv::Size2f size_target) { - theta = theta * 3.1415926 / 180; - float scale_x = size_dst.width / size_target.width; - float scale_y = size_dst.height / size_target.height; - cv::Mat matrix = cv::Mat(2, 3, CV_32F); - matrix.at(0, 0) = std::cos(theta) * scale_x; - matrix.at(0, 1) = -std::sin(theta) * scale_x; - matrix.at(0, 2) = - scale_x * (-0.5f * size_input.width * std::cos(theta) + - 0.5f * size_input.height * std::sin(theta) + 0.5f * size_target.width); - matrix.at(1, 0) = std::sin(theta) * scale_y; - matrix.at(1, 1) = std::cos(theta) * scale_y; - matrix.at(1, 2) = - scale_y * (-0.5f * size_input.width * std::sin(theta) - - 0.5f * size_input.height * std::cos(theta) + 0.5f * size_target.height); - return matrix; - } - - cv::Mat GetAffineTransform(cv::Point2f center, cv::Point2f scale, float rot, cv::Size output_size, - cv::Point2f shift = {0.f, 0.f}, bool inv = false) { - cv::Point2f scale_tmp = scale * 200; - float src_w = scale_tmp.x; - int dst_w = output_size.width; - int dst_h = output_size.height; - float rot_rad = 3.1415926 * rot / 180; - cv::Point2f src_dir = rotate_point({0.f, src_w * -0.5f}, rot_rad); - cv::Point2f dst_dir = {0.f, dst_w * -0.5f}; - - cv::Point2f src_points[3]; - src_points[0] = center + scale_tmp * shift; - src_points[1] = center + src_dir + scale_tmp * shift; - src_points[2] = Get3rdPoint(src_points[0], src_points[1]); - - cv::Point2f dst_points[3]; - dst_points[0] = {dst_w * 0.5f, dst_h * 0.5f}; - dst_points[1] = dst_dir + cv::Point2f(dst_w * 0.5f, dst_h * 0.5f); - dst_points[2] = Get3rdPoint(dst_points[0], dst_points[1]); - - cv::Mat trans = inv ? cv::getAffineTransform(dst_points, src_points) - : cv::getAffineTransform(src_points, dst_points); - trans.convertTo(trans, CV_32F); - return trans; - } - - cv::Point2f rotate_point(cv::Point2f pt, float angle_rad) { - float sn = std::sin(angle_rad); - float cs = std::cos(angle_rad); - float new_x = pt.x * cs - pt.y * sn; - float new_y = pt.x * sn + pt.y * cs; - return {new_x, new_y}; - } - - cv::Point2f Get3rdPoint(cv::Point2f a, cv::Point2f b) { - cv::Point2f direction = a - b; - cv::Point2f third_pt = b + cv::Point2f(-direction.y, direction.x); - return third_pt; - } - protected: - operation::Managed warp_affine_; - bool use_udp_{false}; vector image_size_; - std::string backend_; - Stream stream_; + ::mmdeploy::operation::Managed<::mmdeploy::operation::CropResizePad> crop_resize_pad_; }; MMDEPLOY_REGISTER_TRANSFORM(TopDownAffine); diff --git a/csrc/mmdeploy/operation/cpu/CMakeLists.txt b/csrc/mmdeploy/operation/cpu/CMakeLists.txt index d1310baaef..fa6a1de56c 100644 --- a/csrc/mmdeploy/operation/cpu/CMakeLists.txt +++ b/csrc/mmdeploy/operation/cpu/CMakeLists.txt @@ -10,7 +10,8 @@ set(SRCS resize.cpp normalize.cpp crop.cpp flip.cpp - warp_affine.cpp) + warp_affine.cpp + crop_resize_pad.cpp) mmdeploy_add_module(${PROJECT_NAME} "${SRCS}") diff --git a/csrc/mmdeploy/operation/cpu/crop_resize_pad.cpp b/csrc/mmdeploy/operation/cpu/crop_resize_pad.cpp new file mode 100644 index 0000000000..ee0dea65ab --- /dev/null +++ b/csrc/mmdeploy/operation/cpu/crop_resize_pad.cpp @@ -0,0 +1,25 @@ +// Copyright (c) OpenMMLab. All rights reserved. + +#include "mmdeploy/operation/vision.h" +#include "mmdeploy/utils/opencv/opencv_utils.h" + +namespace mmdeploy::operation::cpu { + +class CropResizePadImpl : public CropResizePad { + public: + CropResizePadImpl() = default; + + Result apply(const Tensor &src, const std::vector &crop_rect, + const std::vector &target_size, const std::vector &pad_rect, + Tensor &dst) override { + auto src_mat = mmdeploy::cpu::Tensor2CVMat(src); + auto dst_mat = mmdeploy::cpu::CropResizePad(src_mat, crop_rect, target_size, pad_rect); + dst = mmdeploy::cpu::CVMat2Tensor(dst_mat); + return success(); + } +}; + +MMDEPLOY_REGISTER_FACTORY_FUNC(CropResizePad, (cpu, 0), + []() { return std::make_unique(); }); + +} // namespace mmdeploy::operation::cpu diff --git a/csrc/mmdeploy/operation/cuda/CMakeLists.txt b/csrc/mmdeploy/operation/cuda/CMakeLists.txt index 5e04f640bd..8322b9d2ad 100644 --- a/csrc/mmdeploy/operation/cuda/CMakeLists.txt +++ b/csrc/mmdeploy/operation/cuda/CMakeLists.txt @@ -18,7 +18,8 @@ set(SRCS resize.cpp crop.cpp crop.cu flip.cpp - warp_affine.cpp) + warp_affine.cpp + crop_resize_pad.cpp) mmdeploy_add_module(${PROJECT_NAME} "${SRCS}") diff --git a/csrc/mmdeploy/operation/cuda/crop_resize_pad.cpp b/csrc/mmdeploy/operation/cuda/crop_resize_pad.cpp new file mode 100644 index 0000000000..8bf3232d03 --- /dev/null +++ b/csrc/mmdeploy/operation/cuda/crop_resize_pad.cpp @@ -0,0 +1,88 @@ +// Copyright (c) OpenMMLab. All rights reserved. + +#include "mmdeploy/operation/vision.h" +#include "ppl/cv/cuda/resize.h" + +namespace mmdeploy::operation::cuda { + +class CropResizePadImpl : public CropResizePad { + public: + CropResizePadImpl() = default; + + Result apply(const Tensor &src, const std::vector &crop_rect, + const std::vector &target_size, const std::vector &pad_rect, + Tensor &dst) override { + auto cuda_stream = GetNative(stream()); + + int width = target_size[0] + pad_rect[1] + pad_rect[3]; + int height = target_size[1] + pad_rect[0] + pad_rect[2]; + + TensorDesc desc{device(), src.data_type(), {1, height, width, src.shape(3)}, src.name()}; + Tensor dst_tensor(desc); + cudaMemsetAsync(dst_tensor.data(), 0, dst_tensor.byte_size(), cuda_stream); + + if (src.data_type() == DataType::kINT8) { + OUTCOME_TRY( + ResizeDispatch(src, crop_rect, target_size, pad_rect, dst_tensor, cuda_stream)); + } else if (src.data_type() == DataType::kFLOAT) { + OUTCOME_TRY( + ResizeDispatch(src, crop_rect, target_size, pad_rect, dst_tensor, cuda_stream)); + } else { + MMDEPLOY_ERROR("unsupported data type {}", src.data_type()); + return Status(eNotSupported); + } + + dst = std::move(dst_tensor); + return success(); + } + + private: + template + auto Select(int channels) -> decltype(&ppl::cv::cuda::Resize) { + switch (channels) { + case 1: + return &ppl::cv::cuda::Resize; + case 3: + return &ppl::cv::cuda::Resize; + case 4: + return &ppl::cv::cuda::Resize; + default: + MMDEPLOY_ERROR("unsupported channels {}", channels); + return nullptr; + } + } + + template + Result ResizeDispatch(const Tensor &src, const std::vector &crop_rect, + const std::vector &target_size, const std::vector &pad_rect, + Tensor &dst, cudaStream_t stream) { + int in_height = crop_rect[2] - crop_rect[0] + 1; + int in_width = crop_rect[3] - crop_rect[1] + 1; + int in_width_stride = src.shape(2) * src.shape(3); + int in_offset = crop_rect[0] * in_width_stride + crop_rect[1] * src.shape(3); + int out_h = target_size[1]; + int out_w = target_size[0]; + int out_width_stride = dst.shape(2) * dst.shape(3); + int out_offset = pad_rect[0] * out_width_stride + pad_rect[1] * dst.shape(3); + auto interp = ppl::cv::InterpolationType::INTERPOLATION_LINEAR; + + auto input = src.data(); + auto output = dst.data(); + + ppl::common::RetCode ret = 0; + + if (auto resize = Select(src.shape(3)); resize) { + ret = resize(stream, in_height, in_width, in_width_stride, input + in_offset, out_h, out_w, + out_width_stride, output + out_offset, interp); + } else { + return Status(eNotSupported); + } + + return ret == 0 ? success() : Result(Status(eFail)); + } +}; + +MMDEPLOY_REGISTER_FACTORY_FUNC(CropResizePad, (cuda, 0), + []() { return std::make_unique(); }); + +} // namespace mmdeploy::operation::cuda diff --git a/csrc/mmdeploy/operation/vision.cpp b/csrc/mmdeploy/operation/vision.cpp index 35076e2bdb..18694f06a5 100644 --- a/csrc/mmdeploy/operation/vision.cpp +++ b/csrc/mmdeploy/operation/vision.cpp @@ -13,5 +13,6 @@ MMDEPLOY_DEFINE_REGISTRY(Normalize); MMDEPLOY_DEFINE_REGISTRY(Crop); MMDEPLOY_DEFINE_REGISTRY(Flip); MMDEPLOY_DEFINE_REGISTRY(WarpAffine); +MMDEPLOY_DEFINE_REGISTRY(CropResizePad); } // namespace mmdeploy::operation diff --git a/csrc/mmdeploy/operation/vision.h b/csrc/mmdeploy/operation/vision.h index 9b65dbaaac..10e699fed4 100644 --- a/csrc/mmdeploy/operation/vision.h +++ b/csrc/mmdeploy/operation/vision.h @@ -84,6 +84,15 @@ class WarpAffine : public Operation { }; MMDEPLOY_DECLARE_REGISTRY(WarpAffine, unique_ptr(const string_view& interp)); +class CropResizePad : public Operation { + public: + virtual Result apply(const Tensor& src, const std::vector& crop_rect, + const std::vector& target_size, const std::vector& pad_rect, + Tensor& dst) = 0; +}; + +MMDEPLOY_DECLARE_REGISTRY(CropResizePad, unique_ptr()); + } // namespace mmdeploy::operation #endif // MMDEPLOY_CSRC_MMDEPLOY_PREPROCESS_OPERATION_RESIZE_H_ diff --git a/csrc/mmdeploy/utils/opencv/opencv_utils.cpp b/csrc/mmdeploy/utils/opencv/opencv_utils.cpp index d410d5dcc8..17a87bbf9b 100644 --- a/csrc/mmdeploy/utils/opencv/opencv_utils.cpp +++ b/csrc/mmdeploy/utils/opencv/opencv_utils.cpp @@ -290,6 +290,18 @@ cv::Mat Pad(const cv::Mat& src, int top, int left, int bottom, int right, int bo return dst; } +cv::Mat CropResizePad(const cv::Mat& src, const std::vector& crop_rect, + const std::vector& target_size, const std::vector& pad_rect) { + int width = target_size[0] + pad_rect[1] + pad_rect[3]; + int height = target_size[1] + pad_rect[0] + pad_rect[2]; + cv::Mat dst = cv::Mat::zeros(height, width, src.type()); + cv::Rect roi1 = {crop_rect[1], crop_rect[0], crop_rect[3] - crop_rect[1] + 1, + crop_rect[2] - crop_rect[0] + 1}; + cv::Rect roi2 = {pad_rect[1], pad_rect[0], target_size[0], target_size[1]}; + cv::resize(src(roi1), dst(roi2), {target_size[0], target_size[1]}); + return dst; +} + bool Compare(const cv::Mat& src1, const cv::Mat& src2, float threshold) { cv::Mat _src1, _src2, diff; src1.convertTo(_src1, CV_32FC(src1.channels())); diff --git a/csrc/mmdeploy/utils/opencv/opencv_utils.h b/csrc/mmdeploy/utils/opencv/opencv_utils.h index 0c9646466d..9dd268e651 100644 --- a/csrc/mmdeploy/utils/opencv/opencv_utils.h +++ b/csrc/mmdeploy/utils/opencv/opencv_utils.h @@ -91,6 +91,16 @@ MMDEPLOY_API cv::Mat CvtColor(const cv::Mat& src, PixelFormat src_format, PixelF MMDEPLOY_API cv::Mat Pad(const cv::Mat& src, int top, int left, int bottom, int right, int border_type, float val); +/** + * @param src + * @param crop_rect t, l, b, r + * @param target_size w, h + * @param pad_rect t, l, b, r + */ +MMDEPLOY_API cv::Mat CropResizePad(const cv::Mat& src, const std::vector& crop_rect, + const std::vector& target_size, + const std::vector& pad_rect); + /** * @brief compare two images * diff --git a/demo/csrc/c/det_pose.cpp b/demo/csrc/c/det_pose.cpp index 30f80f1a77..a12a33ef7b 100644 --- a/demo/csrc/c/det_pose.cpp +++ b/demo/csrc/c/det_pose.cpp @@ -65,8 +65,7 @@ class AddBboxField { auto _box = from_value>(dets["bbox"]); rect = cv::Rect(cv::Rect2f(cv::Point2f(_box[0], _box[1]), cv::Point2f(_box[2], _box[3]))); } - return Value{ - {"ori_img", _img}, {"bbox", {rect.x, rect.y, rect.width, rect.height}}, {"rotation", 0.f}}; + return Value{{"ori_img", _img}, {"bbox", {rect.x, rect.y, rect.width, rect.height}}}; } }; From d91ba2f3c8f5ba1cba683c76e9a75836e6492204 Mon Sep 17 00:00:00 2001 From: Li Zhang Date: Fri, 20 Jan 2023 00:04:42 +0800 Subject: [PATCH 2/3] fix 'cvtcolor' error in the preprocessing of single channel images (#1666) (cherry picked from commit 8bb3fcc6d856857dc8341fb4c7de42f6b0c23ab0) --- csrc/mmdeploy/operation/cuda/cvtcolor.cpp | 9 +++++---- csrc/mmdeploy/preprocess/transform/load.cpp | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/csrc/mmdeploy/operation/cuda/cvtcolor.cpp b/csrc/mmdeploy/operation/cuda/cvtcolor.cpp index eaff21b1a5..1e35adc06b 100644 --- a/csrc/mmdeploy/operation/cuda/cvtcolor.cpp +++ b/csrc/mmdeploy/operation/cuda/cvtcolor.cpp @@ -90,10 +90,11 @@ class CvtColorImpl : public CvtColor { auto height = src.height(); auto width = src.width(); - auto channels = src.channel(); - auto stride = width * channels; + auto src_channels = src.channel(); + auto src_stride = width * src_channels; Mat dst_mat(height, width, dst_fmt, src.type(), device()); + auto dst_stride = width * dst_mat.channel(); auto convert = [&](auto type) -> Result { using T = typename decltype(type)::type; @@ -101,8 +102,8 @@ class CvtColorImpl : public CvtColor { if (!converter) { return Status(eNotSupported); } - auto ret = - converter(cuda_stream, height, width, stride, src.data(), stride, dst_mat.data()); + auto ret = converter(cuda_stream, height, width, src_stride, src.data(), dst_stride, + dst_mat.data()); if (ret != ppl::common::RC_SUCCESS) { return Status(eFail); } diff --git a/csrc/mmdeploy/preprocess/transform/load.cpp b/csrc/mmdeploy/preprocess/transform/load.cpp index 57879d5b4b..c386957222 100644 --- a/csrc/mmdeploy/preprocess/transform/load.cpp +++ b/csrc/mmdeploy/preprocess/transform/load.cpp @@ -61,7 +61,7 @@ class PrepareImage : public Transform { if (color_type_ == "color" || color_type_ == "color_ignore_orientation") { OUTCOME_TRY(cvt_color_.Apply(src_mat, dst_mat, PixelFormat::kBGR)); } else { - OUTCOME_TRY(cvt_color_.Apply(dst_mat, dst_mat, PixelFormat::kGRAYSCALE)); + OUTCOME_TRY(cvt_color_.Apply(src_mat, dst_mat, PixelFormat::kGRAYSCALE)); } auto tensor = to_tensor(dst_mat); if (to_float32_) { From 7d9696914996d1543428a16dbb2c11a9842e7540 Mon Sep 17 00:00:00 2001 From: Li Zhang Date: Tue, 31 Jan 2023 11:24:24 +0800 Subject: [PATCH 3/3] [Feature] Pose tracker C/C++/Python API&demos (#1663) * add PoseTracker API * add mahalanobis distance, add det_pose demo * simplify api * simplify api * fix cmake & fix `CropResizePad` * ignore out of frame bboxes * clean-up * fix lint * add c api docs * add c++ api docs/comments * fix gcc7 build * fix gcc7+opencv3 * fix stupid lint * fix ci * add help info & webcam support for C++ pose tracker demo * add webcam support for Python pose tracker demo * fix lint * minor * minor * fix MSVC build * fix python binding * simplify module adapter * fix module adapter * minor fix (cherry picked from commit 3d425bbb9ffd652debb3ebb842c5048ec59051f8) --- csrc/mmdeploy/apis/c/CMakeLists.txt | 4 + csrc/mmdeploy/apis/c/mmdeploy/classifier.cpp | 8 +- csrc/mmdeploy/apis/c/mmdeploy/classifier.h | 6 +- csrc/mmdeploy/apis/c/mmdeploy/common.cpp | 6 +- .../apis/c/mmdeploy/common_internal.h | 8 +- csrc/mmdeploy/apis/c/mmdeploy/detector.cpp | 2 +- csrc/mmdeploy/apis/c/mmdeploy/detector.h | 6 +- csrc/mmdeploy/apis/c/mmdeploy/executor.cpp | 8 +- csrc/mmdeploy/apis/c/mmdeploy/executor.h | 2 +- .../apis/c/mmdeploy/executor_internal.h | 2 +- csrc/mmdeploy/apis/c/mmdeploy/model.cpp | 4 +- csrc/mmdeploy/apis/c/mmdeploy/model.h | 2 +- csrc/mmdeploy/apis/c/mmdeploy/pipeline.cpp | 8 +- csrc/mmdeploy/apis/c/mmdeploy/pipeline.h | 4 +- .../apis/c/mmdeploy/pose_detector.cpp | 8 +- csrc/mmdeploy/apis/c/mmdeploy/pose_detector.h | 6 +- .../mmdeploy/apis/c/mmdeploy/pose_tracker.cpp | 225 ++++ csrc/mmdeploy/apis/c/mmdeploy/pose_tracker.h | 158 +++ csrc/mmdeploy/apis/c/mmdeploy/restorer.cpp | 10 +- csrc/mmdeploy/apis/c/mmdeploy/restorer.h | 6 +- .../apis/c/mmdeploy/rotated_detector.cpp | 8 +- .../apis/c/mmdeploy/rotated_detector.h | 6 +- csrc/mmdeploy/apis/c/mmdeploy/segmentor.cpp | 8 +- csrc/mmdeploy/apis/c/mmdeploy/segmentor.h | 6 +- .../apis/c/mmdeploy/text_detector.cpp | 10 +- csrc/mmdeploy/apis/c/mmdeploy/text_detector.h | 6 +- .../apis/c/mmdeploy/text_recognizer.cpp | 10 +- .../apis/c/mmdeploy/text_recognizer.h | 6 +- .../apis/c/mmdeploy/video_recognizer.cpp | 10 +- .../apis/c/mmdeploy/video_recognizer.h | 6 +- csrc/mmdeploy/apis/cxx/mmdeploy/common.hpp | 38 + .../apis/cxx/mmdeploy/pose_tracker.hpp | 151 +++ csrc/mmdeploy/apis/python/pose_tracker.cpp | 149 +++ csrc/mmdeploy/codebase/mmpose/CMakeLists.txt | 9 +- .../codebase/mmpose/pose_tracker/common.h | 57 + .../codebase/mmpose/pose_tracker/pipeline.cpp | 117 ++ .../mmpose/pose_tracker/pose_tracker.cpp | 399 ++++++ .../mmpose/pose_tracker/pose_tracker.h | 101 ++ .../mmpose/pose_tracker/smoothing_filter.cpp | 49 + .../mmpose/pose_tracker/smoothing_filter.h | 58 + .../codebase/mmpose/pose_tracker/track.cpp | 70 + .../codebase/mmpose/pose_tracker/track.h | 65 + .../mmpose/pose_tracker/tracking_filter.cpp | 199 +++ .../mmpose/pose_tracker/tracking_filter.h | 50 + .../codebase/mmpose/pose_tracker/utils.cpp | 136 ++ .../codebase/mmpose/pose_tracker/utils.h | 96 ++ csrc/mmdeploy/experimental/module_adapter.h | 91 +- csrc/mmdeploy/graph/inference.cpp | 2 +- .../operation/cuda/crop_resize_pad.cpp | 20 +- csrc/mmdeploy/utils/opencv/opencv_utils.cpp | 10 +- demo/csrc/CMakeLists.txt | 4 +- demo/csrc/cpp/det_pose.cpp | 113 ++ demo/csrc/cpp/pose_tracker.cpp | 1144 ++--------------- demo/python/det_pose.py | 85 ++ demo/python/pose_tracker.py | 98 ++ 55 files changed, 2696 insertions(+), 1174 deletions(-) create mode 100644 csrc/mmdeploy/apis/c/mmdeploy/pose_tracker.cpp create mode 100644 csrc/mmdeploy/apis/c/mmdeploy/pose_tracker.h create mode 100644 csrc/mmdeploy/apis/cxx/mmdeploy/pose_tracker.hpp create mode 100644 csrc/mmdeploy/apis/python/pose_tracker.cpp create mode 100644 csrc/mmdeploy/codebase/mmpose/pose_tracker/common.h create mode 100644 csrc/mmdeploy/codebase/mmpose/pose_tracker/pipeline.cpp create mode 100644 csrc/mmdeploy/codebase/mmpose/pose_tracker/pose_tracker.cpp create mode 100644 csrc/mmdeploy/codebase/mmpose/pose_tracker/pose_tracker.h create mode 100644 csrc/mmdeploy/codebase/mmpose/pose_tracker/smoothing_filter.cpp create mode 100644 csrc/mmdeploy/codebase/mmpose/pose_tracker/smoothing_filter.h create mode 100644 csrc/mmdeploy/codebase/mmpose/pose_tracker/track.cpp create mode 100644 csrc/mmdeploy/codebase/mmpose/pose_tracker/track.h create mode 100644 csrc/mmdeploy/codebase/mmpose/pose_tracker/tracking_filter.cpp create mode 100644 csrc/mmdeploy/codebase/mmpose/pose_tracker/tracking_filter.h create mode 100644 csrc/mmdeploy/codebase/mmpose/pose_tracker/utils.cpp create mode 100644 csrc/mmdeploy/codebase/mmpose/pose_tracker/utils.h create mode 100644 demo/csrc/cpp/det_pose.cpp create mode 100644 demo/python/det_pose.py create mode 100644 demo/python/pose_tracker.py diff --git a/csrc/mmdeploy/apis/c/CMakeLists.txt b/csrc/mmdeploy/apis/c/CMakeLists.txt index a9dd03c06a..1d0533aff4 100644 --- a/csrc/mmdeploy/apis/c/CMakeLists.txt +++ b/csrc/mmdeploy/apis/c/CMakeLists.txt @@ -12,6 +12,9 @@ macro(add_object name) target_compile_options(${name} PRIVATE $<$:-fvisibility=hidden>) endif () target_link_libraries(${name} PRIVATE mmdeploy::core) + target_include_directories(${name} PUBLIC + $ + $) set(CAPI_OBJS ${CAPI_OBJS} ${name}) mmdeploy_export(${name}) endmacro() @@ -31,6 +34,7 @@ foreach (TASK ${COMMON_LIST}) mmdeploy_add_library(${TARGET_NAME}) target_link_libraries(${TARGET_NAME} PRIVATE ${OBJECT_NAME}) target_include_directories(${TARGET_NAME} PUBLIC + $ $) install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/mmdeploy/${TASK}.h DESTINATION include/mmdeploy) diff --git a/csrc/mmdeploy/apis/c/mmdeploy/classifier.cpp b/csrc/mmdeploy/apis/c/mmdeploy/classifier.cpp index 42168ad547..6b177c9b47 100644 --- a/csrc/mmdeploy/apis/c/mmdeploy/classifier.cpp +++ b/csrc/mmdeploy/apis/c/mmdeploy/classifier.cpp @@ -1,17 +1,17 @@ // Copyright (c) OpenMMLab. All rights reserved. -#include "classifier.h" +#include "mmdeploy/classifier.h" #include -#include "common_internal.h" -#include "handle.h" #include "mmdeploy/archive/value_archive.h" #include "mmdeploy/codebase/mmcls/mmcls.h" +#include "mmdeploy/common_internal.h" #include "mmdeploy/core/device.h" #include "mmdeploy/core/graph.h" #include "mmdeploy/core/utils/formatter.h" -#include "pipeline.h" +#include "mmdeploy/handle.h" +#include "mmdeploy/pipeline.h" using namespace mmdeploy; using namespace std; diff --git a/csrc/mmdeploy/apis/c/mmdeploy/classifier.h b/csrc/mmdeploy/apis/c/mmdeploy/classifier.h index 0b16a68072..54e9d0215b 100644 --- a/csrc/mmdeploy/apis/c/mmdeploy/classifier.h +++ b/csrc/mmdeploy/apis/c/mmdeploy/classifier.h @@ -8,9 +8,9 @@ #ifndef MMDEPLOY_CLASSIFIER_H #define MMDEPLOY_CLASSIFIER_H -#include "common.h" -#include "executor.h" -#include "model.h" +#include "mmdeploy/common.h" +#include "mmdeploy/executor.h" +#include "mmdeploy/model.h" #ifdef __cplusplus extern "C" { diff --git a/csrc/mmdeploy/apis/c/mmdeploy/common.cpp b/csrc/mmdeploy/apis/c/mmdeploy/common.cpp index 564ba5dd0d..1936be3353 100644 --- a/csrc/mmdeploy/apis/c/mmdeploy/common.cpp +++ b/csrc/mmdeploy/apis/c/mmdeploy/common.cpp @@ -1,9 +1,9 @@ -#include "common.h" +#include "mmdeploy/common.h" -#include "common_internal.h" -#include "executor_internal.h" +#include "mmdeploy/common_internal.h" #include "mmdeploy/core/mat.h" #include "mmdeploy/core/profiler.h" +#include "mmdeploy/executor_internal.h" mmdeploy_value_t mmdeploy_value_copy(mmdeploy_value_t value) { if (!value) { diff --git a/csrc/mmdeploy/apis/c/mmdeploy/common_internal.h b/csrc/mmdeploy/apis/c/mmdeploy/common_internal.h index c7d22268de..a1ddecb54d 100644 --- a/csrc/mmdeploy/apis/c/mmdeploy/common_internal.h +++ b/csrc/mmdeploy/apis/c/mmdeploy/common_internal.h @@ -3,12 +3,12 @@ #ifndef MMDEPLOY_CSRC_APIS_C_COMMON_INTERNAL_H_ #define MMDEPLOY_CSRC_APIS_C_COMMON_INTERNAL_H_ -#include "common.h" -#include "handle.h" +#include "mmdeploy/common.h" #include "mmdeploy/core/mat.h" #include "mmdeploy/core/value.h" -#include "model.h" -#include "pipeline.h" +#include "mmdeploy/handle.h" +#include "mmdeploy/model.h" +#include "mmdeploy/pipeline.h" using namespace mmdeploy; diff --git a/csrc/mmdeploy/apis/c/mmdeploy/detector.cpp b/csrc/mmdeploy/apis/c/mmdeploy/detector.cpp index 9086f3c950..7267e2d178 100644 --- a/csrc/mmdeploy/apis/c/mmdeploy/detector.cpp +++ b/csrc/mmdeploy/apis/c/mmdeploy/detector.cpp @@ -1,6 +1,6 @@ // Copyright (c) OpenMMLab. All rights reserved. -#include "detector.h" +#include "mmdeploy/detector.h" #include #include diff --git a/csrc/mmdeploy/apis/c/mmdeploy/detector.h b/csrc/mmdeploy/apis/c/mmdeploy/detector.h index 5c6df5aed9..5c5ba2f356 100644 --- a/csrc/mmdeploy/apis/c/mmdeploy/detector.h +++ b/csrc/mmdeploy/apis/c/mmdeploy/detector.h @@ -8,9 +8,9 @@ #ifndef MMDEPLOY_DETECTOR_H #define MMDEPLOY_DETECTOR_H -#include "common.h" -#include "executor.h" -#include "model.h" +#include "mmdeploy/common.h" +#include "mmdeploy/executor.h" +#include "mmdeploy/model.h" #ifdef __cplusplus extern "C" { diff --git a/csrc/mmdeploy/apis/c/mmdeploy/executor.cpp b/csrc/mmdeploy/apis/c/mmdeploy/executor.cpp index aa1966d30b..2fdfb9091f 100644 --- a/csrc/mmdeploy/apis/c/mmdeploy/executor.cpp +++ b/csrc/mmdeploy/apis/c/mmdeploy/executor.cpp @@ -1,11 +1,11 @@ // Copyright (c) OpenMMLab. All rights reserved. -#include "executor.h" +#include "mmdeploy/executor.h" -#include "common.h" -#include "common_internal.h" -#include "executor_internal.h" +#include "mmdeploy/common.h" +#include "mmdeploy/common_internal.h" #include "mmdeploy/execution/when_all_value.h" +#include "mmdeploy/executor_internal.h" using namespace mmdeploy; diff --git a/csrc/mmdeploy/apis/c/mmdeploy/executor.h b/csrc/mmdeploy/apis/c/mmdeploy/executor.h index ecc4bf22fc..a2c8ffa387 100644 --- a/csrc/mmdeploy/apis/c/mmdeploy/executor.h +++ b/csrc/mmdeploy/apis/c/mmdeploy/executor.h @@ -3,7 +3,7 @@ #ifndef MMDEPLOY_CSRC_APIS_C_EXECUTOR_H_ #define MMDEPLOY_CSRC_APIS_C_EXECUTOR_H_ -#include "common.h" +#include "mmdeploy/common.h" #if __cplusplus extern "C" { diff --git a/csrc/mmdeploy/apis/c/mmdeploy/executor_internal.h b/csrc/mmdeploy/apis/c/mmdeploy/executor_internal.h index c1a5c94526..95f39fe009 100644 --- a/csrc/mmdeploy/apis/c/mmdeploy/executor_internal.h +++ b/csrc/mmdeploy/apis/c/mmdeploy/executor_internal.h @@ -3,8 +3,8 @@ #ifndef MMDEPLOY_CSRC_APIS_C_EXECUTOR_INTERNAL_H_ #define MMDEPLOY_CSRC_APIS_C_EXECUTOR_INTERNAL_H_ -#include "executor.h" #include "mmdeploy/execution/schedulers/registry.h" +#include "mmdeploy/executor.h" using namespace mmdeploy; diff --git a/csrc/mmdeploy/apis/c/mmdeploy/model.cpp b/csrc/mmdeploy/apis/c/mmdeploy/model.cpp index ffde1d05f1..6d202bce81 100644 --- a/csrc/mmdeploy/apis/c/mmdeploy/model.cpp +++ b/csrc/mmdeploy/apis/c/mmdeploy/model.cpp @@ -1,11 +1,11 @@ // Copyright (c) OpenMMLab. All rights reserved. // clang-format off -#include "model.h" +#include "mmdeploy/model.h" #include -#include "common_internal.h" +#include "mmdeploy/common_internal.h" #include "mmdeploy/core/logger.h" #include "mmdeploy/core/model.h" // clang-format on diff --git a/csrc/mmdeploy/apis/c/mmdeploy/model.h b/csrc/mmdeploy/apis/c/mmdeploy/model.h index 32d3ef3b9c..394d2902c2 100644 --- a/csrc/mmdeploy/apis/c/mmdeploy/model.h +++ b/csrc/mmdeploy/apis/c/mmdeploy/model.h @@ -8,7 +8,7 @@ #ifndef MMDEPLOY_SRC_APIS_C_MODEL_H_ #define MMDEPLOY_SRC_APIS_C_MODEL_H_ -#include "common.h" +#include "mmdeploy/common.h" #ifdef __cplusplus extern "C" { diff --git a/csrc/mmdeploy/apis/c/mmdeploy/pipeline.cpp b/csrc/mmdeploy/apis/c/mmdeploy/pipeline.cpp index 287896439e..5f7fb06e50 100644 --- a/csrc/mmdeploy/apis/c/mmdeploy/pipeline.cpp +++ b/csrc/mmdeploy/apis/c/mmdeploy/pipeline.cpp @@ -1,10 +1,10 @@ // Copyright (c) OpenMMLab. All rights reserved. -#include "pipeline.h" +#include "mmdeploy/pipeline.h" -#include "common_internal.h" -#include "executor_internal.h" -#include "handle.h" +#include "mmdeploy/common_internal.h" +#include "mmdeploy/executor_internal.h" +#include "mmdeploy/handle.h" int mmdeploy_pipeline_create_v3(mmdeploy_value_t config, mmdeploy_context_t context, mmdeploy_pipeline_t* pipeline) { diff --git a/csrc/mmdeploy/apis/c/mmdeploy/pipeline.h b/csrc/mmdeploy/apis/c/mmdeploy/pipeline.h index d517b68077..b159aad86e 100644 --- a/csrc/mmdeploy/apis/c/mmdeploy/pipeline.h +++ b/csrc/mmdeploy/apis/c/mmdeploy/pipeline.h @@ -3,8 +3,8 @@ #ifndef MMDEPLOY_CSRC_APIS_C_PIPELINE_H_ #define MMDEPLOY_CSRC_APIS_C_PIPELINE_H_ -#include "common.h" -#include "executor.h" +#include "mmdeploy/common.h" +#include "mmdeploy/executor.h" #ifdef __cplusplus extern "C" { diff --git a/csrc/mmdeploy/apis/c/mmdeploy/pose_detector.cpp b/csrc/mmdeploy/apis/c/mmdeploy/pose_detector.cpp index e4fffd4655..6a7a95d794 100644 --- a/csrc/mmdeploy/apis/c/mmdeploy/pose_detector.cpp +++ b/csrc/mmdeploy/apis/c/mmdeploy/pose_detector.cpp @@ -1,17 +1,17 @@ // Copyright (c) OpenMMLab. All rights reserved. -#include "pose_detector.h" +#include "mmdeploy/pose_detector.h" #include -#include "common_internal.h" -#include "handle.h" #include "mmdeploy/codebase/mmpose/mmpose.h" +#include "mmdeploy/common_internal.h" #include "mmdeploy/core/device.h" #include "mmdeploy/core/graph.h" #include "mmdeploy/core/mat.h" #include "mmdeploy/core/utils/formatter.h" -#include "pipeline.h" +#include "mmdeploy/handle.h" +#include "mmdeploy/pipeline.h" using namespace std; using namespace mmdeploy; diff --git a/csrc/mmdeploy/apis/c/mmdeploy/pose_detector.h b/csrc/mmdeploy/apis/c/mmdeploy/pose_detector.h index 9d467aa907..ff0987cee4 100644 --- a/csrc/mmdeploy/apis/c/mmdeploy/pose_detector.h +++ b/csrc/mmdeploy/apis/c/mmdeploy/pose_detector.h @@ -8,9 +8,9 @@ #ifndef MMDEPLOY_SRC_APIS_C_POSE_DETECTOR_H_ #define MMDEPLOY_SRC_APIS_C_POSE_DETECTOR_H_ -#include "common.h" -#include "executor.h" -#include "model.h" +#include "mmdeploy/common.h" +#include "mmdeploy/executor.h" +#include "mmdeploy/model.h" #ifdef __cplusplus extern "C" { diff --git a/csrc/mmdeploy/apis/c/mmdeploy/pose_tracker.cpp b/csrc/mmdeploy/apis/c/mmdeploy/pose_tracker.cpp new file mode 100644 index 0000000000..113b520c39 --- /dev/null +++ b/csrc/mmdeploy/apis/c/mmdeploy/pose_tracker.cpp @@ -0,0 +1,225 @@ +// Copyright (c) OpenMMLab. All rights reserved. + +#include "mmdeploy/pose_tracker.h" + +#include "mmdeploy/archive/json_archive.h" +#include "mmdeploy/archive/value_archive.h" +#include "mmdeploy/codebase/mmpose/pose_tracker/common.h" +#include "mmdeploy/common_internal.h" +#include "mmdeploy/core/mpl/structure.h" +#include "mmdeploy/pipeline.h" + +namespace mmdeploy { + +using namespace framework; + +} // namespace mmdeploy + +using namespace mmdeploy; + +namespace { + +Value config_template() { + static const auto json = R"( +{ + "type": "Pipeline", + "input": ["img", "force_det", "state"], + "output": "targets", + "tasks": [ + { + "type": "Task", + "name": "prepare", + "module": "pose_tracker::Prepare", + "input": ["img", "force_det", "state"], + "output": "use_det" + }, + { + "type": "Task", + "module": "Transform", + "name": "preload", + "input": "img", + "output": "data", + "transforms": [ { "type": "LoadImageFromFile" } ] + }, + { + "type": "Cond", + "name": "cond", + "input": ["use_det", "data"], + "output": "dets", + "body": { + "name": "detection", + "type": "Inference", + "params": { "model": "detection" } + } + }, + { + "type": "Task", + "name": "process_bboxes", + "module": "pose_tracker::ProcessBboxes", + "input": ["dets", "data", "state"], + "output": ["rois", "track_ids"] + }, + { + "input": "*rois", + "output": "*keypoints", + "name": "pose", + "type": "Inference", + "params": { "model": "pose" } + }, + { + "type": "Task", + "name": "track_step", + "module": "pose_tracker::TrackStep", + "scheduler": "pool", + "input": ["keypoints", "track_ids", "state"], + "output": "targets" + } + ] +} +)"_json; + static const auto config = from_json(json); + return config; +} + +} // namespace + +int mmdeploy_pose_tracker_default_params(mmdeploy_pose_tracker_param_t* params) { + mmpose::_pose_tracker::SetDefaultParams(*params); + return 0; +} + +int mmdeploy_pose_tracker_create(mmdeploy_model_t det_model, mmdeploy_model_t pose_model, + mmdeploy_context_t context, mmdeploy_pose_tracker_t* pipeline) { + mmdeploy_context_add(context, MMDEPLOY_TYPE_MODEL, "detection", det_model); + mmdeploy_context_add(context, MMDEPLOY_TYPE_MODEL, "pose", pose_model); + auto config = config_template(); + return mmdeploy_pipeline_create_v3(Cast(&config), context, (mmdeploy_pipeline_t*)pipeline); +} + +void mmdeploy_pose_tracker_destroy(mmdeploy_pose_tracker_t pipeline) { + mmdeploy_pipeline_destroy((mmdeploy_pipeline_t)pipeline); +} + +int mmdeploy_pose_tracker_create_state(mmdeploy_pose_tracker_t pipeline, + const mmdeploy_pose_tracker_param_t* params, + mmdeploy_pose_tracker_state_t* state) { + try { + auto create_fn = gRegistry().Create("pose_tracker::Create", Value()).value(); + *state = reinterpret_cast(new Value( + create_fn->Process({const_cast(params)}).value()[0])); + return MMDEPLOY_SUCCESS; + } catch (const std::exception& e) { + MMDEPLOY_ERROR("unhandled exception: {}", e.what()); + } catch (...) { + MMDEPLOY_ERROR("unknown exception caught"); + } + return MMDEPLOY_E_FAIL; +} + +void mmdeploy_pose_tracker_destroy_state(mmdeploy_pose_tracker_state_t state) { + delete reinterpret_cast(state); +} + +int mmdeploy_pose_tracker_create_input(mmdeploy_pose_tracker_state_t* states, + const mmdeploy_mat_t* frames, const int32_t* use_detect, + int batch_size, mmdeploy_value_t* value) { + try { + Value::Array images; + Value::Array use_dets; + Value::Array trackers; + for (int i = 0; i < batch_size; ++i) { + images.push_back({{"ori_img", Cast(frames[i])}}); + use_dets.emplace_back(use_detect ? use_detect[i] : -1); + trackers.push_back(*reinterpret_cast(states[i])); + } + *value = Take(Value{std::move(images), std::move(use_dets), std::move(trackers)}); + return MMDEPLOY_SUCCESS; + } catch (const std::exception& e) { + MMDEPLOY_ERROR("unhandled exception: {}", e.what()); + } catch (...) { + MMDEPLOY_ERROR("unknown exception caught"); + } + return MMDEPLOY_E_FAIL; +} + +using ResultType = mmdeploy::Structure, + std::vector>; + +int mmdeploy_pose_tracker_get_result(mmdeploy_value_t output, + mmdeploy_pose_tracker_target_t** results, + int32_t** result_count) { + if (!output || !results) { + return MMDEPLOY_E_INVALID_ARG; + } + try { + // convert result from Values + std::vector res; + from_value(Cast(output)->front(), res); + + size_t total = 0; + for (const auto& r : res) { + total += r.bboxes.size(); + } + + // preserve space for the output structure + ResultType result_type({total, 1, 1}); + auto [result_data, result_cnt, result_holder] = result_type.pointers(); + + auto result_ptr = result_data; + + result_holder->swap(res); + + // build output structure + for (auto& r : *result_holder) { + for (int j = 0; j < r.bboxes.size(); ++j) { + auto& p = *result_ptr++; + p.keypoint_count = static_cast(r.keypoints[j].size()); + p.keypoints = r.keypoints[j].data(); + p.scores = r.scores[j].data(); + p.bbox = r.bboxes[j]; + p.target_id = r.track_ids[j]; + } + result_cnt->push_back(r.bboxes.size()); + // debug info + // p.reserved0 = new std::vector(r.pose_input_bboxes); + // p.reserved1 = new std::vector(r.pose_output_bboxes); + } + + *results = result_data; + *result_count = result_cnt->data(); + result_type.release(); + + return MMDEPLOY_SUCCESS; + + } catch (const std::exception& e) { + MMDEPLOY_ERROR("unhandled exception: {}", e.what()); + } catch (...) { + MMDEPLOY_ERROR("unknown exception caught"); + } + return MMDEPLOY_E_FAIL; +} + +int mmdeploy_pose_tracker_apply(mmdeploy_pose_tracker_t pipeline, + mmdeploy_pose_tracker_state_t* states, const mmdeploy_mat_t* frames, + const int32_t* use_detect, int32_t count, + mmdeploy_pose_tracker_target_t** results, int32_t** result_count) { + wrapped input; + if (auto ec = + mmdeploy_pose_tracker_create_input(states, frames, use_detect, count, input.ptr())) { + return ec; + } + wrapped output; + if (auto ec = mmdeploy_pipeline_apply((mmdeploy_pipeline_t)pipeline, input, output.ptr())) { + return ec; + } + if (auto ec = mmdeploy_pose_tracker_get_result(output, results, result_count)) { + return ec; + } + return MMDEPLOY_SUCCESS; +} + +void mmdeploy_pose_tracker_release_result(mmdeploy_pose_tracker_target_t* results, + const int32_t* result_count, int count) { + auto total = std::accumulate(result_count, result_count + count, 0); + ResultType deleter({static_cast(total), 1, 1}, results); +} diff --git a/csrc/mmdeploy/apis/c/mmdeploy/pose_tracker.h b/csrc/mmdeploy/apis/c/mmdeploy/pose_tracker.h new file mode 100644 index 0000000000..f895af4150 --- /dev/null +++ b/csrc/mmdeploy/apis/c/mmdeploy/pose_tracker.h @@ -0,0 +1,158 @@ +// Copyright (c) OpenMMLab. All rights reserved. + +/** + * @file pose_tracker.h + * @brief Pose tracker C API + */ + +#ifndef MMDEPLOY_POSE_TRACKER_H +#define MMDEPLOY_POSE_TRACKER_H + +#include "mmdeploy/common.h" +#include "mmdeploy/detector.h" +#include "mmdeploy/model.h" +#include "mmdeploy/pose_detector.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct mmdeploy_pose_tracker* mmdeploy_pose_tracker_t; +typedef struct mmdeploy_pose_tracker_state* mmdeploy_pose_tracker_state_t; + +typedef struct mmdeploy_pose_tracker_param_t { + // detection interval, default = 1 + int32_t det_interval; + // detection label use for pose estimation, default = 0 + int32_t det_label; + // detection score threshold, default = 0.5 + float det_thr; + // detection minimum bbox size (compute as sqrt(area)), default = -1 + float det_min_bbox_size; + // nms iou threshold for merging detected bboxes and bboxes from tracked targets, default = 0.7 + float det_nms_thr; + + // max number of bboxes used for pose estimation per frame, default = -1 + int32_t pose_max_num_bboxes; + // threshold for visible key-points, default = 0.5 + float pose_kpt_thr; + // min number of key-points for valid poses, default = -1 + int32_t pose_min_keypoints; + // scale for expanding key-points to bbox, default = 1.25 + float pose_bbox_scale; + // min pose bbox size, tracks with bbox size smaller than the threshold will be dropped, + // default = -1 + float pose_min_bbox_size; + // nms oks/iou threshold for suppressing overlapped poses, useful when multiple pose estimations + // collapse to the same target, default = 0.5 + float pose_nms_thr; + // keypoint sigmas for computing OKS, will use IOU if not set, default = nullptr + float* keypoint_sigmas; + // size of keypoint sigma array, must be consistent with the number of key-points, default = 0 + int32_t keypoint_sigmas_size; + + // iou threshold for associating missing tracks, default = 0.4 + float track_iou_thr; + // max number of missing frames before a missing tracks is removed, default = 10 + int32_t track_max_missing; + // track history size, default = 1 + int32_t track_history_size; + + // weight of position for setting covariance matrices of kalman filters, default = 0.05 + float std_weight_position; + // weight of velocity for setting covariance matrices of kalman filters, default = 0.00625 + float std_weight_velocity; + + // params for the one-euro filter for smoothing the outputs - (beta, fc_min, fc_derivative) + // default = (0.007, 1, 1) + float smooth_params[3]; +} mmdeploy_pose_tracker_param_t; + +typedef struct mmdeploy_pose_tracker_target_t { + mmdeploy_point_t* keypoints; // key-points of the target + int32_t keypoint_count; // size of `keypoints` array + float* scores; // scores of each key-point + mmdeploy_rect_t bbox; // estimated bbox from key-points + uint32_t target_id; // target id from internal tracker +} mmdeploy_pose_tracker_target_t; + +/** + * @brief Fill params with default parameters + * @param[in,out] params + * @return status of the operation + */ +MMDEPLOY_API int mmdeploy_pose_tracker_default_params(mmdeploy_pose_tracker_param_t* params); + +/** + * @brief Create pose tracker pipeline + * @param[in] det_model detection model object, created by \ref mmdeploy_model_create + * @param[in] pose_model pose model object + * @param[in] context context object describing execution environment (device, profiler, etc...), + * created by \ref mmdeploy_context_create + * @param[out] pipeline handle of the created pipeline + * @return status of the operation + */ +MMDEPLOY_API int mmdeploy_pose_tracker_create(mmdeploy_model_t det_model, + mmdeploy_model_t pose_model, + mmdeploy_context_t context, + mmdeploy_pose_tracker_t* pipeline); + +/** + * @brief Destroy pose tracker pipeline + * @param[in] pipeline + */ +MMDEPLOY_API void mmdeploy_pose_tracker_destroy(mmdeploy_pose_tracker_t pipeline); + +/** + * @brief Create a tracker state handle corresponds to a video stream + * @param[in] pipeline handle of a pose tracker pipeline + * @param[in] params params for creating the tracker state + * @param[out] state handle of the created tracker state + * @return status of the operation + */ +MMDEPLOY_API int mmdeploy_pose_tracker_create_state(mmdeploy_pose_tracker_t pipeline, + const mmdeploy_pose_tracker_param_t* params, + mmdeploy_pose_tracker_state_t* state); + +/** + * @brief Destroy tracker state + * @param[in] state handle of the tracker state + */ +MMDEPLOY_API void mmdeploy_pose_tracker_destroy_state(mmdeploy_pose_tracker_state_t state); + +/** + * @brief Apply pose tracker pipeline, notice that this function supports batch operation by feeding + * arrays of size \p count to \p states, \p frames and \p use_detect + * @param[in] pipeline handle of a pose tracker pipeline + * @param[in] states tracker states handles, array of size \p count + * @param[in] frames input frames of size \p count + * @param[in] use_detect control the use of detector, array of size \p count + * -1: use params.det_interval, 0: don't use detector, 1: force use detector + * @param[in] count batch size + * @param[out] results a linear buffer contains the tracked targets of input frames. Should be + * released by \ref mmdeploy_pose_tracker_release_result + * @param[out] result_count a linear buffer of size \p count contains the number of tracked + * targets of the frames. Should be released by \ref mmdeploy_pose_tracker_release_result + * @return status of the operation + */ +MMDEPLOY_API int mmdeploy_pose_tracker_apply(mmdeploy_pose_tracker_t pipeline, + mmdeploy_pose_tracker_state_t* states, + const mmdeploy_mat_t* frames, + const int32_t* use_detect, int32_t count, + mmdeploy_pose_tracker_target_t** results, + int32_t** result_count); + +/** + * @brief Release result objects + * @param[in] results + * @param[in] result_count + * @param[in] count + */ +MMDEPLOY_API void mmdeploy_pose_tracker_release_result(mmdeploy_pose_tracker_target_t* results, + const int32_t* result_count, int count); + +#ifdef __cplusplus +} +#endif + +#endif // MMDEPLOY_POSE_TRACKER_H diff --git a/csrc/mmdeploy/apis/c/mmdeploy/restorer.cpp b/csrc/mmdeploy/apis/c/mmdeploy/restorer.cpp index 14b5075006..e5b8391d6c 100644 --- a/csrc/mmdeploy/apis/c/mmdeploy/restorer.cpp +++ b/csrc/mmdeploy/apis/c/mmdeploy/restorer.cpp @@ -1,16 +1,16 @@ // Copyright (c) OpenMMLab. All rights reserved. -#include "restorer.h" +#include "mmdeploy/restorer.h" -#include "common_internal.h" -#include "executor_internal.h" -#include "handle.h" #include "mmdeploy/codebase/mmedit/mmedit.h" +#include "mmdeploy/common_internal.h" #include "mmdeploy/core/device.h" #include "mmdeploy/core/graph.h" #include "mmdeploy/core/mpl/structure.h" #include "mmdeploy/core/utils/formatter.h" -#include "pipeline.h" +#include "mmdeploy/executor_internal.h" +#include "mmdeploy/handle.h" +#include "mmdeploy/pipeline.h" using namespace mmdeploy; diff --git a/csrc/mmdeploy/apis/c/mmdeploy/restorer.h b/csrc/mmdeploy/apis/c/mmdeploy/restorer.h index b67c7b3d77..9ab529850f 100644 --- a/csrc/mmdeploy/apis/c/mmdeploy/restorer.h +++ b/csrc/mmdeploy/apis/c/mmdeploy/restorer.h @@ -8,9 +8,9 @@ #ifndef MMDEPLOY_SRC_APIS_C_RESTORER_H_ #define MMDEPLOY_SRC_APIS_C_RESTORER_H_ -#include "common.h" -#include "executor.h" -#include "model.h" +#include "mmdeploy/common.h" +#include "mmdeploy/executor.h" +#include "mmdeploy/model.h" #ifdef __cplusplus extern "C" { diff --git a/csrc/mmdeploy/apis/c/mmdeploy/rotated_detector.cpp b/csrc/mmdeploy/apis/c/mmdeploy/rotated_detector.cpp index 278dcfa0fc..317b31c731 100644 --- a/csrc/mmdeploy/apis/c/mmdeploy/rotated_detector.cpp +++ b/csrc/mmdeploy/apis/c/mmdeploy/rotated_detector.cpp @@ -1,16 +1,16 @@ // Copyright (c) OpenMMLab. All rights reserved. -#include "rotated_detector.h" +#include "mmdeploy/rotated_detector.h" #include -#include "common_internal.h" -#include "handle.h" #include "mmdeploy/codebase/mmrotate/mmrotate.h" +#include "mmdeploy/common_internal.h" #include "mmdeploy/core/graph.h" #include "mmdeploy/core/mat.h" #include "mmdeploy/core/utils/formatter.h" -#include "pipeline.h" +#include "mmdeploy/handle.h" +#include "mmdeploy/pipeline.h" using namespace std; using namespace mmdeploy; diff --git a/csrc/mmdeploy/apis/c/mmdeploy/rotated_detector.h b/csrc/mmdeploy/apis/c/mmdeploy/rotated_detector.h index 52e2674b34..35125a74ff 100644 --- a/csrc/mmdeploy/apis/c/mmdeploy/rotated_detector.h +++ b/csrc/mmdeploy/apis/c/mmdeploy/rotated_detector.h @@ -8,9 +8,9 @@ #ifndef MMDEPLOY_SRC_APIS_C_ROTATED_DETECTOR_H_ #define MMDEPLOY_SRC_APIS_C_ROTATED_DETECTOR_H_ -#include "common.h" -#include "executor.h" -#include "model.h" +#include "mmdeploy/common.h" +#include "mmdeploy/executor.h" +#include "mmdeploy/model.h" #ifdef __cplusplus extern "C" { diff --git a/csrc/mmdeploy/apis/c/mmdeploy/segmentor.cpp b/csrc/mmdeploy/apis/c/mmdeploy/segmentor.cpp index c93445b0a0..313a37dc04 100644 --- a/csrc/mmdeploy/apis/c/mmdeploy/segmentor.cpp +++ b/csrc/mmdeploy/apis/c/mmdeploy/segmentor.cpp @@ -1,17 +1,17 @@ // Copyright (c) OpenMMLab. All rights reserved. -#include "segmentor.h" +#include "mmdeploy/segmentor.h" -#include "common_internal.h" -#include "handle.h" #include "mmdeploy/codebase/mmseg/mmseg.h" +#include "mmdeploy/common_internal.h" #include "mmdeploy/core/device.h" #include "mmdeploy/core/graph.h" #include "mmdeploy/core/mat.h" #include "mmdeploy/core/mpl/structure.h" #include "mmdeploy/core/tensor.h" #include "mmdeploy/core/utils/formatter.h" -#include "pipeline.h" +#include "mmdeploy/handle.h" +#include "mmdeploy/pipeline.h" using namespace std; using namespace mmdeploy; diff --git a/csrc/mmdeploy/apis/c/mmdeploy/segmentor.h b/csrc/mmdeploy/apis/c/mmdeploy/segmentor.h index 70a0f1488c..7ae77a03f1 100644 --- a/csrc/mmdeploy/apis/c/mmdeploy/segmentor.h +++ b/csrc/mmdeploy/apis/c/mmdeploy/segmentor.h @@ -8,9 +8,9 @@ #ifndef MMDEPLOY_SEGMENTOR_H #define MMDEPLOY_SEGMENTOR_H -#include "common.h" -#include "executor.h" -#include "model.h" +#include "mmdeploy/common.h" +#include "mmdeploy/executor.h" +#include "mmdeploy/model.h" #ifdef __cplusplus extern "C" { diff --git a/csrc/mmdeploy/apis/c/mmdeploy/text_detector.cpp b/csrc/mmdeploy/apis/c/mmdeploy/text_detector.cpp index ae965860a5..2b68d98636 100644 --- a/csrc/mmdeploy/apis/c/mmdeploy/text_detector.cpp +++ b/csrc/mmdeploy/apis/c/mmdeploy/text_detector.cpp @@ -1,17 +1,17 @@ // Copyright (c) OpenMMLab. All rights reserved. -#include "text_detector.h" +#include "mmdeploy/text_detector.h" #include -#include "common_internal.h" -#include "executor_internal.h" #include "mmdeploy/codebase/mmocr/mmocr.h" +#include "mmdeploy/common_internal.h" #include "mmdeploy/core/model.h" #include "mmdeploy/core/status_code.h" #include "mmdeploy/core/utils/formatter.h" -#include "model.h" -#include "pipeline.h" +#include "mmdeploy/executor_internal.h" +#include "mmdeploy/model.h" +#include "mmdeploy/pipeline.h" using namespace std; using namespace mmdeploy; diff --git a/csrc/mmdeploy/apis/c/mmdeploy/text_detector.h b/csrc/mmdeploy/apis/c/mmdeploy/text_detector.h index ef3119bb20..a3c38dc6f6 100644 --- a/csrc/mmdeploy/apis/c/mmdeploy/text_detector.h +++ b/csrc/mmdeploy/apis/c/mmdeploy/text_detector.h @@ -8,9 +8,9 @@ #ifndef MMDEPLOY_TEXT_DETECTOR_H #define MMDEPLOY_TEXT_DETECTOR_H -#include "common.h" -#include "executor.h" -#include "model.h" +#include "mmdeploy/common.h" +#include "mmdeploy/executor.h" +#include "mmdeploy/model.h" #ifdef __cplusplus extern "C" { diff --git a/csrc/mmdeploy/apis/c/mmdeploy/text_recognizer.cpp b/csrc/mmdeploy/apis/c/mmdeploy/text_recognizer.cpp index 4a29ddaa80..7644da9811 100644 --- a/csrc/mmdeploy/apis/c/mmdeploy/text_recognizer.cpp +++ b/csrc/mmdeploy/apis/c/mmdeploy/text_recognizer.cpp @@ -1,21 +1,21 @@ // Copyright (c) OpenMMLab. All rights reserved. -#include "text_recognizer.h" +#include "mmdeploy/text_recognizer.h" #include -#include "common_internal.h" -#include "executor_internal.h" #include "mmdeploy/archive/value_archive.h" #include "mmdeploy/codebase/mmocr/mmocr.h" +#include "mmdeploy/common_internal.h" #include "mmdeploy/core/device.h" #include "mmdeploy/core/mat.h" #include "mmdeploy/core/model.h" #include "mmdeploy/core/status_code.h" #include "mmdeploy/core/utils/formatter.h" #include "mmdeploy/core/value.h" -#include "model.h" -#include "pipeline.h" +#include "mmdeploy/executor_internal.h" +#include "mmdeploy/model.h" +#include "mmdeploy/pipeline.h" using namespace mmdeploy; diff --git a/csrc/mmdeploy/apis/c/mmdeploy/text_recognizer.h b/csrc/mmdeploy/apis/c/mmdeploy/text_recognizer.h index 638dda00ea..6c18928242 100644 --- a/csrc/mmdeploy/apis/c/mmdeploy/text_recognizer.h +++ b/csrc/mmdeploy/apis/c/mmdeploy/text_recognizer.h @@ -8,9 +8,9 @@ #ifndef MMDEPLOY_SRC_APIS_C_TEXT_RECOGNIZER_H_ #define MMDEPLOY_SRC_APIS_C_TEXT_RECOGNIZER_H_ -#include "common.h" -#include "executor.h" -#include "text_detector.h" +#include "mmdeploy/common.h" +#include "mmdeploy/executor.h" +#include "mmdeploy/text_detector.h" #ifdef __cplusplus extern "C" { diff --git a/csrc/mmdeploy/apis/c/mmdeploy/video_recognizer.cpp b/csrc/mmdeploy/apis/c/mmdeploy/video_recognizer.cpp index 60d8b5c832..3e16480d91 100644 --- a/csrc/mmdeploy/apis/c/mmdeploy/video_recognizer.cpp +++ b/csrc/mmdeploy/apis/c/mmdeploy/video_recognizer.cpp @@ -1,22 +1,22 @@ // Copyright (c) OpenMMLab. All rights reserved. -#include "video_recognizer.h" +#include "mmdeploy/video_recognizer.h" #include #include -#include "common_internal.h" -#include "executor_internal.h" #include "mmdeploy/archive/value_archive.h" #include "mmdeploy/codebase/mmaction/mmaction.h" +#include "mmdeploy/common_internal.h" #include "mmdeploy/core/device.h" #include "mmdeploy/core/mat.h" #include "mmdeploy/core/model.h" #include "mmdeploy/core/status_code.h" #include "mmdeploy/core/utils/formatter.h" #include "mmdeploy/core/value.h" -#include "model.h" -#include "pipeline.h" +#include "mmdeploy/executor_internal.h" +#include "mmdeploy/model.h" +#include "mmdeploy/pipeline.h" using namespace mmdeploy; diff --git a/csrc/mmdeploy/apis/c/mmdeploy/video_recognizer.h b/csrc/mmdeploy/apis/c/mmdeploy/video_recognizer.h index 380da5d361..e98b2bd07e 100644 --- a/csrc/mmdeploy/apis/c/mmdeploy/video_recognizer.h +++ b/csrc/mmdeploy/apis/c/mmdeploy/video_recognizer.h @@ -8,9 +8,9 @@ #ifndef MMDEPLOY_VIDEO_RECOGNIZER_H #define MMDEPLOY_VIDEO_RECOGNIZER_H -#include "common.h" -#include "executor.h" -#include "model.h" +#include "mmdeploy/common.h" +#include "mmdeploy/executor.h" +#include "mmdeploy/model.h" #ifdef __cplusplus extern "C" { diff --git a/csrc/mmdeploy/apis/cxx/mmdeploy/common.hpp b/csrc/mmdeploy/apis/cxx/mmdeploy/common.hpp index c0d920180e..221b78104d 100644 --- a/csrc/mmdeploy/apis/cxx/mmdeploy/common.hpp +++ b/csrc/mmdeploy/apis/cxx/mmdeploy/common.hpp @@ -28,6 +28,30 @@ namespace cxx { using Rect = mmdeploy_rect_t; +template +class UniqueHandle : public NonCopyable { + public: + UniqueHandle() = default; + explicit UniqueHandle(T handle) : handle_(handle) {} + + // derived class must destroy the object and reset `handle_` + ~UniqueHandle() { assert(handle_ == nullptr); } + + UniqueHandle(UniqueHandle&& o) noexcept : handle_(std::exchange(o.handle_, nullptr)) {} + UniqueHandle& operator=(UniqueHandle&& o) noexcept { + if (this != &o) { + handle_ = std::exchange(o.handle_, nullptr); + } + return *this; + } + + explicit operator T() const noexcept { return handle_; } + T operator->() const noexcept { return handle_; } + + protected: + T handle_{}; +}; + class Model { public: explicit Model(const char* path) { @@ -39,6 +63,8 @@ class Model { model_.reset(model, [](auto p) { mmdeploy_model_destroy(p); }); } + explicit Model(const std::string& path) : Model(path.c_str()) {} + Model(const void* buffer, size_t size) { mmdeploy_model_t model{}; auto ec = mmdeploy_model_create(buffer, static_cast(size), &model); @@ -102,6 +128,8 @@ class Mat { mmdeploy_data_type_t type, uint8_t* data, mmdeploy_device_t device = nullptr) : desc_{data, height, width, channels, format, type, device} {} + Mat(const mmdeploy_mat_t& desc) : desc_(desc) {} // NOLINT + const mmdeploy_mat_t& desc() const noexcept { return desc_; } #if MMDEPLOY_CXX_USE_OPENCV @@ -146,6 +174,16 @@ class Mat { template class Result_ { public: + using value_type = T; + using size_type = size_t; + using difference_type = ptrdiff_t; + using reference = T&; + using const_reference = const T&; + using pointer = T*; + using const_pointer = const T*; + using iterator = T*; + using const_iterator = T*; + Result_(size_t offset, size_t size, std::shared_ptr data) : offset_(offset), size_(size), data_(std::move(data)) {} diff --git a/csrc/mmdeploy/apis/cxx/mmdeploy/pose_tracker.hpp b/csrc/mmdeploy/apis/cxx/mmdeploy/pose_tracker.hpp new file mode 100644 index 0000000000..077ec75700 --- /dev/null +++ b/csrc/mmdeploy/apis/cxx/mmdeploy/pose_tracker.hpp @@ -0,0 +1,151 @@ +// Copyright (c) OpenMMLab. All rights reserved. + +#ifndef MMDEPLOY_POSE_TRACKER_HPP +#define MMDEPLOY_POSE_TRACKER_HPP + +#include "mmdeploy/common.hpp" +#include "mmdeploy/pose_tracker.h" + +namespace mmdeploy { + +namespace cxx { + +class PoseTracker : public UniqueHandle { + public: + using Result = Result_; + class State; + class Params; + + public: + /** + * @brief Create pose tracker pipeline + * @param detect object detection model + * @param pose pose estimation model + * @param context execution context + */ + PoseTracker(const Model& detect, const Model& pose, const Context& context) { + auto ec = mmdeploy_pose_tracker_create(detect, pose, context, &handle_); + if (ec != MMDEPLOY_SUCCESS) { + throw_exception(static_cast(ec)); + } + } + ~PoseTracker() { + if (handle_) { + mmdeploy_pose_tracker_destroy(handle_); + handle_ = {}; + } + } + PoseTracker(PoseTracker&&) noexcept = default; + + /** + * @brief Create a tracker state corresponds to a video stream + * @param params params for creating the tracker state + * @return created tracker state + */ + State CreateState(const Params& params); + + /** + * @brief Apply pose tracker pipeline + * @param state tracker state + * @param frame input video frame + * @param detect control the use of detector + * -1: use params.det_interval, 0: don't use detector, 1: force use detector + * @return + */ + Result Apply(State& state, const Mat& frame, int detect = -1); + + /** + * @brief batched version of Apply + * @param states + * @param frames + * @param detects + * @return + */ + std::vector Apply(const Span& states, const Span& frames, + const Span& detects = {}); + + public: + /** + * see \ref mmdeploy/pose_tracker.h for detail + */ + class Params : public UniqueHandle { + public: + explicit Params() { + handle_ = new mmdeploy_pose_tracker_param_t{}; + mmdeploy_pose_tracker_default_params(handle_); + } + ~Params() { + if (handle_) { + delete handle_; + handle_ = {}; + } + } + }; + + class State : public UniqueHandle { + public: + explicit State(mmdeploy_pose_tracker_t pipeline, const mmdeploy_pose_tracker_param_t* params) { + auto ec = mmdeploy_pose_tracker_create_state(pipeline, params, &handle_); + if (ec != MMDEPLOY_SUCCESS) { + throw_exception(static_cast(ec)); + } + } + ~State() { + if (handle_) { + mmdeploy_pose_tracker_destroy_state(handle_); + handle_ = {}; + } + } + State(State&&) noexcept = default; + }; +}; + +inline PoseTracker::State PoseTracker::CreateState(const PoseTracker::Params& params) { + return State(handle_, static_cast(params)); +} + +inline std::vector PoseTracker::Apply(const Span& states, + const Span& frames, + const Span& detects) { + if (frames.empty()) { + return {}; + } + mmdeploy_pose_tracker_target_t* results{}; + int32_t* result_count{}; + + auto ec = mmdeploy_pose_tracker_apply( + handle_, reinterpret_cast(states.data()), + reinterpret(frames.data()), detects.data(), static_cast(frames.size()), &results, + &result_count); + if (ec != MMDEPLOY_SUCCESS) { + throw_exception(static_cast(ec)); + } + + std::shared_ptr data( + results, [result_count, count = frames.size()](auto p) { + mmdeploy_pose_tracker_release_result(p, result_count, count); + }); + + std::vector rets; + rets.reserve(frames.size()); + + size_t offset = 0; + for (size_t i = 0; i < frames.size(); ++i) { + offset += rets.emplace_back(offset, result_count[i], data).size(); + } + + return rets; +} + +inline PoseTracker::Result PoseTracker::Apply(PoseTracker::State& state, const Mat& frame, + int32_t detect) { + return Apply(Span(&state, 1), Span{frame}, Span{detect})[0]; +} + +} // namespace cxx + +using cxx::PoseTracker; + +} // namespace mmdeploy + +#endif // MMDEPLOY_POSE_TRACKER_HPP diff --git a/csrc/mmdeploy/apis/python/pose_tracker.cpp b/csrc/mmdeploy/apis/python/pose_tracker.cpp new file mode 100644 index 0000000000..fea6895ae8 --- /dev/null +++ b/csrc/mmdeploy/apis/python/pose_tracker.cpp @@ -0,0 +1,149 @@ +// Copyright (c) OpenMMLab. All rights reserved. + +#include "mmdeploy/pose_tracker.hpp" + +#include "common.h" +#include "mmdeploy/common.hpp" + +namespace mmdeploy::python { + +namespace { + +std::vector Apply(mmdeploy::PoseTracker* self, + const std::vector& _states, + const std::vector& _frames, std::vector detect) { + std::vector tmp; + for (const auto& s : _states) { + tmp.push_back(static_cast(*s)); + } + mmdeploy::Span states(reinterpret_cast(tmp.data()), tmp.size()); + std::vector frames; + for (const auto& f : _frames) { + frames.emplace_back(GetMat(f)); + } + if (detect.empty()) { + detect.resize(frames.size(), -1); + } + assert(states.size() == frames.size()); + assert(states.size() == detect.size()); + auto results = self->Apply(states, frames, detect); + std::vector batch_ret; + batch_ret.reserve(frames.size()); + for (const auto& rs : results) { + py::array_t keypoints({static_cast(rs.size()), rs[0].keypoint_count, 3}); + py::array_t bboxes({static_cast(rs.size()), 4}); + py::array_t track_ids(static_cast(rs.size())); + auto kpts_ptr = keypoints.mutable_data(); + auto bbox_ptr = bboxes.mutable_data(); + auto track_id_ptr = track_ids.mutable_data(); + for (const auto& r : rs) { + for (int i = 0; i < r.keypoint_count; ++i) { + kpts_ptr[0] = r.keypoints[i].x; + kpts_ptr[1] = r.keypoints[i].y; + kpts_ptr[2] = r.scores[i]; + kpts_ptr += 3; + } + { + auto tmp_bbox = (std::array&)r.bbox; + bbox_ptr[0] = tmp_bbox[0]; + bbox_ptr[1] = tmp_bbox[1]; + bbox_ptr[2] = tmp_bbox[2]; + bbox_ptr[3] = tmp_bbox[3]; + bbox_ptr += 4; + } + *track_id_ptr++ = r.target_id; + } + batch_ret.push_back( + py::make_tuple(std::move(keypoints), std::move(bboxes), std::move(track_ids))); + } + return batch_ret; +} + +template +void Copy(const py::handle& h, T (&a)[N]) { + auto array = h.cast>(); + assert(array.size() == N); + auto data = array.data(); + for (int i = 0; i < N; ++i) { + a[i] = data[i]; + } +} + +void Parse(const py::dict& dict, PoseTracker::Params& params, py::array_t& sigmas) { + for (const auto& [_name, value] : dict) { + auto name = _name.cast(); + if (name == "det_interval") { + params->det_interval = value.cast(); + } else if (name == "det_label") { + params->det_label = value.cast(); + } else if (name == "det_thr") { + params->det_thr = value.cast(); + } else if (name == "det_min_bbox_size") { + params->det_min_bbox_size = value.cast(); + } else if (name == "det_nms_thr") { + params->det_nms_thr = value.cast(); + } else if (name == "pose_max_num_bboxes") { + params->pose_max_num_bboxes = value.cast(); + } else if (name == "pose_min_keypoints") { + params->pose_min_keypoints = value.cast(); + } else if (name == "pose_min_bbox_size") { + params->pose_min_bbox_size = value.cast(); + } else if (name == "pose_nms_thr") { + params->pose_nms_thr = value.cast(); + } else if (name == "track_kpt_thr") { + params->pose_kpt_thr = value.cast(); + } else if (name == "track_iou_thr") { + params->track_iou_thr = value.cast(); + } else if (name == "pose_bbox_scale") { + params->pose_bbox_scale = value.cast(); + } else if (name == "track_max_missing") { + params->track_max_missing = value.cast(); + } else if (name == "track_history_size") { + params->track_history_size = value.cast(); + } else if (name == "keypoint_sigmas") { + sigmas = value.cast>(); + params->keypoint_sigmas = const_cast(sigmas.data()); + params->keypoint_sigmas_size = sigmas.size(); + } else if (name == "std_weight_position") { + params->std_weight_position = value.cast(); + } else if (name == "std_weight_velocity") { + params->std_weight_velocity = value.cast(); + } else if (name == "smooth_params") { + Copy(value, params->smooth_params); + } else { + MMDEPLOY_ERROR("unused argument: {}", name); + } + } +} + +} // namespace + +static PythonBindingRegisterer register_pose_tracker{[](py::module& m) { + py::class_(m, "PoseTracker.State"); + py::class_(m, "PoseTracker") + .def(py::init([](const char* det_model_path, const char* pose_model_path, + const char* device_name, int device_id) { + return mmdeploy::PoseTracker( + mmdeploy::Model(det_model_path), mmdeploy::Model(pose_model_path), + mmdeploy::Context(mmdeploy::Device(device_name, device_id))); + }), + py::arg("det_model"), py::arg("pose_model"), py::arg("device_name"), + py::arg("device_id") = 0) + .def( + "__call__", + [](mmdeploy::PoseTracker* self, mmdeploy::PoseTracker::State* state, const PyImage& img, + int detect) { return Apply(self, {state}, {img}, {detect})[0]; }, + py::arg("state"), py::arg("frame"), py::arg("detect") = -1) + .def("batch", &Apply, py::arg("states"), py::arg("frames"), + py::arg("detects") = std::vector{}) + .def("create_state", [](mmdeploy::PoseTracker* self, const py::kwargs& kwargs) { + PoseTracker::Params params; + py::array_t sigmas; + if (kwargs) { + Parse(kwargs, params, sigmas); + } + return self->CreateState(params); + }); +}}; + +} // namespace mmdeploy::python diff --git a/csrc/mmdeploy/codebase/mmpose/CMakeLists.txt b/csrc/mmdeploy/codebase/mmpose/CMakeLists.txt index 86b244a662..4c165c0d10 100644 --- a/csrc/mmdeploy/codebase/mmpose/CMakeLists.txt +++ b/csrc/mmdeploy/codebase/mmpose/CMakeLists.txt @@ -5,12 +5,17 @@ project(mmdeploy_mmpose) find_package(OpenCV REQUIRED) aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR} MMPOSE_SRCS) +aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR}/pose_tracker POSE_TRACKER_SRCS) -mmdeploy_add_module(${PROJECT_NAME} ${MMPOSE_SRCS}) +mmdeploy_add_module(${PROJECT_NAME} ${MMPOSE_SRCS} ${POSE_TRACKER_SRCS}) target_link_libraries(${PROJECT_NAME} PRIVATE mmdeploy::transform mmdeploy_operation + mmdeploy_opencv_utils ${OpenCV_LIBS}) +target_include_directories(${PROJECT_NAME} PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}/../../apis/c) add_library(mmdeploy::mmpose ALIAS ${PROJECT_NAME}) -set(MMDEPLOY_TASKS ${MMDEPLOY_TASKS} pose_detector CACHE INTERNAL "") +set(MMDEPLOY_TASKS ${MMDEPLOY_TASKS} pose_detector pose_tracker CACHE INTERNAL "") diff --git a/csrc/mmdeploy/codebase/mmpose/pose_tracker/common.h b/csrc/mmdeploy/codebase/mmpose/pose_tracker/common.h new file mode 100644 index 0000000000..ec24648148 --- /dev/null +++ b/csrc/mmdeploy/codebase/mmpose/pose_tracker/common.h @@ -0,0 +1,57 @@ +// Copyright (c) OpenMMLab. All rights reserved. + +#ifndef MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_COMMON_H +#define MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_COMMON_H + +#include +#include + +#include "mmdeploy/core/mpl/type_traits.h" +#include "mmdeploy/pose_tracker.h" + +namespace mmdeploy::mmpose::_pose_tracker { + +struct TrackerResult { + std::vector> keypoints; + std::vector> scores; + std::vector bboxes; + std::vector track_ids; + // debug info + std::vector> pose_input_bboxes; + std::vector> pose_output_bboxes; +}; + +inline void SetDefaultParams(mmdeploy_pose_tracker_param_t& p) { + p.det_interval = 1; + p.det_label = 0; + p.det_min_bbox_size = -1; + p.det_thr = .5f; + p.det_nms_thr = .7f; + p.pose_max_num_bboxes = -1; + p.pose_min_keypoints = -1; + p.pose_min_bbox_size = 0; + p.pose_kpt_thr = .5f; + p.pose_nms_thr = .5f; + p.keypoint_sigmas = nullptr; + p.keypoint_sigmas_size = 0; + p.track_iou_thr = .4f; + p.pose_bbox_scale = 1.25f; + p.track_max_missing = 10; + p.track_history_size = 1; + + p.std_weight_position = 1 / 20.f; + p.std_weight_velocity = 1 / 160.f; + + (std::array&)p.smooth_params = {0.007, 1., 1.}; +} + +} // namespace mmdeploy::mmpose::_pose_tracker + +namespace mmdeploy { + +MMDEPLOY_REGISTER_TYPE_ID(mmdeploy_pose_tracker_param_t*, 0x32bc6919d5287035); +MMDEPLOY_REGISTER_TYPE_ID(mmpose::_pose_tracker::TrackerResult, 0xacb6ddb7dc1ffbca); + +} // namespace mmdeploy + +#endif // MMDEPLOY_COMMON_H diff --git a/csrc/mmdeploy/codebase/mmpose/pose_tracker/pipeline.cpp b/csrc/mmdeploy/codebase/mmpose/pose_tracker/pipeline.cpp new file mode 100644 index 0000000000..6ae6012b2f --- /dev/null +++ b/csrc/mmdeploy/codebase/mmpose/pose_tracker/pipeline.cpp @@ -0,0 +1,117 @@ +// Copyright (c) OpenMMLab. All rights reserved. + +#include "mmdeploy/archive/value_archive.h" +#include "mmdeploy/core/mat.h" +#include "mmdeploy/core/module.h" +#include "mmdeploy/experimental/module_adapter.h" +#include "pose_tracker/common.h" +#include "pose_tracker/pose_tracker.h" + +namespace mmdeploy { + +MMDEPLOY_REGISTER_TYPE_ID(mmpose::_pose_tracker::Tracker, 0xcfe87980aa895d3a); + +} + +namespace mmdeploy::mmpose::_pose_tracker { + +#define REGISTER_SIMPLE_MODULE(name, fn) \ + MMDEPLOY_REGISTER_FACTORY_FUNC(Module, (name, 0), [](const Value&) { return CreateTask(fn); }); + +Value Prepare(const Value& data, const Value& use_det, Value state) { + auto& tracker = state.get_ref(); + // set frame size for the first frame + if (tracker.frame_id() == 0) { + auto& frame = data["ori_img"].get_ref(); + tracker.SetFrameSize(frame.height(), frame.width()); + } + // use_det is set to non-auto mode + if (use_det.get() != -1) { + return use_det; + } + auto interval = tracker.params().det_interval; + return interval > 0 && tracker.frame_id() % interval == 0; +} + +REGISTER_SIMPLE_MODULE(pose_tracker::Prepare, Prepare); + +std::tuple ProcessBboxes(const Value& det_val, const Value& data, + Value state) noexcept { + auto& tracker = state.get_ref(); + + std::optional dets; + + if (det_val.is_array()) { // has detections + auto& [bboxes, scores, labels] = dets.emplace(); + for (const auto& det : det_val.array()) { + bboxes.push_back(from_value(det["bbox"])); + scores.push_back(det["score"].get()); + labels.push_back(det["label_id"].get()); + } + } + + auto [bboxes, ids] = tracker.ProcessBboxes(dets); + + Value::Array bbox_array; + Value track_ids_array; + // attach bboxes to image data + for (auto& bbox : bboxes) { + cv::Rect rect(cv::Rect2f(cv::Point2f(bbox[0], bbox[1]), cv::Point2f(bbox[2], bbox[3]))); + bbox_array.push_back({ + {"img", data["img"]}, // img + {"bbox", {rect.x, rect.y, rect.width, rect.height}}, // bbox + }); + } + + track_ids_array = to_value(ids); + return {std::move(bbox_array), std::move(track_ids_array)}; +} +REGISTER_SIMPLE_MODULE(pose_tracker::ProcessBboxes, ProcessBboxes); + +Value TrackStep(const Value& poses, const Value& track_indices, Value state) noexcept { + assert(poses.is_array()); + vector keypoints; + vector scores; + for (auto& output : poses.array()) { + auto& k = keypoints.emplace_back(); + auto& s = scores.emplace_back(); + float avg = 0.f; + for (auto& kpt : output["key_points"].array()) { + k.emplace_back(kpt["bbox"][0].get(), kpt["bbox"][1].get()); + s.push_back(kpt["score"].get()); + avg += s.back(); + } + } + vector track_ids; + from_value(track_indices, track_ids); + auto& tracker = state.get_ref(); + tracker.TrackStep(keypoints, scores, track_ids); + TrackerResult result; + for (const auto& track : tracker.tracks()) { + if (track->missing()) { + continue; + } + vector kpts; + kpts.reserve(track->smoothed_kpts().size()); + for (const auto& kpt : track->smoothed_kpts()) { + kpts.push_back({kpt.x, kpt.y}); + } + result.keypoints.push_back(std::move(kpts)); + result.scores.push_back(track->scores()); + auto& bbox = track->smoothed_bbox(); + result.bboxes.push_back({bbox[0], bbox[1], bbox[2], bbox[3]}); + result.track_ids.push_back(track->track_id()); + } + result.pose_input_bboxes = tracker.pose_input_bboxes(); + result.pose_output_bboxes = tracker.pose_output_bboxes(); + return result; +} +REGISTER_SIMPLE_MODULE(pose_tracker::TrackStep, TrackStep); + +// MSVC toolset v143 keeps ICEing when using a lambda here +static Value CreateTracker(mmdeploy_pose_tracker_param_t* param) { + return make_pointer(Tracker{*param}); +} +REGISTER_SIMPLE_MODULE(pose_tracker::Create, CreateTracker); + +} // namespace mmdeploy::mmpose::_pose_tracker diff --git a/csrc/mmdeploy/codebase/mmpose/pose_tracker/pose_tracker.cpp b/csrc/mmdeploy/codebase/mmpose/pose_tracker/pose_tracker.cpp new file mode 100644 index 0000000000..e1671dd7ac --- /dev/null +++ b/csrc/mmdeploy/codebase/mmpose/pose_tracker/pose_tracker.cpp @@ -0,0 +1,399 @@ +// Copyright (c) OpenMMLab. All rights reserved. + +#include "pose_tracker/pose_tracker.h" + +#include +#include +#include + +#include "mmdeploy/core/utils/formatter.h" +#include "pose_tracker/utils.h" + +namespace mmdeploy::mmpose::_pose_tracker { + +Tracker::Tracker(const mmdeploy_pose_tracker_param_t& _params) : params_(_params) { + if (params_.keypoint_sigmas && params_.keypoint_sigmas_size) { + std::copy_n(params_.keypoint_sigmas, params_.keypoint_sigmas_size, + std::back_inserter(key_point_sigmas_)); + params_.keypoint_sigmas = key_point_sigmas_.data(); + } +} + +void Tracker::SuppressOverlappingBoxes(const vector& bboxes, + vector>& ranks, + vector& is_valid_bbox) const { + vector iou(ranks.size() * ranks.size()); + for (int i = 0; i < bboxes.size(); ++i) { + for (int j = 0; j < i; ++j) { + iou[i * bboxes.size() + j] = iou[j * bboxes.size() + i] = + intersection_over_union(bboxes[i], bboxes[j]); + } + } + suppress_non_maximum(ranks, iou, is_valid_bbox, params_.det_nms_thr); +} + +void Tracker::SuppressOverlappingPoses(const vector& keypoints, + const vector& scores, const vector& bboxes, + const vector& track_ids, vector& is_valid, + float oks_thr) { + assert(keypoints.size() == is_valid.size()); + assert(scores.size() == is_valid.size()); + assert(bboxes.size() == is_valid.size()); + const auto size = is_valid.size(); + vector similarity(size * size); + for (int i = 0; i < size; ++i) { + if (is_valid[i]) { + for (int j = 0; j < i; ++j) { + if (is_valid[j]) { + similarity[i * size + j] = similarity[j * size + i] = + GetPosePoseSimilarity(bboxes[i], keypoints[i], bboxes[j], keypoints[j]); + } + } + } + } + vector> ranks; + ranks.reserve(size); + for (int i = 0; i < size; ++i) { + bool is_visible = false; + for (const auto& track : tracks_) { + if (track->track_id() == track_ids[i]) { + is_visible = track->missing() == 0; + break; + } + } + auto score = std::accumulate(scores[i].begin(), scores[i].end(), 0.f); + // prevents bboxes from missing tracks to suppress visible tracks + ranks.emplace_back(is_visible, score); + } + suppress_non_maximum(ranks, similarity, is_valid, oks_thr); +} + +std::tuple, vector> Tracker::ProcessBboxes( + const std::optional& dets) { + vector bboxes; + vector prev_ids; + + // 2 - visible tracks + // 1 - detection + // 0 - missing tracks + vector types; + + GetDetectedObjects(dets, bboxes, prev_ids, types); + + GetTrackedObjects(bboxes, prev_ids, types); + + vector is_valid_bboxes(bboxes.size(), 1); + + auto count = [&] { + std::array acc{}; + for (size_t i = 0; i < is_valid_bboxes.size(); ++i) { + if (is_valid_bboxes[i]) { + ++acc[types[i]]; + } + } + return acc; + }; + POSE_TRACKER_DEBUG("frame {}, bboxes {}", frame_id_, count()); + + vector> ranks; + ranks.reserve(bboxes.size()); + for (int i = 0; i < bboxes.size(); ++i) { + ranks.emplace_back(types[i], get_area(bboxes[i])); + } + SuppressOverlappingBoxes(bboxes, ranks, is_valid_bboxes); + POSE_TRACKER_DEBUG("frame {}, bboxes after nms: {}", frame_id_, count()); + + vector idxs; + idxs.reserve(bboxes.size()); + for (int i = 0; i < bboxes.size(); ++i) { + if (is_valid_bboxes[i]) { + idxs.push_back(i); + } + } + + std::stable_sort(idxs.begin(), idxs.end(), [&](int i, int j) { return ranks[i] > ranks[j]; }); + std::fill(is_valid_bboxes.begin(), is_valid_bboxes.end(), 0); + { + vector tmp_bboxes; + vector tmp_track_ids; + for (const auto& i : idxs) { + if (tmp_bboxes.size() >= params_.pose_max_num_bboxes) { + break; + } + tmp_bboxes.push_back(bboxes[i]); + tmp_track_ids.push_back(prev_ids[i]); + is_valid_bboxes[i] = 1; + } + bboxes = std::move(tmp_bboxes); + prev_ids = std::move(tmp_track_ids); + } + + pose_input_bboxes_ = bboxes; + + POSE_TRACKER_DEBUG("frame {}, bboxes after sort: {}", frame_id_, count()); + return {bboxes, prev_ids}; +} + +void Tracker::TrackStep(vector& keypoints, vector& scores, + const vector& prev_ids) noexcept { + vector bboxes(keypoints.size()); + vector is_unused_bbox(keypoints.size(), 1); + + // key-points to bboxes + for (size_t i = 0; i < keypoints.size(); ++i) { + if (auto bbox = KeypointsToBbox(keypoints[i], scores[i])) { + bboxes[i] = *bbox; + } else { + is_unused_bbox[i] = false; + } + } + + pose_output_bboxes_ = bboxes; + + SuppressOverlappingPoses(keypoints, scores, bboxes, prev_ids, is_unused_bbox, + params_.pose_nms_thr); + assert(is_unused_bbox.size() == bboxes.size()); + + vector similarity0; // average mahalanobis dist - proportion of tracked key-points + vector similarity1; // iou + vector> gating; // key-point gating result + GetSimilarityMatrices(bboxes, keypoints, prev_ids, similarity0, similarity1, gating); + + vector is_unused_track(tracks_.size(), 1); + // disable missing tracks in the #1 assignment + for (int i = 0; i < tracks_.size(); ++i) { + if (tracks_[i]->missing()) { + is_unused_track[i] = 0; + } + } + const auto assignment_visible = + greedy_assignment(similarity0, is_unused_bbox, is_unused_track, -kInf / 10); + POSE_TRACKER_DEBUG("frame {}, assignment for visible {}", frame_id_, assignment_visible); + + // enable missing tracks in the #2 assignment + for (int i = 0; i < tracks_.size(); ++i) { + if (tracks_[i]->missing()) { + is_unused_track[i] = 1; + } + } + const auto assignment_missing = + greedy_assignment(similarity1, is_unused_bbox, is_unused_track, params_.track_iou_thr); + POSE_TRACKER_DEBUG("frame {}, assignment for missing {}", frame_id_, assignment_missing); + + // update assigned tracks + for (auto [i, j, _] : assignment_visible) { + tracks_[j]->UpdateVisible(bboxes[i], keypoints[i], scores[i], gating[i * tracks_.size() + j]); + } + + // update recovered tracks + for (auto [i, j, _] : assignment_missing) { + tracks_[j]->UpdateRecovered(bboxes[i], keypoints[i], scores[i]); + } + + // generating new tracks from detected bboxes + for (size_t i = 0; i < is_unused_bbox.size(); ++i) { + if (is_unused_bbox[i] && prev_ids[i] == -1) { + CreateTrack(bboxes[i], keypoints[i], scores[i]); + } + } + + // update missing tracks + for (size_t i = 0; i < is_unused_track.size(); ++i) { + if (is_unused_track[i]) { + tracks_[i]->UpdateMissing(); + } + } + + // diagnostic for missing tracks + DiagnosticMissingTracks(is_unused_track, is_unused_bbox, similarity0, similarity1); + + RemoveMissingTracks(); + + for (auto& track : tracks_) { + track->Predict(); + } + + ++frame_id_; + + // print track summary + // SummaryTracks(); +} + +void Tracker::GetTrackedObjects(vector& bboxes, vector& track_ids, + vector& types) const { + for (auto& track : tracks_) { + std::optional bbox; + if (track->missing()) { + bbox = track->predicted_bbox(); + } else { + bbox = keypoints_to_bbox(track->predicted_kpts(), track->scores(), frame_h_, frame_w_, + params_.pose_bbox_scale, params_.pose_kpt_thr, + params_.pose_min_keypoints); + } + if (bbox) { + auto& b = *bbox; + cv::Rect_ img_rect(0, 0, frame_w_, frame_h_); + cv::Rect_ box_rect(b[0], b[1], b[2] - b[0], b[3] - b[1]); + auto roi = img_rect & box_rect; + if (roi.area() > 0 && get_area(b) > params_.pose_min_bbox_size * params_.pose_min_bbox_size) { + bboxes.push_back(*bbox); + track_ids.push_back(track->track_id()); + types.push_back(track->missing() ? 0 : 2); + } + } + } +} + +void Tracker::GetDetectedObjects(const std::optional& dets, vector& _bboxes, + vector& track_ids, vector& types) const { + if (dets) { + auto& [bboxes, scores, labels] = *dets; + for (size_t i = 0; i < bboxes.size(); ++i) { + if (labels[i] == params_.det_label && scores[i] > params_.det_thr && + get_area(bboxes[i]) >= params_.det_min_bbox_size * params_.det_min_bbox_size) { + _bboxes.push_back(bboxes[i]); + track_ids.push_back(-1); + types.push_back(1); + } + } + } +} + +std::tuple> Tracker::GetTrackPoseSimilarity(Track& track, + const Bbox& bbox, + const Points& kpts) const { + static constexpr const std::array chi2inv95{0.f, 3.8415f, 5.9915f, 7.8147f, 9.4877f, + 11.070f, 12.592f, 14.067f, 15.507f, 16.919f}; + auto dists = track.KeyPointDistance(kpts); + vector gating; + gating.reserve(dists.size()); + float dist = 0.f; + int count = 0; + for (const auto& d : dists) { + if (d < chi2inv95[2]) { + dist += d; + ++count; + gating.push_back(true); + } else { + gating.push_back(false); + } + } + auto count_thr = + params_.pose_min_keypoints >= 0 ? params_.pose_min_keypoints : (dists.size() + 1) / 2; + if (count >= count_thr) { + auto fcount = static_cast(count); + dist = dist / fcount - fcount / static_cast(dists.size()); + } else { + dist = kInf; + } + + auto iou = intersection_over_union(track.predicted_bbox(), bbox); + if (key_point_sigmas_.empty()) { + return {dist, iou, gating}; + } + + return {dist, iou, gating}; +} + +void Tracker::GetSimilarityMatrices(const vector& bboxes, const vector& keypoints, + const vector& prev_ids, vector& similarity0, + vector& similarity1, vector>& gating) { + const auto n_rows = static_cast(bboxes.size()); + const auto n_cols = static_cast(tracks_.size()); + + // generate similarity matrix + similarity0 = vector(n_rows * n_cols, -kInf); + similarity1 = vector(n_rows * n_cols, -kInf); + gating = vector>(n_rows * n_cols); + for (size_t i = 0; i < n_rows; ++i) { + const auto& bbox = bboxes[i]; + const auto& kpts = keypoints[i]; + for (size_t j = 0; j < n_cols; ++j) { + auto& track = *tracks_[j]; + if (prev_ids[i] != -1 && prev_ids[i] != track.track_id()) { + continue; + } + const auto index = i * n_cols + j; + auto&& [dist, iou, gate] = GetTrackPoseSimilarity(track, bbox, kpts); + similarity0[index] = -dist; + similarity1[index] = iou; + gating.push_back(std::move(gate)); + } + } +} + +float Tracker::GetPosePoseSimilarity(const Bbox& bbox0, const Points& kpts0, const Bbox& bbox1, + const Points& kpts1) { + if (key_point_sigmas_.empty()) { + return intersection_over_union(bbox0, bbox1); + } + // symmetric + return object_keypoint_similarity(kpts0, bbox0, kpts1, bbox1, key_point_sigmas_); +} + +void Tracker::CreateTrack(const Bbox& bbox, const Points& kpts, const Scores& scores) { + *tracks_.emplace_back(std::make_unique(¶ms_, bbox, kpts, scores, next_id_++)); +} + +std::optional Tracker::KeypointsToBbox(const Points& kpts, const Scores& scores) const { + return keypoints_to_bbox(kpts, scores, frame_h_, frame_w_, params_.pose_bbox_scale, + params_.pose_kpt_thr, params_.pose_min_keypoints); +} + +void Tracker::RemoveMissingTracks() { + size_t count{}; + for (auto& track : tracks_) { + if (track->missing() <= params_.track_max_missing) { + tracks_[count++] = std::move(track); + } + } + tracks_.resize(count); +} + +void Tracker::DiagnosticMissingTracks(const vector& is_unused_track, + const vector& is_unused_bbox, + const vector& similarity0, + const vector& similarity1) { + int missing = 0; + const auto n_cols = static_cast(is_unused_track.size()); + const auto n_rows = static_cast(is_unused_bbox.size()); + for (int i = 0; i < is_unused_track.size(); ++i) { + if (is_unused_track[i]) { + float best_s0 = 0.f; + float best_s1 = 0.f; + for (int j = 0; j < is_unused_bbox.size(); ++j) { + if (is_unused_bbox[j]) { + best_s0 = std::max(similarity0[j * n_cols + i], best_s0); + best_s1 = std::max(similarity1[j * n_cols + i], best_s1); + } + } + POSE_TRACKER_DEBUG("frame {}: track missing {}, best_s0={}, best_s1={}", frame_id_, + tracks_[i]->track_id(), best_s0, best_s1); + ++missing; + } + } + if (missing) { + std::stringstream ss; + ss << cv::Mat_(n_rows, n_cols, const_cast(similarity0.data())); + POSE_TRACKER_DEBUG("frame {}, similarity#0: \n{}", frame_id_, ss.str()); + ss = std::stringstream{}; + ss << cv::Mat_(n_rows, n_cols, const_cast(similarity1.data())); + POSE_TRACKER_DEBUG("frame {}, similarity#1: \n{}", frame_id_, ss.str()); + } +} + +void Tracker::SummaryTracks() { + vector> summary; + for (const auto& track : tracks_) { + summary.emplace_back(track->track_id(), track->missing()); + } + POSE_TRACKER_DEBUG("frame {}, track summary {}", frame_id_, summary); + for (const auto& track : tracks_) { + if (!track->missing()) { + POSE_TRACKER_DEBUG("frame {}, track {}, scores {}", frame_id_, track->track_id(), + track->scores()); + } + } +} + +} // namespace mmdeploy::mmpose::_pose_tracker diff --git a/csrc/mmdeploy/codebase/mmpose/pose_tracker/pose_tracker.h b/csrc/mmdeploy/codebase/mmpose/pose_tracker/pose_tracker.h new file mode 100644 index 0000000000..c27ae0b85b --- /dev/null +++ b/csrc/mmdeploy/codebase/mmpose/pose_tracker/pose_tracker.h @@ -0,0 +1,101 @@ +// Copyright (c) OpenMMLab. All rights reserved. + +#ifndef MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_HPP +#define MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_HPP + +#include "mmdeploy/pose_tracker.h" +#include "pose_tracker/common.h" +#include "pose_tracker/track.h" + +namespace mmdeploy::mmpose::_pose_tracker { + +class Tracker { + public: + explicit Tracker(const mmdeploy_pose_tracker_param_t& _params); + + Tracker(const Tracker&) { assert(0); } + Tracker(Tracker&& o) noexcept = default; + + struct Detections { + Bboxes bboxes; + Scores scores; + vector labels; + }; + + void SetFrameSize(int height, int width) { + frame_h_ = static_cast(height); + frame_w_ = static_cast(width); + } + + const mmdeploy_pose_tracker_param_t& params() const noexcept { return params_; } + + int64_t frame_id() const noexcept { return frame_id_; } + + const vector>& tracks() const noexcept { return tracks_; } + + std::tuple, vector> ProcessBboxes(const std::optional& dets); + + void TrackStep(vector& keypoints, vector& scores, + const vector& prev_ids) noexcept; + + private: + void GetDetectedObjects(const std::optional& dets, vector& _bboxes, + vector& track_ids, vector& types) const; + + void GetTrackedObjects(vector& bboxes, vector& track_ids, + vector& types) const; + + void SuppressOverlappingBoxes(const vector& bboxes, vector>& ranks, + vector& is_valid_bbox) const; + + void SuppressOverlappingPoses(const vector& keypoints, const vector& scores, + const vector& bboxes, const vector& track_ids, + vector& is_valid, float oks_thr); + + std::optional KeypointsToBbox(const Points& kpts, const Scores& scores) const; + + float GetPosePoseSimilarity(const Bbox& bbox0, const Points& kpts0, const Bbox& bbox1, + const Points& kpts1); + + void GetSimilarityMatrices(const vector& bboxes, const vector& keypoints, + const vector& prev_ids, vector& similarity0, + vector& similarity1, vector>& gating); + + std::tuple> GetTrackPoseSimilarity(Track& track, const Bbox& bbox, + const Points& kpts) const; + + void CreateTrack(const Bbox& bbox, const Points& kpts, const Scores& scores); + + void RemoveMissingTracks(); + + void DiagnosticMissingTracks(const vector& is_unused_track, + const vector& is_unused_bbox, const vector& similarity0, + const vector& similarity1); + + void SummaryTracks(); + + private: + static constexpr const auto kInf = 1000.f; + + float frame_h_ = 0; + float frame_w_ = 0; + + vector> tracks_; + int64_t next_id_{0}; + + std::vector key_point_sigmas_; + mmdeploy_pose_tracker_param_t params_; + + vector pose_input_bboxes_; + vector pose_output_bboxes_; + + int64_t frame_id_ = 0; + + public: + const vector& pose_input_bboxes() const noexcept { return pose_input_bboxes_; } + const vector& pose_output_bboxes() const noexcept { return pose_output_bboxes_; } +}; + +} // namespace mmdeploy::mmpose::_pose_tracker + +#endif // MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_HPP diff --git a/csrc/mmdeploy/codebase/mmpose/pose_tracker/smoothing_filter.cpp b/csrc/mmdeploy/codebase/mmpose/pose_tracker/smoothing_filter.cpp new file mode 100644 index 0000000000..9c2711a4f5 --- /dev/null +++ b/csrc/mmdeploy/codebase/mmpose/pose_tracker/smoothing_filter.cpp @@ -0,0 +1,49 @@ +// Copyright (c) OpenMMLab. All rights reserved. + +#include "smoothing_filter.h" + +namespace mmdeploy::mmpose::_pose_tracker { + +SmoothingFilter::SmoothingFilter(const Bbox& bbox, const Points& pts, + const SmoothingFilter::Params& params) + : params_(params), + pts_v_(pts.size()), + pts_x_(pts), + center_v_{}, + center_x_{get_center(bbox)}, + scale_v_{}, + scale_x_{get_scale(bbox)} {} + +std::pair SmoothingFilter::Step(const Bbox& bbox, const Points& kpts) { + constexpr auto abs = [](const Point& p) { return std::sqrt(p.dot(p)); }; + + // filter key-points + step(pts_x_, pts_v_, kpts, params_, abs); + + // filter bbox center + std::array c{get_center(bbox)}; + step(center_x_, center_v_, c, params_, abs); + + // filter bbox scales + auto s = get_scale(bbox); + step(scale_x_, scale_v_, s, params_, [](auto x) { return x; }); + + return {get_bbox(center_x_[0], scale_x_), pts_x_}; +} + +void SmoothingFilter::Reset(const Bbox& bbox, const Points& pts) { + pts_v_ = Points(pts_v_.size()); + center_v_ = {}; + scale_v_ = {}; + pts_x_ = pts; + center_v_ = {get_center(bbox)}; + scale_v_ = get_scale(bbox); +} + +float SmoothingFilter::smoothing_factor(float cutoff) { + static constexpr float kPi = 3.1415926; + auto r = 2.f * kPi * cutoff; + return r / (r + 1.f); +} + +} // namespace mmdeploy::mmpose::_pose_tracker diff --git a/csrc/mmdeploy/codebase/mmpose/pose_tracker/smoothing_filter.h b/csrc/mmdeploy/codebase/mmpose/pose_tracker/smoothing_filter.h new file mode 100644 index 0000000000..c136beb2d6 --- /dev/null +++ b/csrc/mmdeploy/codebase/mmpose/pose_tracker/smoothing_filter.h @@ -0,0 +1,58 @@ +// Copyright (c) OpenMMLab. All rights reserved. + +#ifndef MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_SMOOTHING_FILTER_H +#define MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_SMOOTHING_FILTER_H + +#include "mmdeploy/core/mpl/span.h" +#include "pose_tracker/utils.h" + +namespace mmdeploy::mmpose::_pose_tracker { + +template +using span = mmdeploy::Span; + +class SmoothingFilter { + public: + struct Params { + float beta; + float fc_min; + float fc_v; + }; + explicit SmoothingFilter(const Bbox& bbox, const Points& pts, const Params& params); + + std::pair Step(const Bbox& bbox, const Points& kpts); + + void Reset(const Bbox& bbox, const Points& pts); + + private: + static float smoothing_factor(float cutoff); + + template + static void step(span x, span v, span x1, const Params& params, Norm norm) { + auto a_v = smoothing_factor(params.fc_v); + for (int i = 0; i < v.size(); ++i) { + v[i] = smooth(a_v, v[i], x1[i] - x[i]); + auto fc = params.fc_min + params.beta * norm(v[i]); + auto a_x = smoothing_factor(fc); + x[i] = smooth(a_x, x[i], x1[i]); + } + } + + template + static T smooth(float a, const T& x0, const T& x1) { + return (1.f - a) * x0 + a * x1; + } + + private: + Params params_; + std::vector pts_v_; + std::vector pts_x_; + std::array center_v_; + std::array center_x_; + std::array scale_v_; + std::array scale_x_; +}; + +} // namespace mmdeploy::mmpose::_pose_tracker + +#endif // MMDEPLOY_SMOOTHING_FILTER_H diff --git a/csrc/mmdeploy/codebase/mmpose/pose_tracker/track.cpp b/csrc/mmdeploy/codebase/mmpose/pose_tracker/track.cpp new file mode 100644 index 0000000000..8566433ba0 --- /dev/null +++ b/csrc/mmdeploy/codebase/mmpose/pose_tracker/track.cpp @@ -0,0 +1,70 @@ +// Copyright (c) OpenMMLab. All rights reserved. + +#include "pose_tracker/track.h" + +namespace mmdeploy::mmpose::_pose_tracker { + +Track::Track(const mmdeploy_pose_tracker_param_t* params, const Bbox& bbox, const Points& kpts, + const Scores& ss, int64_t id) + : params_(params), + filter_(CreateFilter(bbox, kpts)), + smoother_(CreateSmoother(bbox, kpts)), + track_id_(id) { + POSE_TRACKER_DEBUG("new track {}", track_id_); + Add(bbox, kpts, ss); +} + +Track::~Track() { POSE_TRACKER_DEBUG("track lost {}", track_id_); } + +void Track::UpdateVisible(const Bbox& bbox, const Points& kpts, const Scores& scores, + const vector& tracked) { + auto [bbox_corr, kpts_corr] = filter_.Correct(bbox, kpts, tracked); + Add(bbox_corr, kpts_corr, scores); +} + +void Track::UpdateRecovered(const Bbox& bbox, const Points& kpts, const Scores& scores) { + POSE_TRACKER_DEBUG("track recovered {}", track_id_); + filter_ = CreateFilter(bbox, kpts); + smoother_.Reset(bbox, kpts); + Add(bbox, kpts, scores); + missing_ = 0; +} + +void Track::UpdateMissing() { + missing_++; + if (missing_ <= params_->track_max_missing) { + // use predicted state to update the missing tracks + Add(bbox_predict_, kpts_predict_, vector(kpts_predict_.size())); + } +} + +void Track::Predict() { + // TODO: velocity decay for missing tracks + std::tie(bbox_predict_, kpts_predict_) = filter_.Predict(); +} + +void Track::Add(const Bbox& bbox, const Points& kpts, const Scores& ss) { + bboxes_.push_back(bbox); + keypoints_.push_back(kpts); + scores_.push_back(ss); + if (bboxes_.size() > params_->track_history_size) { + std::rotate(bboxes_.begin(), bboxes_.begin() + 1, bboxes_.end()); + std::rotate(keypoints_.begin(), keypoints_.begin() + 1, keypoints_.end()); + std::rotate(scores_.begin(), scores_.begin() + 1, scores_.end()); + bboxes_.pop_back(); + keypoints_.pop_back(); + scores_.pop_back(); + } + std::tie(bbox_smooth_, kpts_smooth_) = smoother_.Step(bbox, kpts); +} + +TrackingFilter Track::CreateFilter(const Bbox& bbox, const Points& pts) { + return {bbox, pts, params_->std_weight_position, params_->std_weight_velocity}; +} + +SmoothingFilter Track::CreateSmoother(const Bbox& bbox, const Points& pts) { + return SmoothingFilter( + bbox, pts, {params_->smooth_params[0], params_->smooth_params[1], params_->smooth_params[2]}); +} + +} // namespace mmdeploy::mmpose::_pose_tracker diff --git a/csrc/mmdeploy/codebase/mmpose/pose_tracker/track.h b/csrc/mmdeploy/codebase/mmpose/pose_tracker/track.h new file mode 100644 index 0000000000..5be73168ac --- /dev/null +++ b/csrc/mmdeploy/codebase/mmpose/pose_tracker/track.h @@ -0,0 +1,65 @@ +// +// Created by zhangli on 1/9/23. +// + +#ifndef MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_TRACK_H +#define MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_TRACK_H + +#include "pose_tracker/common.h" +#include "pose_tracker/smoothing_filter.h" +#include "pose_tracker/tracking_filter.h" +#include "pose_tracker/utils.h" + +namespace mmdeploy::mmpose::_pose_tracker { + +class Track { + public: + Track(const mmdeploy_pose_tracker_param_t* params, const Bbox& bbox, const Points& kpts, + const Scores& ss, int64_t id); + ~Track(); + + void UpdateVisible(const Bbox& bbox, const Points& kpts, const Scores& scores, + const vector& tracked); + void UpdateRecovered(const Bbox& bbox, const Points& kpts, const Scores& scores); + void UpdateMissing(); + void Predict(); + + float BboxDistance(const Bbox& bbox) { return filter_.BboxDistance(bbox); } + + vector KeyPointDistance(const Points& kpts) { return filter_.KeyPointDistance(kpts); } + + uint32_t track_id() const noexcept { return track_id_; } + uint32_t missing() const noexcept { return missing_; } + + const Bbox& predicted_bbox() const noexcept { return bbox_predict_; } + const Bbox& smoothed_bbox() const noexcept { return bbox_smooth_; } + + const Points& predicted_kpts() const noexcept { return kpts_predict_; } + const Points& smoothed_kpts() const noexcept { return kpts_smooth_; } + + const Scores& scores() const noexcept { return scores_.back(); } + + private: + void Add(const Bbox& bbox, const Points& kpts, const Scores& ss); + + TrackingFilter CreateFilter(const Bbox& bbox, const Points& pts); + SmoothingFilter CreateSmoother(const Bbox& bbox, const Points& pts); + + private: + const mmdeploy_pose_tracker_param_t* params_; + vector bboxes_; + vector keypoints_; + vector scores_; + uint32_t track_id_{}; + Bbox bbox_predict_{}; + Bbox bbox_smooth_{}; + Points kpts_predict_; + Points kpts_smooth_; + uint32_t missing_{0}; + TrackingFilter filter_; + SmoothingFilter smoother_; +}; + +} // namespace mmdeploy::mmpose::_pose_tracker + +#endif // MMDEPLOY_TRACK_H diff --git a/csrc/mmdeploy/codebase/mmpose/pose_tracker/tracking_filter.cpp b/csrc/mmdeploy/codebase/mmpose/pose_tracker/tracking_filter.cpp new file mode 100644 index 0000000000..03745a3c8f --- /dev/null +++ b/csrc/mmdeploy/codebase/mmpose/pose_tracker/tracking_filter.cpp @@ -0,0 +1,199 @@ +// Copyright (c) OpenMMLab. All rights reserved. + +#include "pose_tracker/tracking_filter.h" + +namespace mmdeploy::mmpose::_pose_tracker { + +float get_mean_scale(float scale_w, float scale_h) { return std::sqrt(scale_w * scale_h); } + +TrackingFilter::TrackingFilter(const Bbox& bbox, const vector& kpts, + float std_weight_position, float std_weight_velocity) + : std_weight_position_(std_weight_position), std_weight_velocity_(std_weight_velocity) { + auto center = get_center(bbox); + auto scale = get_scale(bbox); + + auto mean_scale = get_mean_scale(scale[0], scale[1]); + + const auto n = kpts.size(); + pt_filters_.resize(n); + for (int i = 0; i < n; ++i) { + auto& f = pt_filters_[i]; + f.init(4, 2); + SetKeyPointTransitionMat(i); + SetKeyPointMeasurementMat(i); + + ResetKeyPoint(i, kpts[i], mean_scale); + } + + { + // [x, y, w, h, dx, dy, dw, dh] + auto& f = bbox_filter_; + + f.init(8, 4); + + SetBboxTransitionMat(); + SetBboxMeasurementMat(); + + SetBboxErrorCov(2 * std_weight_position * mean_scale, // + 10 * std_weight_velocity * mean_scale); + + f.statePost.at(0) = center.x; + f.statePost.at(1) = center.y; + f.statePost.at(2) = scale[0]; + f.statePost.at(3) = scale[1]; + } +} + +std::pair TrackingFilter::Predict() { + auto mean_scale = get_mean_scale(bbox_filter_.statePost.at(2), // + bbox_filter_.statePost.at(3)); + const auto n = pt_filters_.size(); + Points pts(n); + for (int i = 0; i < n; ++i) { + SetKeyPointProcessCov(i, std_weight_position_ * mean_scale, std_weight_velocity_ * mean_scale); + auto mat = pt_filters_[i].predict(); + pts[i].x = mat.at(0); + pts[i].y = mat.at(1); + } + Bbox bbox; + { + SetBboxProcessCov(std_weight_position_ * mean_scale, std_weight_velocity_ * mean_scale); + auto mat = bbox_filter_.predict(); + auto x = mat.ptr(); + bbox = get_bbox({x[0], x[1]}, {x[2], x[3]}); + } + return {bbox, pts}; +} + +std::pair TrackingFilter::Correct(const Bbox& bbox, const Points& kpts, + const vector& tracked) { + auto mean_scale = get_mean_scale(bbox_filter_.statePre.at(2), // + bbox_filter_.statePre.at(3)); + const auto n = pt_filters_.size(); + Points corr_kpts(n); + for (int i = 0; i < n; ++i) { + if (!tracked.empty() && tracked[i]) { + SetKeyPointMeasurementCov(i, std_weight_position_ * mean_scale); + std::array m{kpts[i].x, kpts[i].y}; + auto mat = pt_filters_[i].correct(as_mat(m)); + corr_kpts[i].x = mat.at(0); + corr_kpts[i].y = mat.at(1); + } else { + ResetKeyPoint(i, kpts[i], mean_scale); + corr_kpts[i] = kpts[i]; + } + } + Bbox corr_bbox; + { + SetBboxMeasurementCov(std_weight_position_ * mean_scale); + auto c = get_center(bbox); + auto s = get_scale(bbox); + std::array m{c.x, c.y, s[0], s[1]}; + auto mat = bbox_filter_.correct(as_mat(m)); + auto x = mat.ptr(); + corr_bbox = get_bbox({x[0], x[1]}, {x[2], x[3]}); + } + return {corr_bbox, corr_kpts}; +} + +float TrackingFilter::BboxDistance(const Bbox& bbox) { + auto mean_scale = get_mean_scale(bbox_filter_.statePre.at(2), // + bbox_filter_.statePre.at(3)); + SetBboxMeasurementCov(std_weight_position_ * mean_scale); + auto c = get_center(bbox); + auto s = get_scale(bbox); + std::array m{c.x, c.y, s[0], s[1]}; + cv::Mat z = as_mat(m); + auto& f = bbox_filter_; + cv::Mat sigma; + cv::gemm(f.measurementMatrix * f.errorCovPre, f.measurementMatrix, 1, f.measurementNoiseCov, 1, + sigma, cv::GEMM_2_T); + cv::Mat r = z - f.measurementMatrix * f.statePre; + // ignore contribution of scales as it is unstable when inferred from key-points + r.at(2) = 0; + r.at(3) = 0; + cv::Mat d = r.t() * sigma.inv() * r; + return d.at(); +} + +vector TrackingFilter::KeyPointDistance(const Points& kpts) { + auto mean_scale = get_mean_scale(bbox_filter_.statePre.at(2), // + bbox_filter_.statePre.at(3)); + + const auto n = pt_filters_.size(); + vector dists(n); + for (int i = 0; i < n; ++i) { + SetKeyPointMeasurementCov(i, std_weight_position_ * mean_scale); + std::array m{kpts[i].x, kpts[i].y}; + cv::Mat z = as_mat(m); + auto& f = pt_filters_[i]; + cv::Mat sigma; + cv::gemm(f.measurementMatrix * f.errorCovPre, f.measurementMatrix, 1, f.measurementNoiseCov, 1, + sigma, cv::GEMM_2_T); + cv::Mat r = z - f.measurementMatrix * f.statePre; + cv::Mat d = r.t() * sigma.inv() * r; + dists[i] = d.at(); + } + return dists; +} + +void TrackingFilter::SetBboxProcessCov(float sigma_p, float sigma_v) { + auto& m = bbox_filter_.processNoiseCov; + cv::setIdentity(m(cv::Rect(0, 0, 4, 4)), sigma_p * sigma_p); + cv::setIdentity(m(cv::Rect(4, 4, 4, 4)), sigma_v * sigma_v); +} +void TrackingFilter::SetBboxMeasurementCov(float sigma_p) { + auto& m = bbox_filter_.measurementNoiseCov; + cv::setIdentity(m, sigma_p * sigma_p); +} +void TrackingFilter::SetBboxErrorCov(float sigma_p, float sigma_v) { + auto& m = bbox_filter_.errorCovPost; + cv::setIdentity(m(cv::Rect(0, 0, 4, 4)), sigma_p * sigma_p); + cv::setIdentity(m(cv::Rect(4, 4, 4, 4)), sigma_v * sigma_v); +} +void TrackingFilter::SetBboxTransitionMat() { + auto& m = bbox_filter_.transitionMatrix; + cv::setIdentity(m(cv::Rect(4, 0, 4, 4))); // with scale velocity + // cv::setIdentity(m(cv::Rect(4, 0, 2, 2))); // w/o scale velocity +} +void TrackingFilter::SetBboxMeasurementMat() { + auto& m = bbox_filter_.measurementMatrix; + cv::setIdentity(m(cv::Rect(0, 0, 4, 4))); +} + +void TrackingFilter::SetKeyPointProcessCov(int index, float sigma_p, float sigma_v) { + auto& m = pt_filters_[index].processNoiseCov; + m.at(0, 0) = sigma_p * sigma_p; + m.at(1, 1) = sigma_p * sigma_p; + m.at(2, 2) = sigma_v * sigma_v; + m.at(3, 3) = sigma_v * sigma_v; +} +void TrackingFilter::SetKeyPointMeasurementCov(int index, float sigma_p) { + auto& m = pt_filters_[index].measurementNoiseCov; + m.at(0, 0) = sigma_p * sigma_p; + m.at(1, 1) = sigma_p * sigma_p; +} +void TrackingFilter::SetKeyPointErrorCov(int index, float sigma_p, float sigma_v) { + auto& m = pt_filters_[index].errorCovPost; + m.at(0, 0) = sigma_p * sigma_p; + m.at(1, 1) = sigma_p * sigma_p; + m.at(2, 2) = sigma_v * sigma_v; + m.at(3, 3) = sigma_v * sigma_v; +} +void TrackingFilter::SetKeyPointTransitionMat(int index) { + auto& m = pt_filters_[index].transitionMatrix; + cv::setIdentity(m(cv::Rect(2, 0, 2, 2))); +} +void TrackingFilter::SetKeyPointMeasurementMat(int index) { + auto& m = pt_filters_[index].measurementMatrix; + cv::setIdentity(m(cv::Rect(0, 0, 2, 2))); +} + +void TrackingFilter::ResetKeyPoint(int index, const Point& kpt, float scale) { + auto& f = pt_filters_[index]; + SetKeyPointErrorCov(index, 2 * std_weight_position_ * scale, 10 * std_weight_velocity_ * scale); + f.statePost.at(0) = kpt.x; + f.statePost.at(1) = kpt.y; +} + +} // namespace mmdeploy::mmpose::_pose_tracker diff --git a/csrc/mmdeploy/codebase/mmpose/pose_tracker/tracking_filter.h b/csrc/mmdeploy/codebase/mmpose/pose_tracker/tracking_filter.h new file mode 100644 index 0000000000..593fddc369 --- /dev/null +++ b/csrc/mmdeploy/codebase/mmpose/pose_tracker/tracking_filter.h @@ -0,0 +1,50 @@ +// Copyright (c) OpenMMLab. All rights reserved. + +#ifndef MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_TRACKING_FILTER_H +#define MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_TRACKING_FILTER_H + +#include "opencv2/video/video.hpp" +#include "pose_tracker/utils.h" + +namespace mmdeploy::mmpose::_pose_tracker { + +// use Kalman filter to estimate and predict true states +class TrackingFilter { + public: + TrackingFilter(const Bbox& bbox, const vector& kpts, float std_weight_position, + float std_weight_velocity); + + std::pair Predict(); + + vector KeyPointDistance(const Points& kpts); + + float BboxDistance(const Bbox& bbox); + + std::pair Correct(const Bbox& bbox, const Points& kpts, + const vector& tracked); + + private: + void SetBboxProcessCov(float sigma_p, float sigma_v); + void SetBboxMeasurementCov(float sigma_p); + void SetBboxErrorCov(float sigma_p, float sigma_v); + void SetBboxTransitionMat(); + void SetBboxMeasurementMat(); + + void SetKeyPointProcessCov(int index, float sigma_p, float sigma_v); + void SetKeyPointMeasurementCov(int index, float sigma_p); + void SetKeyPointErrorCov(int index, float sigma_p, float sigma_v); + void SetKeyPointTransitionMat(int index); + void SetKeyPointMeasurementMat(int index); + + void ResetKeyPoint(int index, const Point& kpt, float scale); + + private: + std::vector pt_filters_; + cv::KalmanFilter bbox_filter_; + float std_weight_position_; + float std_weight_velocity_; +}; + +} // namespace mmdeploy::mmpose::_pose_tracker + +#endif // MMDEPLOY_TRACKING_FILTER_H diff --git a/csrc/mmdeploy/codebase/mmpose/pose_tracker/utils.cpp b/csrc/mmdeploy/codebase/mmpose/pose_tracker/utils.cpp new file mode 100644 index 0000000000..e264a7b4f1 --- /dev/null +++ b/csrc/mmdeploy/codebase/mmpose/pose_tracker/utils.cpp @@ -0,0 +1,136 @@ +// Copyright (c) OpenMMLab. All rights reserved. + +#include "pose_tracker/utils.h" + +namespace mmdeploy::mmpose::_pose_tracker { + +vector> greedy_assignment(const vector& scores, + vector& is_valid_row, + vector& is_valid_col, float thr) { + const auto n_rows = is_valid_row.size(); + const auto n_cols = is_valid_col.size(); + vector> assignment; + assignment.reserve(std::max(n_rows, n_cols)); + while (true) { + auto max_score = std::numeric_limits::lowest(); + int max_row = -1; + int max_col = -1; + for (int i = 0; i < n_rows; ++i) { + if (is_valid_row[i]) { + for (int j = 0; j < n_cols; ++j) { + if (is_valid_col[j]) { + if (scores[i * n_cols + j] > max_score) { + max_score = scores[i * n_cols + j]; + max_row = i; + max_col = j; + } + } + } + } + } + if (max_score < thr) { + break; + } + is_valid_row[max_row] = 0; + is_valid_col[max_col] = 0; + assignment.emplace_back(max_row, max_col, max_score); + } + return assignment; +} + +float intersection_over_union(const Bbox& a, const Bbox& b) { + auto x1 = std::max(a[0], b[0]); + auto y1 = std::max(a[1], b[1]); + auto x2 = std::min(a[2], b[2]); + auto y2 = std::min(a[3], b[3]); + + auto inter_area = std::max(0.f, x2 - x1) * std::max(0.f, y2 - y1); + + auto a_area = get_area(a); + auto b_area = get_area(b); + auto union_area = a_area + b_area - inter_area; + + if (union_area == 0.f) { + return 0; + } + + return inter_area / union_area; +} + +float object_keypoint_similarity(const Points& pts_a, const Bbox& box_a, const Points& pts_b, + const Bbox& box_b, const vector& sigmas) { + assert(pts_a.size() == sigmas.size()); + assert(pts_b.size() == sigmas.size()); + auto scale = [](const Bbox& bbox) -> float { + auto a = bbox[2] - bbox[0]; + auto b = bbox[3] - bbox[1]; + return std::sqrt(a * a + b * b); + }; + auto oks = [](const Point& pa, const Point& pb, float s, float k) { + return std::exp(-(pa - pb).dot(pa - pb) / (2.f * s * s * k * k)); + }; + auto sum = 0.f; + const auto s = .5f * (scale(box_a) + scale(box_b)); + for (int i = 0; i < sigmas.size(); ++i) { + sum += oks(pts_a[i], pts_b[i], s, sigmas[i]); + } + sum /= static_cast(sigmas.size()); + return sum; +} + +std::optional keypoints_to_bbox(const Points& keypoints, const Scores& scores, float img_h, + float img_w, float scale, float kpt_thr, int min_keypoints) { + int valid = 0; + auto x1 = static_cast(img_w); + auto y1 = static_cast(img_h); + auto x2 = 0.f; + auto y2 = 0.f; + for (size_t i = 0; i < keypoints.size(); ++i) { + auto& kpt = keypoints[i]; + if (scores[i] >= kpt_thr) { + x1 = std::min(x1, kpt.x); + y1 = std::min(y1, kpt.y); + x2 = std::max(x2, kpt.x); + y2 = std::max(y2, kpt.y); + ++valid; + } + } + if (min_keypoints < 0) { + min_keypoints = (static_cast(scores.size()) + 1) / 2; + } + if (valid < min_keypoints) { + return std::nullopt; + } + auto xc = .5f * (x1 + x2); + auto yc = .5f * (y1 + y2); + auto w = (x2 - x1) * scale; + auto h = (y2 - y1) * scale; + + return std::array{ + std::max(0.f, std::min(img_w, xc - .5f * w)), + std::max(0.f, std::min(img_h, yc - .5f * h)), + std::max(0.f, std::min(img_w, xc + .5f * w)), + std::max(0.f, std::min(img_h, yc + .5f * h)), + }; +} + +Bbox map_bbox(const Bbox& box) { + Point p0(box[0], box[1]); + Point p1(box[2], box[3]); + auto c = .5f * (p0 + p1); + auto s = p1 - p0; + static constexpr std::array image_size{192.f, 256.f}; + float aspect_ratio = image_size[0] * 1.0 / image_size[1]; + if (s.x > aspect_ratio * s.y) { + s.y = s.x / aspect_ratio; + } else if (s.x < aspect_ratio * s.y) { + s.x = s.y * aspect_ratio; + } + s.x *= 1.25f; + s.y *= 1.25f; + p0 = c - .5f * s; + p1 = c + .5f * s; + return {p0.x, p0.y, p1.x, p1.y}; +} + +} // namespace mmdeploy::mmpose::_pose_tracker diff --git a/csrc/mmdeploy/codebase/mmpose/pose_tracker/utils.h b/csrc/mmdeploy/codebase/mmpose/pose_tracker/utils.h new file mode 100644 index 0000000000..3643c99387 --- /dev/null +++ b/csrc/mmdeploy/codebase/mmpose/pose_tracker/utils.h @@ -0,0 +1,96 @@ +// Copyright (c) OpenMMLab. All rights reserved. + +#ifndef MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_UTILS_H +#define MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_UTILS_H + +#include +#include +#include +#include + +#include "mmdeploy/core/utils/formatter.h" +#include "opencv2/core/core.hpp" +#include "pose_tracker/common.h" + +namespace mmdeploy::mmpose::_pose_tracker { + +using std::vector; +using Bbox = std::array; +using Bboxes = vector; +using Point = cv::Point2f; +using Points = vector; +using Score = float; +using Scores = vector; + +#define POSE_TRACKER_DEBUG(...) MMDEPLOY_INFO(__VA_ARGS__) + +// opencv3 can't construct cv::Mat from std::array +template +cv::Mat as_mat(const std::array& a) { + return cv::Mat_(a.size(), 1, const_cast(a.data())); +} + +// scale = 1.5, kpt_thr = 0.3 +std::optional keypoints_to_bbox(const Points& keypoints, const Scores& scores, float img_h, + float img_w, float scale, float kpt_thr, int min_keypoints); + +// xyxy format +float intersection_over_union(const Bbox& a, const Bbox& b); + +float object_keypoint_similarity(const Points& pts_a, const Bbox& box_a, const Points& pts_b, + const Bbox& box_b, const vector& sigmas); + +template +void suppress_non_maximum(const vector& scores, const vector& similarities, + vector& is_valid, float thresh); + +inline float get_area(const Bbox& bbox) { return (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]); } + +inline Point get_center(const Bbox& bbox) { + return {.5f * (bbox[0] + bbox[2]), .5f * (bbox[1] + bbox[3])}; +} + +inline std::array get_scale(const Bbox& bbox) { + return {bbox[2] - bbox[0], bbox[3] - bbox[1]}; +} + +inline Bbox get_bbox(const Point& center, const std::array& scale) { + return { + center.x - .5f * scale[0], + center.y - .5f * scale[1], + center.x + .5f * scale[0], + center.y + .5f * scale[1], + }; +} + +vector> greedy_assignment(const vector& scores, + vector& is_valid_row, + vector& is_valid_col, float thr); + +template +inline void suppress_non_maximum(const vector& scores, const vector& similarities, + vector& is_valid, float thresh) { + assert(is_valid.size() == scores.size()); + vector indices(scores.size()); + std::iota(indices.begin(), indices.end(), 0); + std::sort(indices.begin(), indices.end(), [&](int i, int j) { return scores[i] > scores[j]; }); + // suppress similar samples + for (int i = 0; i < indices.size(); ++i) { + if (auto u = indices[i]; is_valid[u]) { + for (int j = i + 1; j < indices.size(); ++j) { + if (auto v = indices[j]; is_valid[v]) { + if (similarities[u * scores.size() + v] >= thresh) { + is_valid[v] = false; + } + } + } + } + } +} + +// TopDownAffine's internal logic for mapping pose model inputs +Bbox map_bbox(const Bbox& box); + +} // namespace mmdeploy::mmpose::_pose_tracker + +#endif // MMDEPLOY_UTILS_H diff --git a/csrc/mmdeploy/experimental/module_adapter.h b/csrc/mmdeploy/experimental/module_adapter.h index 74dd9d9b86..763a71ba6b 100644 --- a/csrc/mmdeploy/experimental/module_adapter.h +++ b/csrc/mmdeploy/experimental/module_adapter.h @@ -9,7 +9,7 @@ namespace mmdeploy { -namespace module_detail { +namespace module_adapter { template struct is_tuple : std::false_type {}; @@ -20,16 +20,13 @@ struct is_tuple> : std::true_type {}; template inline constexpr auto is_tuple_v = is_tuple::value; -template +template struct InvokeImpl { - template - static Result apply(F&& f, const Value& params, Ts&&... ts) { - std::tuple...> args; + template + static Result apply(F&& f, const Value& args) { try { - from_value(params, args); - auto ret = apply_impl(std::forward(f), std::move(args), std::index_sequence_for{}, - std::forward(ts)...); - return make_ret_val(std::move(ret)); + using ArgsType = std::tuple...>; + return make_ret_val(std::apply((F &&) f, from_value(args))); } catch (const std::exception& e) { MMDEPLOY_ERROR("unhandled exception: {}", e.what()); return Status(eFail); @@ -38,15 +35,9 @@ struct InvokeImpl { } } - template - static decltype(auto) apply_impl(F&& f, Tuple&& tuple, std::index_sequence, Ts&&... ts) { - return std::invoke(std::forward(f), std::forward(ts)..., - std::get(std::forward(tuple))...); - } - template > static Result make_ret_val(T&& ret) { - if constexpr (module_detail::is_tuple_v) { + if constexpr (is_tuple_v) { return to_value(std::forward(ret)); } else if constexpr (is_result_v) { return ret ? make_ret_val(std::forward(ret).value()) : std::forward(ret).as_failure(); @@ -56,60 +47,49 @@ struct InvokeImpl { } }; -// function pointer +// match function pointer template Result Invoke(Ret (*f)(Args...), const Value& args) { - return InvokeImpl::apply(f, args); + return InvokeImpl::apply(f, args); } -// member function pointer -template -Result Invoke(Ret (C0::*f)(Args...) const, C1* inst, const Value& args) { - return InvokeImpl::apply(f, args, inst); +// match member function pointer `&C::operator()` +template +Result Invoke(Ret (C::*)(Args...) const, const F& f, const Value& args) { + return InvokeImpl::apply(f, args); } -template -Result Invoke(Ret (C0::*f)(Args...), C1* inst, const Value& args) { - return InvokeImpl::apply(f, args, inst); +template +Result Invoke(Ret (C::*)(Args...), F& f, const Value& args) { + return InvokeImpl::apply(f, args); } -// function object -template , +// match function object +template , typename = std::void_t> -Result Invoke(T&& t, const Value& args) { - return Invoke(&C::operator(), &t, args); +Result Invoke(F&& f, const Value& args) { + return Invoke(&C::operator(), (F &&) f, args); } -template -struct IsPointer : std::false_type {}; -template -struct IsPointer : std::false_type {}; -template -struct IsPointer> : std::true_type {}; -template -struct IsPointer> : std::true_type {}; -template -struct IsPointer : std::true_type {}; - -template -struct AccessPolicy { - static constexpr auto apply = [](auto& x) -> decltype(auto) { return x; }; -}; -template -struct AccessPolicy::value>> { - static constexpr auto apply = [](auto& x) -> decltype(auto) { return *x; }; -}; +template +Result Invoke(const std::unique_ptr& f, const Value& args) { + return Invoke(*f, args); +} +template +Result Invoke(const std::shared_ptr& f, const Value& args) { + return Invoke(*f, args); +} -template > +template class Task : public Module { public: - explicit Task(T task) : task_(std::move(task)) {} + explicit Task(Func func) : func_(std::move(func)) {} Result Process(const Value& arg) override { - return module_detail::Invoke(A::apply(task_), arg); + return ::mmdeploy::module_adapter::Invoke(func_, arg); } private: - T task_; + Func func_; }; template @@ -122,11 +102,10 @@ auto MakeTask(T&& x) { return Task(std::forward(x)); } -} // namespace module_detail - -using module_detail::CreateTask; +} // namespace module_adapter -using module_detail::MakeTask; +using module_adapter::CreateTask; +using module_adapter::MakeTask; } // namespace mmdeploy diff --git a/csrc/mmdeploy/graph/inference.cpp b/csrc/mmdeploy/graph/inference.cpp index ec6cb59076..9f722d0e62 100644 --- a/csrc/mmdeploy/graph/inference.cpp +++ b/csrc/mmdeploy/graph/inference.cpp @@ -41,7 +41,7 @@ Result> InferenceBuilder::BuildImpl() { } pipeline_config["context"] = context; - MMDEPLOY_INFO("{}", pipeline_config); + MMDEPLOY_DEBUG("{}", pipeline_config); OUTCOME_TRY(auto pipeline_builder, Builder::CreateFromConfig(pipeline_config)); OUTCOME_TRY(auto node, pipeline_builder->Build()); diff --git a/csrc/mmdeploy/operation/cuda/crop_resize_pad.cpp b/csrc/mmdeploy/operation/cuda/crop_resize_pad.cpp index 8bf3232d03..e12ae06a1e 100644 --- a/csrc/mmdeploy/operation/cuda/crop_resize_pad.cpp +++ b/csrc/mmdeploy/operation/cuda/crop_resize_pad.cpp @@ -21,15 +21,17 @@ class CropResizePadImpl : public CropResizePad { Tensor dst_tensor(desc); cudaMemsetAsync(dst_tensor.data(), 0, dst_tensor.byte_size(), cuda_stream); - if (src.data_type() == DataType::kINT8) { - OUTCOME_TRY( - ResizeDispatch(src, crop_rect, target_size, pad_rect, dst_tensor, cuda_stream)); - } else if (src.data_type() == DataType::kFLOAT) { - OUTCOME_TRY( - ResizeDispatch(src, crop_rect, target_size, pad_rect, dst_tensor, cuda_stream)); - } else { - MMDEPLOY_ERROR("unsupported data type {}", src.data_type()); - return Status(eNotSupported); + if (crop_rect[2] - crop_rect[0] + 1 > 0 && crop_rect[3] - crop_rect[1] + 1 > 0) { + if (src.data_type() == DataType::kINT8) { + OUTCOME_TRY(ResizeDispatch(src, crop_rect, target_size, pad_rect, dst_tensor, + cuda_stream)); + } else if (src.data_type() == DataType::kFLOAT) { + OUTCOME_TRY( + ResizeDispatch(src, crop_rect, target_size, pad_rect, dst_tensor, cuda_stream)); + } else { + MMDEPLOY_ERROR("unsupported data type {}", src.data_type()); + return Status(eNotSupported); + } } dst = std::move(dst_tensor); diff --git a/csrc/mmdeploy/utils/opencv/opencv_utils.cpp b/csrc/mmdeploy/utils/opencv/opencv_utils.cpp index 17a87bbf9b..d3cd1ad87b 100644 --- a/csrc/mmdeploy/utils/opencv/opencv_utils.cpp +++ b/csrc/mmdeploy/utils/opencv/opencv_utils.cpp @@ -295,10 +295,12 @@ cv::Mat CropResizePad(const cv::Mat& src, const std::vector& crop_rect, int width = target_size[0] + pad_rect[1] + pad_rect[3]; int height = target_size[1] + pad_rect[0] + pad_rect[2]; cv::Mat dst = cv::Mat::zeros(height, width, src.type()); - cv::Rect roi1 = {crop_rect[1], crop_rect[0], crop_rect[3] - crop_rect[1] + 1, - crop_rect[2] - crop_rect[0] + 1}; - cv::Rect roi2 = {pad_rect[1], pad_rect[0], target_size[0], target_size[1]}; - cv::resize(src(roi1), dst(roi2), {target_size[0], target_size[1]}); + if (crop_rect[2] - crop_rect[0] + 1 > 0 && crop_rect[3] - crop_rect[1] + 1 > 0) { + cv::Rect roi1 = {crop_rect[1], crop_rect[0], crop_rect[3] - crop_rect[1] + 1, + crop_rect[2] - crop_rect[0] + 1}; + cv::Rect roi2 = {pad_rect[1], pad_rect[0], target_size[0], target_size[1]}; + cv::resize(src(roi1), dst(roi2), {target_size[0], target_size[1]}); + } return dst; } diff --git a/demo/csrc/CMakeLists.txt b/demo/csrc/CMakeLists.txt index 85ffc6f32c..8d25e43597 100644 --- a/demo/csrc/CMakeLists.txt +++ b/demo/csrc/CMakeLists.txt @@ -41,7 +41,6 @@ add_example(rotated_detector c rotated_object_detection) add_example(video_recognizer c video_recognition) # TODO: figure out a better way # add_example("" c det_cls) -# add_example("" c det_pose) if (MMDEPLOY_BUILD_SDK_CXX_API) add_example(classifier cpp classifier) @@ -52,6 +51,7 @@ if (MMDEPLOY_BUILD_SDK_CXX_API) add_example(text_detector cpp text_det_recog) add_example(pose_detector cpp pose_detector) add_example(rotated_detector cpp rotated_detector) - add_example(pose_detector cpp pose_tracker) + add_example(pose_tracker cpp pose_tracker) + add_example(pose_detector cpp det_pose) add_example(video_recognizer cpp video_cls) endif () diff --git a/demo/csrc/cpp/det_pose.cpp b/demo/csrc/cpp/det_pose.cpp new file mode 100644 index 0000000000..a12dd3c0b1 --- /dev/null +++ b/demo/csrc/cpp/det_pose.cpp @@ -0,0 +1,113 @@ +// Copyright (c) OpenMMLab. All rights reserved. + +#include + +#include "mmdeploy/detector.hpp" +#include "mmdeploy/pose_detector.hpp" +#include "opencv2/imgcodecs/imgcodecs.hpp" +#include "opencv2/imgproc/imgproc.hpp" + +using std::vector; + +cv::Mat Visualize(cv::Mat frame, const std::vector& poses, int size); + +int main(int argc, char* argv[]) { + const auto device_name = argv[1]; + const auto det_model_path = argv[2]; + const auto pose_model_path = argv[3]; + const auto image_path = argv[4]; + + if (argc != 5) { + std::cerr << "usage:\n\tpose_tracker device_name det_model_path pose_model_path image_path\n"; + return -1; + } + auto img = cv::imread(image_path); + if (!img.data) { + std::cerr << "failed to load image: " << image_path << "\n"; + return -1; + } + + using namespace mmdeploy; + + Context context(Device{device_name}); // create context for holding the device handle + Detector detector(Model(det_model_path), context); // create object detector + PoseDetector pose(Model(pose_model_path), context); // create pose detector + + // apply detector + auto dets = detector.Apply(img); + + // filter detections and extract bboxes for pose model + std::vector bboxes; + for (const auto& det : dets) { + if (det.label_id == 0 && det.score > .6f) { + bboxes.push_back(det.bbox); + } + } + // apply pose detector + auto poses = pose.Apply(img, bboxes); + + // visualize + auto vis = Visualize(img, {poses.begin(), poses.end()}, 1280); + cv::imwrite("det_pose_output.jpg", vis); + + return 0; +} + +struct Skeleton { + vector> skeleton; + vector palette; + vector link_color; + vector point_color; +}; + +const Skeleton& gCocoSkeleton() { + static const Skeleton inst{ + { + {15, 13}, {13, 11}, {16, 14}, {14, 12}, {11, 12}, {5, 11}, {6, 12}, + {5, 6}, {5, 7}, {6, 8}, {7, 9}, {8, 10}, {1, 2}, {0, 1}, + {0, 2}, {1, 3}, {2, 4}, {3, 5}, {4, 6}, + }, + { + {255, 128, 0}, {255, 153, 51}, {255, 178, 102}, {230, 230, 0}, {255, 153, 255}, + {153, 204, 255}, {255, 102, 255}, {255, 51, 255}, {102, 178, 255}, {51, 153, 255}, + {255, 153, 153}, {255, 102, 102}, {255, 51, 51}, {153, 255, 153}, {102, 255, 102}, + {51, 255, 51}, {0, 255, 0}, {0, 0, 255}, {255, 0, 0}, {255, 255, 255}, + }, + {0, 0, 0, 0, 7, 7, 7, 9, 9, 9, 9, 9, 16, 16, 16, 16, 16, 16, 16}, + {16, 16, 16, 16, 16, 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0}, + }; + return inst; +} + +cv::Mat Visualize(cv::Mat frame, const vector& poses, int size) { + auto& [skeleton, palette, link_color, point_color] = gCocoSkeleton(); + auto scale = (float)size / (float)std::max(frame.cols, frame.rows); + if (scale != 1) { + cv::resize(frame, frame, {}, scale, scale); + } else { + frame = frame.clone(); + } + for (const auto& pose : poses) { + vector kpts(&pose.point[0].x, &pose.point[pose.length - 1].y + 1); + vector scores(pose.score, pose.score + pose.length); + std::for_each(kpts.begin(), kpts.end(), [&](auto& x) { x *= scale; }); + constexpr auto score_thr = .5f; + vector used(kpts.size()); + for (size_t i = 0; i < skeleton.size(); ++i) { + auto [u, v] = skeleton[i]; + if (scores[u] > score_thr && scores[v] > score_thr) { + used[u] = used[v] = 1; + cv::Point p_u(kpts[u * 2], kpts[u * 2 + 1]); + cv::Point p_v(kpts[v * 2], kpts[v * 2 + 1]); + cv::line(frame, p_u, p_v, palette[link_color[i]], 1, cv::LINE_AA); + } + } + for (size_t i = 0; i < kpts.size(); i += 2) { + if (used[i / 2]) { + cv::Point p(kpts[i], kpts[i + 1]); + cv::circle(frame, p, 1, palette[point_color[i / 2]], 2, cv::LINE_AA); + } + } + } + return frame; +} diff --git a/demo/csrc/cpp/pose_tracker.cpp b/demo/csrc/cpp/pose_tracker.cpp index ec95afe03d..077b3d39ea 100644 --- a/demo/csrc/cpp/pose_tracker.cpp +++ b/demo/csrc/cpp/pose_tracker.cpp @@ -1,1064 +1,180 @@ +// Copyright (c) OpenMMLab. All rights reserved. +#include "mmdeploy/pose_tracker.hpp" -#include -#include +#include -#include "mmdeploy/archive/json_archive.h" -#include "mmdeploy/archive/value_archive.h" -#include "mmdeploy/common.hpp" -#include "mmdeploy/core/mat.h" -#include "mmdeploy/core/module.h" -#include "mmdeploy/core/utils/formatter.h" -#include "mmdeploy/experimental/module_adapter.h" -#include "mmdeploy/pipeline.hpp" -#include "opencv2/highgui.hpp" -#include "opencv2/imgproc.hpp" -#include "opencv2/videoio.hpp" +#include "opencv2/highgui/highgui.hpp" +#include "opencv2/imgcodecs/imgcodecs.hpp" +#include "opencv2/imgproc/imgproc.hpp" +#include "opencv2/videoio/videoio.hpp" -const auto config_json = R"( -{ - "type": "Pipeline", - "input": ["img", "use_det", "state"], - "output": "targets", - "tasks": [ - { - "type": "Task", - "module": "Transform", - "name": "preload", - "input": "img", - "output": "data", - "transforms": [ { "type": "LoadImageFromFile" } ] - }, - { - "type": "Cond", - "input": ["use_det", "data"], - "output": "dets", - "body": { - "name": "detection", - "type": "Inference", - "params": { "model": "detection" } - } - }, - { - "type": "Task", - "module": "ProcessBboxes", - "input": ["dets", "data", "state"], - "output": ["rois", "track_ids"] - }, - { - "input": "*rois", - "output": "*keypoints", - "name": "pose", - "type": "Inference", - "params": { "model": "pose" } - }, - { - "type": "Task", - "module": "TrackPose", - "scheduler": "pool", - "input": ["keypoints", "track_ids", "state"], - "output": "targets" - } - ] -} -)"_json; - -namespace mmdeploy { - -#define REGISTER_SIMPLE_MODULE(name, fn) \ - MMDEPLOY_REGISTER_FACTORY_FUNC(Module, (name, 0), [](const Value&) { return CreateTask(fn); }); - -#define POSE_TRACKER_DEBUG(...) MMDEPLOY_INFO(__VA_ARGS__) - -using std::vector; -using Bbox = std::array; -using Bboxes = vector; -using Point = cv::Point2f; -using Points = vector; -using Score = float; -using Scores = vector; - -// scale = 1.5, kpt_thr = 0.3 -std::optional keypoints_to_bbox(const Points& keypoints, const Scores& scores, float img_h, - float img_w, float scale, float kpt_thr, int min_keypoints) { - int valid = 0; - auto x1 = static_cast(img_w); - auto y1 = static_cast(img_h); - auto x2 = 0.f; - auto y2 = 0.f; - for (size_t i = 0; i < keypoints.size(); ++i) { - auto& kpt = keypoints[i]; - if (scores[i] >= kpt_thr) { - x1 = std::min(x1, kpt.x); - y1 = std::min(y1, kpt.y); - x2 = std::max(x2, kpt.x); - y2 = std::max(y2, kpt.y); - ++valid; - } - } - if (min_keypoints < 0) { - min_keypoints = (static_cast(scores.size()) + 1) / 2; - } - if (valid < min_keypoints) { - return std::nullopt; - } - auto xc = .5f * (x1 + x2); - auto yc = .5f * (y1 + y2); - auto w = (x2 - x1) * scale; - auto h = (y2 - y1) * scale; - - return std::array{ - std::max(0.f, std::min(img_w, xc - .5f * w)), - std::max(0.f, std::min(img_h, yc - .5f * h)), - std::max(0.f, std::min(img_w, xc + .5f * w)), - std::max(0.f, std::min(img_h, yc + .5f * h)), - }; -} - -class Filter { - public: - virtual ~Filter() = default; - virtual cv::Mat_ Predict(float t) = 0; - virtual cv::Mat_ Correct(const cv::Mat_& x) = 0; -}; - -class OneEuroFilter : public Filter { - public: - explicit OneEuroFilter(const cv::Mat_& x, float beta, float fc_min, float fc_d) - : x_(x.clone()), beta_(beta), fc_min_(fc_min), fc_d_(fc_d) { - v_ = cv::Mat::zeros(x_.size(), x.type()); - } - - cv::Mat_ Predict(float t) override { return x_ + v_; } - - cv::Mat_ Correct(const cv::Mat_& x) override { - auto a_v = SmoothingFactor(fc_d_); - v_ = ExponentialSmoothing(a_v, x - x_, v_); - auto fc = fc_min_ + beta_ * (float)cv::norm(v_); - auto a_x = SmoothingFactor(fc); - x_ = ExponentialSmoothing(a_x, x, x_); - return x_.clone(); - } - - private: - static float SmoothingFactor(float cutoff) { - static constexpr float kPi = 3.1415926; - auto r = 2 * kPi * cutoff; - return r / (r + 1); - } - - static cv::Mat_ ExponentialSmoothing(float a, const cv::Mat_& x, - const cv::Mat_& x0) { - return a * x + (1 - a) * x0; - } - - private: - cv::Mat_ x_; - cv::Mat_ v_; - float beta_; - float fc_min_; - float fc_d_; -}; - -template -class PointFilterArray : public Filter { - public: - template - explicit PointFilterArray(const Points& ps, const Args&... args) { - for (const auto& p : ps) { - fs_.emplace_back(cv::Mat_(p, false), args...); - } - } - - cv::Mat_ Predict(float t) override { - cv::Mat_ m(fs_.size() * 2, 1); - for (int i = 0; i < fs_.size(); ++i) { - cv::Range r(i * 2, i * 2 + 2); - fs_[i].Predict(1).copyTo(m.rowRange(r)); - } - return m.reshape(0, fs_.size()); - } - - cv::Mat_ Correct(const cv::Mat_& x) override { - cv::Mat_ m(fs_.size() * 2, 1); - auto _x = x.reshape(1, x.rows * x.cols); - for (int i = 0; i < fs_.size(); ++i) { - cv::Range r(i * 2, i * 2 + 2); - fs_[i].Correct(_x.rowRange(r)).copyTo(m.rowRange(r)); - } - return m.reshape(0, fs_.size()); - } - - private: - vector fs_; -}; - -class TrackerFilter { - public: - using Points = vector; - - explicit TrackerFilter(float c_beta, float c_fc_min, float c_fc_d, float k_beta, float k_fc_min, - float k_fc_d, const Bbox& bbox, const Points& kpts) - : n_kpts_(kpts.size()) { - c_ = std::make_unique(cv::Mat_(Center(bbox)), c_beta, c_fc_min, c_fc_d); - s_ = std::make_unique(cv::Mat_(Scale(bbox)), 0, 1, 0); - kpts_ = std::make_unique>(kpts, k_beta, k_fc_min, k_fc_d); - } - - std::pair Predict() { - cv::Point2f c; - c_->Predict(1).copyTo(cv::Mat(c, false)); - cv::Point2f s; - s_->Predict(0).copyTo(cv::Mat(s, false)); - Points p(n_kpts_); - kpts_->Predict(1).copyTo(cv::Mat(p, false).reshape(1)); - return {GetBbox(c, s), std::move(p)}; - } - - std::pair Correct(const Bbox& bbox, const Points& kpts) { - cv::Point2f c; - c_->Correct(cv::Mat_(Center(bbox), false)).copyTo(cv::Mat(c, false)); - cv::Point2f s; - s_->Correct(cv::Mat_(Scale(bbox), false)).copyTo(cv::Mat(s, false)); - Points p(kpts.size()); - kpts_->Correct(cv::Mat(kpts, false)).copyTo(cv::Mat(p, false).reshape(1)); - return {GetBbox(c, s), std::move(p)}; - } - - private: - static cv::Point2f Center(const Bbox& bbox) { - return {.5f * (bbox[0] + bbox[2]), .5f * (bbox[1] + bbox[3])}; - } - static cv::Point2f Scale(const Bbox& bbox) { - return {bbox[2] - bbox[0], bbox[3] - bbox[1]}; - // return {std::log(bbox[2] - bbox[0]), std::log(bbox[3] - bbox[1])}; - } - static Bbox GetBbox(const cv::Point2f& center, const cv::Point2f& scale) { - // cv::Point2f half_size(.5 * std::exp(scale.x), .5 * std::exp(scale.y)); - Point half_size(.5f * scale.x, .5f * scale.y); - auto lo = center - half_size; - auto hi = center + half_size; - return {lo.x, lo.y, hi.x, hi.y}; - } - int n_kpts_; - std::unique_ptr c_; - std::unique_ptr s_; - std::unique_ptr kpts_; -}; - -struct Track { - vector keypoints; - vector scores; - vector avg_scores; - vector bboxes; - vector is_missing; - int64_t track_id{-1}; - std::shared_ptr filter; - Bbox bbox_pred{}; - Points kpts_pred; - int64_t age{0}; - int64_t n_missing{0}; -}; - -struct TrackInfo { - vector tracks; - int64_t next_id{0}; -}; - -static inline float Area(const Bbox& bbox) { return (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]); } - -struct TrackerParams { - // detector params - int det_interval = 5; // detection interval - int det_label = 0; // label used to filter detections - float det_min_bbox_size = 100; // threshold for sqrt(area(bbox)) - float det_thr = .5f; // confidence threshold used to filter detections - float det_nms_thr = .7f; // detection nms threshold - - // pose model params - int pose_max_num_bboxes = 1; // max num of bboxes for pose model per frame - int pose_min_keypoints = -1; // min of visible key-points for valid bbox, -1 -> len(kpts)/2 - float pose_min_bbox_size = 64; // threshold for sqrt(area(bbox)) - vector sigmas; // sigmas for key-points - - // tracker params - float track_nms_oks_thr = .5f; // OKS threshold for suppressing duplicated key-points - float track_kpts_thr = .6f; // threshold for key-point visibility - float track_oks_thr = .3f; // OKS assignment threshold - float track_iou_thr = .3f; // IOU assignment threshold - float track_bbox_scale = 1.25f; // scale factor for bboxes - int track_max_missing = 10; // max number of missing frames before track removal - float track_missing_momentum = .95f; // extrapolation momentum for missing tracks - int track_n_history = 10; // track history length - - // filter params for bbox center - float filter_c_beta = .005; - float filter_c_fc_min = .05; - float filter_c_fc_d = 1.; - // filter params for key-points - float filter_k_beta = .0075; - float filter_k_fc_min = .1; - float filter_k_fc_d = .25; +struct Args { + std::string device; + std::string det_model; + std::string pose_model; + std::string video; + std::string output_dir; }; -class Tracker { - public: - explicit Tracker(const TrackerParams& _params) : params(_params) {} - // xyxy format - float IntersectionOverUnion(const std::array& a, const std::array& b) { - auto x1 = std::max(a[0], b[0]); - auto y1 = std::max(a[1], b[1]); - auto x2 = std::min(a[2], b[2]); - auto y2 = std::min(a[3], b[3]); - - auto inter_area = std::max(0.f, x2 - x1) * std::max(0.f, y2 - y1); - - auto a_area = Area(a); - auto b_area = Area(b); - auto union_area = a_area + b_area - inter_area; - - if (union_area == 0.f) { - return 0; - } - - return inter_area / union_area; - } - - // TopDownAffine's internal logic for mapping pose detector inputs - Bbox MapBbox(const Bbox& box) { - Point p0(box[0], box[1]); - Point p1(box[2], box[3]); - auto c = .5f * (p0 + p1); - auto s = p1 - p0; - static constexpr std::array image_size{192.f, 256.f}; - float aspect_ratio = image_size[0] * 1.0 / image_size[1]; - if (s.x > aspect_ratio * s.y) { - s.y = s.x / aspect_ratio; - } else if (s.x < aspect_ratio * s.y) { - s.x = s.y * aspect_ratio; - } - s.x *= 1.25f; - s.y *= 1.25f; - p0 = c - .5f * s; - p1 = c + .5f * s; - return {p0.x, p0.y, p1.x, p1.y}; - } - - template - vector SuppressNonMaximum(const vector& scores, const vector& similarities, - vector is_valid, float thresh) { - assert(is_valid.size() == scores.size()); - vector indices(scores.size()); - std::iota(indices.begin(), indices.end(), 0); - // stable sort, useful when the scores are equal - std::sort(indices.begin(), indices.end(), [&](int i, int j) { return scores[i] > scores[j]; }); - // suppress similar samples - for (int i = 0; i < indices.size(); ++i) { - if (auto u = indices[i]; is_valid[u]) { - for (int j = i + 1; j < indices.size(); ++j) { - if (auto v = indices[j]; is_valid[v]) { - if (similarities[u * scores.size() + v] >= thresh) { - is_valid[v] = false; - } - } - } - } - } - return is_valid; - } - - struct Detections { - Bboxes bboxes; - Scores scores; - vector labels; - }; - - void GetObjectsByDetection(const Detections& dets, vector& bboxes, - vector& track_ids, vector& types) const { - auto& [_bboxes, _scores, _labels] = dets; - for (size_t i = 0; i < _bboxes.size(); ++i) { - if (_labels[i] == params.det_label && _scores[i] > params.det_thr && - Area(_bboxes[i]) >= params.det_min_bbox_size * params.det_min_bbox_size) { - bboxes.push_back(_bboxes[i]); - track_ids.push_back(-1); - types.push_back(1); - } - } - } - - void GetObjectsByTracking(vector& bboxes, vector& track_ids, - vector& types) const { - for (auto& track : track_info.tracks) { - std::optional bbox; - if (track.n_missing) { - bbox = track.bbox_pred; - } else { - bbox = keypoints_to_bbox(track.kpts_pred, track.scores.back(), static_cast(frame_h), - static_cast(frame_w), params.track_bbox_scale, - params.track_kpts_thr, params.pose_min_keypoints); - } - if (bbox && Area(*bbox) >= params.pose_min_bbox_size * params.pose_min_bbox_size) { - bboxes.push_back(*bbox); - track_ids.push_back(track.track_id); - types.push_back(track.n_missing ? 0 : 2); - } - } - } - - std::tuple, vector> ProcessBboxes(const std::optional& dets) { - vector bboxes; - vector track_ids; - - // 2 - visible tracks - // 1 - detection - // 0 - missing tracks - vector types; - - if (dets) { - GetObjectsByDetection(*dets, bboxes, track_ids, types); - } +Args ParseArgs(int argc, char* argv[]); - GetObjectsByTracking(bboxes, track_ids, types); - - vector is_valid_bboxes(bboxes.size(), 1); - - auto count = [&] { - std::array acc{}; - for (size_t i = 0; i < is_valid_bboxes.size(); ++i) { - if (is_valid_bboxes[i]) { - ++acc[types[i]]; - } - } - return acc; - }; - POSE_TRACKER_DEBUG("frame {}, bboxes {}", frame_id, count()); - - vector> ranks; - ranks.reserve(bboxes.size()); - for (int i = 0; i < bboxes.size(); ++i) { - ranks.emplace_back(types[i], Area(bboxes[i])); - } - - vector iou(ranks.size() * ranks.size()); - for (int i = 0; i < bboxes.size(); ++i) { - for (int j = 0; j < i; ++j) { - iou[i * bboxes.size() + j] = iou[j * bboxes.size() + i] = - IntersectionOverUnion(bboxes[i], bboxes[j]); - } - } - - is_valid_bboxes = - SuppressNonMaximum(ranks, iou, std::move(is_valid_bboxes), params.det_nms_thr); - POSE_TRACKER_DEBUG("frame {}, bboxes after nms: {}", frame_id, count()); - - vector idxs; - idxs.reserve(bboxes.size()); - for (int i = 0; i < bboxes.size(); ++i) { - if (is_valid_bboxes[i]) { - idxs.push_back(i); - } - } - - std::stable_sort(idxs.begin(), idxs.end(), [&](int i, int j) { return ranks[i] > ranks[j]; }); - std::fill(is_valid_bboxes.begin(), is_valid_bboxes.end(), 0); - { - vector tmp_bboxes; - vector tmp_track_ids; - for (const auto& i : idxs) { - if (tmp_bboxes.size() >= params.pose_max_num_bboxes) { - break; - } - tmp_bboxes.push_back(bboxes[i]); - tmp_track_ids.push_back(track_ids[i]); - is_valid_bboxes[i] = 1; - } - bboxes = std::move(tmp_bboxes); - track_ids = std::move(tmp_track_ids); - } - - POSE_TRACKER_DEBUG("frame {}, bboxes after sort: {}", frame_id, count()); - - pose_bboxes.clear(); - for (const auto& bbox : bboxes) { - // pose_bboxes.push_back(MapBbox(bbox)); - pose_bboxes.push_back(bbox); - } - - return {bboxes, track_ids}; - } - - float ObjectKeypointSimilarity(const Points& pts_a, const Bbox& box_a, const Points& pts_b, - const Bbox& box_b) { - assert(pts_a.size() == params.sigmas.size()); - assert(pts_b.size() == params.sigmas.size()); - auto scale = [](const Bbox& bbox) -> float { - auto a = bbox[2] - bbox[0]; - auto b = bbox[3] - bbox[1]; - return std::sqrt(a * a + b * b); - }; - auto oks = [](const Point& pa, const Point& pb, float s, float k) { - return std::exp(-(pa - pb).dot(pa - pb) / (2.f * s * s * k * k)); - }; - auto sum = 0.f; - const auto s = .5f * (scale(box_a) + scale(box_b)); - for (int i = 0; i < params.sigmas.size(); ++i) { - sum += oks(pts_a[i], pts_b[i], s, params.sigmas[i]); - } - sum /= static_cast(params.sigmas.size()); - return sum; - } +using std::vector; +using namespace mmdeploy; - void UpdateTrack(Track& track, Points kpts, Scores score, const Bbox& bbox, int is_missing) { - auto avg_score = std::accumulate(score.begin(), score.end(), 0.f) / score.size(); - if (track.scores.size() == params.track_n_history) { - std::rotate(track.keypoints.begin(), track.keypoints.begin() + 1, track.keypoints.end()); - std::rotate(track.scores.begin(), track.scores.begin() + 1, track.scores.end()); - std::rotate(track.bboxes.begin(), track.bboxes.begin() + 1, track.bboxes.end()); - std::rotate(track.avg_scores.begin(), track.avg_scores.begin() + 1, track.avg_scores.end()); - std::rotate(track.is_missing.begin(), track.is_missing.begin() + 1, track.is_missing.end()); - track.keypoints.back() = std::move(kpts); - track.scores.back() = std::move(score); - track.bboxes.back() = bbox; - track.avg_scores.back() = avg_score; - track.is_missing.back() = is_missing; - } else { - track.keypoints.push_back(std::move(kpts)); - track.scores.push_back(std::move(score)); - track.bboxes.push_back(bbox); - track.avg_scores.push_back(avg_score); - track.is_missing.push_back(is_missing); - } - ++track.age; - track.n_missing = is_missing ? track.n_missing + 1 : 0; - } +bool Visualize(cv::Mat frame, const PoseTracker::Result& result, int size, + const std::string& output_dir, int frame_id, bool with_bbox); - vector> GreedyAssignment(const vector& scores, - vector& is_valid_rows, - vector& is_valid_cols, float thr) { - const auto n_rows = is_valid_rows.size(); - const auto n_cols = is_valid_cols.size(); - vector> assignment; - assignment.reserve(std::max(n_rows, n_cols)); - while (true) { - auto max_score = 0.f; - int max_row = -1; - int max_col = -1; - for (int i = 0; i < n_rows; ++i) { - if (is_valid_rows[i]) { - for (int j = 0; j < n_cols; ++j) { - if (is_valid_cols[j]) { - if (scores[i * n_cols + j] > max_score) { - max_score = scores[i * n_cols + j]; - max_row = i; - max_col = j; - } - } - } - } - } - if (max_score < thr) { - break; - } - is_valid_rows[max_row] = 0; - is_valid_cols[max_col] = 0; - assignment.emplace_back(max_row, max_col, max_score); - } - return assignment; - } - - vector SuppressOverlappingBboxes( - const vector& keypoints, const vector& scores, - const vector& is_present, // bbox from a visible track? - const vector& bboxes, vector is_valid, const vector& sigmas, - float oks_thr) { - assert(keypoints.size() == is_valid.size()); - assert(scores.size() == is_valid.size()); - assert(bboxes.size() == is_valid.size()); - const auto size = is_valid.size(); - vector oks(size * size); - for (int i = 0; i < size; ++i) { - if (is_valid[i]) { - for (int j = 0; j < i; ++j) { - if (is_valid[j]) { - oks[i * size + j] = oks[j * size + i] = - ObjectKeypointSimilarity(keypoints[i], bboxes[i], keypoints[j], bboxes[j]); - } - } - } - } - vector> ranks; - ranks.reserve(size); - for (int i = 0; i < size; ++i) { - auto& s = scores[i]; - auto avg = std::accumulate(s.begin(), s.end(), 0.f) / static_cast(s.size()); - // prevents bboxes from missing tracks to suppress visible tracks - ranks.emplace_back(is_present[i], avg); - } - return SuppressNonMaximum(ranks, oks, is_valid, oks_thr); +int main(int argc, char* argv[]) { + auto args = ParseArgs(argc, argv); + if (args.device.empty()) { + return 0; } - void TrackStep(vector& keypoints, vector& scores, - const vector& track_ids) { - auto& tracks = track_info.tracks; - - vector new_tracks; - new_tracks.reserve(tracks.size()); - - vector bboxes(keypoints.size()); - vector is_valid_bboxes(keypoints.size(), 1); - - pose_results.clear(); - - // key-points to bboxes - for (size_t i = 0; i < keypoints.size(); ++i) { - if (auto bbox = - keypoints_to_bbox(keypoints[i], scores[i], frame_h, frame_w, params.track_bbox_scale, - params.track_kpts_thr, params.pose_min_keypoints)) { - bboxes[i] = *bbox; - pose_results.push_back(*bbox); - } else { - is_valid_bboxes[i] = false; - // MMDEPLOY_INFO("frame {}: invalid key-points {}", frame_id, scores[i]); - } - } - - vector is_present(is_valid_bboxes.size()); - for (int i = 0; i < track_ids.size(); ++i) { - for (const auto& t : tracks) { - if (t.track_id == track_ids[i]) { - is_present[i] = !t.n_missing; - break; - } - } - } - is_valid_bboxes = - SuppressOverlappingBboxes(keypoints, scores, is_present, bboxes, is_valid_bboxes, - params.sigmas, params.track_nms_oks_thr); - assert(is_valid_bboxes.size() == bboxes.size()); - - const auto n_rows = static_cast(bboxes.size()); - const auto n_cols = static_cast(tracks.size()); + // create pose tracker pipeline + PoseTracker tracker(Model(args.det_model), Model(args.pose_model), Context{Device{args.device}}); - // generate similarity matrix - vector iou(n_rows * n_cols); - vector oks(n_rows * n_cols); - for (size_t i = 0; i < n_rows; ++i) { - const auto& bbox = bboxes[i]; - const auto& kpts = keypoints[i]; - for (size_t j = 0; j < n_cols; ++j) { - const auto& track = tracks[j]; - if (track_ids[i] != -1 && track_ids[i] != track.track_id) { - continue; - } - const auto index = i * n_cols + j; - iou[index] = IntersectionOverUnion(bbox, track.bbox_pred); - oks[index] = ObjectKeypointSimilarity(kpts, bbox, track.kpts_pred, track.bbox_pred); - } - } + // set parameters + PoseTracker::Params params; + params->det_min_bbox_size = 100; + params->det_interval = 1; + params->pose_max_num_bboxes = 6; - vector is_valid_tracks(n_cols, 1); - // disable missing tracks in the #1 assignment - for (int i = 0; i < tracks.size(); ++i) { - if (tracks[i].n_missing) { - is_valid_tracks[i] = 0; - } - } - const auto oks_assignment = - GreedyAssignment(oks, is_valid_bboxes, is_valid_tracks, params.track_oks_thr); + // optionally use OKS for keypoints similarity comparison + std::array sigmas{0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, + 0.062, 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089}; + params->keypoint_sigmas = sigmas.data(); + params->keypoint_sigmas_size = sigmas.size(); - // enable missing tracks in the #2 assignment - for (int i = 0; i < tracks.size(); ++i) { - if (tracks[i].n_missing) { - is_valid_tracks[i] = 1; - } - } - const auto iou_assignment = - GreedyAssignment(iou, is_valid_bboxes, is_valid_tracks, params.track_iou_thr); + // create a tracker state for each video + PoseTracker::State state = tracker.CreateState(params); - POSE_TRACKER_DEBUG("frame {}, oks assignment {}", frame_id, oks_assignment); - POSE_TRACKER_DEBUG("frame {}, iou assignment {}", frame_id, iou_assignment); - - auto assignment = oks_assignment; - assignment.insert(assignment.end(), iou_assignment.begin(), iou_assignment.end()); - - // update assigned tracks - for (auto [i, j, _] : assignment) { - auto& track = tracks[j]; - if (track.n_missing) { - // re-initialize filter for recovering tracks - track.filter = CreateFilter(bboxes[i], keypoints[i]); - UpdateTrack(track, keypoints[i], scores[i], bboxes[i], false); - POSE_TRACKER_DEBUG("frame {}, track recovered {}", frame_id, track.track_id); - } else { - auto [bbox, kpts] = track.filter->Correct(bboxes[i], keypoints[i]); - UpdateTrack(track, std::move(kpts), std::move(scores[i]), bbox, false); - } - new_tracks.push_back(std::move(track)); - } - - // generating new tracks - for (size_t i = 0; i < is_valid_bboxes.size(); ++i) { - // only newly detected bboxes are allowed to form new tracks - if (is_valid_bboxes[i] && track_ids[i] == -1) { - auto& track = new_tracks.emplace_back(); - track.track_id = track_info.next_id++; - track.filter = CreateFilter(bboxes[i], keypoints[i]); - UpdateTrack(track, std::move(keypoints[i]), std::move(scores[i]), bboxes[i], false); - is_valid_bboxes[i] = 0; - POSE_TRACKER_DEBUG("frame {}, new track {}", frame_id, track.track_id); - } - } - - if (1) { - // diagnostic for missing tracks - int n_missing = 0; - for (int i = 0; i < is_valid_tracks.size(); ++i) { - if (is_valid_tracks[i]) { - float best_oks = 0.f; - float best_iou = 0.f; - for (int j = 0; j < is_valid_bboxes.size(); ++j) { - if (is_valid_bboxes[j]) { - best_oks = std::max(oks[j * n_cols + i], best_oks); - best_iou = std::max(iou[j * n_cols + i], best_iou); - } - } - POSE_TRACKER_DEBUG("frame {}: track missing {}, best_oks={}, best_iou={}", frame_id, - tracks[i].track_id, best_oks, best_iou); - ++n_missing; - } - } - if (n_missing) { - { - std::stringstream ss; - ss << cv::Mat_(n_rows, n_cols, oks.data()); - POSE_TRACKER_DEBUG("frame {}, oks: \n{}", frame_id, ss.str()); - } - { - std::stringstream ss; - ss << cv::Mat_(n_rows, n_cols, iou.data()); - POSE_TRACKER_DEBUG("frame {}, iou: \n{}", frame_id, ss.str()); - } - } - } - - for (int i = 0; i < is_valid_tracks.size(); ++i) { - if (is_valid_tracks[i]) { - if (auto& track = tracks[i]; track.n_missing < params.track_max_missing) { - // use predicted state to update missing tracks - auto [bbox, kpts] = track.filter->Correct(track.bbox_pred, track.kpts_pred); - vector score(track.kpts_pred.size()); - POSE_TRACKER_DEBUG("frame {}, track {}, bbox width {}", frame_id, track.track_id, - bbox[2] - bbox[0]); - UpdateTrack(track, std::move(kpts), std::move(score), bbox, true); - new_tracks.push_back(std::move(track)); - } else { - POSE_TRACKER_DEBUG("frame {}, track lost {}", frame_id, track.track_id); - } - is_valid_tracks[i] = false; - } - } - - tracks = std::move(new_tracks); - for (auto& t : tracks) { - if (t.n_missing == 0) { - std::tie(t.bbox_pred, t.kpts_pred) = t.filter->Predict(); - } else { - auto [bbox, kpts] = t.filter->Predict(); - const auto alpha = params.track_missing_momentum; - cv::Mat tmp_bbox = alpha * cv::Mat(bbox, false) + (1 - alpha) * cv::Mat(t.bbox_pred, false); - tmp_bbox.copyTo(cv::Mat(t.bbox_pred, false)); - } - } - - if (0) { - vector> summary; - for (const auto& track : tracks) { - summary.emplace_back(track.track_id, track.n_missing); - } - POSE_TRACKER_DEBUG("frame {}, track summary {}", frame_id, summary); - for (const auto& track : tracks) { - if (!track.n_missing) { - POSE_TRACKER_DEBUG("frame {}, track {}, scores {}", frame_id, track.track_id, - track.scores.back()); - } - } - } + cv::VideoCapture video; + if (args.video.size() == 1 && std::isdigit(args.video[0])) { + video.open(std::stoi(args.video)); // open by camera index + } else { + video.open(args.video); // open video file } - - std::shared_ptr CreateFilter(const Bbox& bbox, const Points& kpts) const { - return std::make_shared( - params.filter_c_beta, params.filter_c_fc_min, params.filter_c_fc_d, params.filter_k_beta, - params.filter_k_fc_min, params.filter_k_fc_d, bbox, kpts); + if (!video.isOpened()) { + std::cerr << "failed to open video: " << args.video << "\n"; } - struct Target { - Bbox bbox; - vector keypoints; - Scores scores; - MMDEPLOY_ARCHIVE_MEMBERS(bbox, keypoints, scores); - }; - - vector TrackPose(vector keypoints, vector scores, - const vector& track_ids) { - TrackStep(keypoints, scores, track_ids); - vector targets; - for (const auto& track : track_info.tracks) { - if (track.n_missing) { - continue; - } - if (auto bbox = keypoints_to_bbox(track.keypoints.back(), track.scores.back(), frame_h, - frame_w, params.track_bbox_scale, params.track_kpts_thr, - params.pose_min_keypoints)) { - vector kpts; - kpts.reserve(track.keypoints.back().size()); - for (const auto& kpt : track.keypoints.back()) { - kpts.emplace_back(kpt.x); - kpts.emplace_back(kpt.y); - } - targets.push_back(Target{*bbox, std::move(kpts), track.scores.back()}); - } - } - return targets; - } - - float frame_h = 0; - float frame_w = 0; - TrackInfo track_info; - - TrackerParams params; - + cv::Mat frame; int frame_id = 0; - - vector pose_bboxes; - vector pose_results; -}; - -MMDEPLOY_REGISTER_TYPE_ID(Tracker, 0xcfe87980aa895d3a); - -std::tuple ProcessBboxes(const Value& det_val, const Value& data, Value state) { - auto& tracker = state.get_ref(); - - std::optional dets; - - if (det_val.is_array()) { // has detections - auto& [bboxes, scores, labels] = dets.emplace(); - for (const auto& det : det_val.array()) { - bboxes.push_back(from_value(det["bbox"])); - scores.push_back(det["score"].get()); - labels.push_back(det["label_id"].get()); + while (true) { + video >> frame; + if (!frame.data) { + break; } - } - - auto [bboxes, ids] = tracker.ProcessBboxes(dets); - - Value::Array bbox_array; - Value track_ids_array; - // attach bboxes to image data - for (auto& bbox : bboxes) { - cv::Rect rect(cv::Rect2f(cv::Point2f(bbox[0], bbox[1]), cv::Point2f(bbox[2], bbox[3]))); - bbox_array.push_back({ - {"img", data["img"]}, // img - {"bbox", {rect.x, rect.y, rect.width, rect.height}}, // bbox - {"rotation", 0.f} // rotation - }); - } - - track_ids_array = to_value(ids); - return {std::move(bbox_array), std::move(track_ids_array)}; -} - -REGISTER_SIMPLE_MODULE(ProcessBboxes, ProcessBboxes); - -Value TrackPose(const Value& poses, const Value& track_indices, Value state) { - assert(poses.is_array()); - vector keypoints; - vector scores; - for (auto& output : poses.array()) { - auto& k = keypoints.emplace_back(); - auto& s = scores.emplace_back(); - float avg = 0.f; - for (auto& kpt : output["key_points"].array()) { - k.emplace_back(kpt["bbox"][0].get(), kpt["bbox"][1].get()); - s.push_back(kpt["score"].get()); - avg += s.back(); + // apply the pipeline with the tracker state and video frame + auto result = tracker.Apply(state, frame); + // visualize the results + if (!Visualize(frame, result, 1280, args.output_dir, frame_id++, false)) { + break; } } - vector track_ids; - from_value(track_indices, track_ids); - auto& tracker = state.get_ref(); - auto targets = tracker.TrackPose(std::move(keypoints), std::move(scores), track_ids); - return to_value(targets); -} - -REGISTER_SIMPLE_MODULE(TrackPose, TrackPose); - -class PoseTracker { - public: - using State = Value; - - public: - PoseTracker(const Model& det_model, const Model& pose_model, Context context) - : pipeline_([&] { - context.Add("detection", det_model); - context.Add("pose", pose_model); - auto config = from_json(config_json); - return Pipeline{config, context}; - }()) {} - - State CreateState(const TrackerParams& params) { - auto state = make_pointer(Tracker{params}); - auto& tracker = state.get_ref(); - return state; - } - - Value Track(const Mat& img, State& state, int use_detector = -1) { - assert(state.is_pointer()); - framework::Mat mat(img.desc().height, img.desc().width, - static_cast(img.desc().format), - static_cast(img.desc().type), {img.desc().data, [](void*) {}}); - - auto& tracker = state.get_ref(); - - if (use_detector < 0) { - if (tracker.frame_id % tracker.params.det_interval == 0) { - use_detector = 1; - POSE_TRACKER_DEBUG("frame {}, use detector", tracker.frame_id); - } else { - use_detector = 0; - } - } - - if (tracker.frame_id == 0) { - tracker.frame_h = static_cast(mat.height()); - tracker.frame_w = static_cast(mat.width()); - } - - Value::Object data{{"ori_img", mat}}; - Value input{{data}, {use_detector}, {state}}; - auto ret = pipeline_.Apply(input)[0][0]; - ++tracker.frame_id; - - return ret; - } + return 0; +} - private: - Pipeline pipeline_; +struct Skeleton { + vector> skeleton; + vector palette; + vector link_color; + vector point_color; }; -} // namespace mmdeploy - -using namespace mmdeploy; - -const cv::Scalar& gPalette(int index) { - static vector inst{ - {255, 128, 0}, {255, 153, 51}, {255, 178, 102}, {230, 230, 0}, {255, 153, 255}, - {153, 204, 255}, {255, 102, 255}, {255, 51, 255}, {102, 178, 255}, {51, 153, 255}, - {255, 153, 153}, {255, 102, 102}, {255, 51, 51}, {153, 255, 153}, {102, 255, 102}, - {51, 255, 51}, {0, 255, 0}, {0, 0, 255}, {255, 0, 0}, {255, 255, 255}}; - return inst[index]; +const Skeleton& gCocoSkeleton() { + static const Skeleton inst{ + { + {15, 13}, {13, 11}, {16, 14}, {14, 12}, {11, 12}, {5, 11}, {6, 12}, + {5, 6}, {5, 7}, {6, 8}, {7, 9}, {8, 10}, {1, 2}, {0, 1}, + {0, 2}, {1, 3}, {2, 4}, {3, 5}, {4, 6}, + }, + { + {255, 128, 0}, {255, 153, 51}, {255, 178, 102}, {230, 230, 0}, {255, 153, 255}, + {153, 204, 255}, {255, 102, 255}, {255, 51, 255}, {102, 178, 255}, {51, 153, 255}, + {255, 153, 153}, {255, 102, 102}, {255, 51, 51}, {153, 255, 153}, {102, 255, 102}, + {51, 255, 51}, {0, 255, 0}, {0, 0, 255}, {255, 0, 0}, {255, 255, 255}, + }, + {0, 0, 0, 0, 7, 7, 7, 9, 9, 9, 9, 9, 16, 16, 16, 16, 16, 16, 16}, + {16, 16, 16, 16, 16, 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0}, + }; + return inst; } -void Visualize(cv::Mat& frame, const Value& result, const Bboxes& pose_bboxes, - const Bboxes& pose_results, int size) { - static vector> skeleton{ - {15, 13}, {13, 11}, {16, 14}, {14, 12}, {11, 12}, {5, 11}, {6, 12}, {5, 6}, {5, 7}, {6, 8}, - {7, 9}, {8, 10}, {1, 2}, {0, 1}, {0, 2}, {1, 3}, {2, 4}, {3, 5}, {4, 6}}; - static vector link_color{0, 0, 0, 0, 7, 7, 7, 9, 9, 9, 9, 9, 16, 16, 16, 16, 16, 16, 16}; - static vector kpt_color{16, 16, 16, 16, 16, 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0}; +bool Visualize(cv::Mat frame, const PoseTracker::Result& result, int size, + const std::string& output_dir, int frame_id, bool with_bbox) { + auto& [skeleton, palette, link_color, point_color] = gCocoSkeleton(); auto scale = (float)size / (float)std::max(frame.cols, frame.rows); if (scale != 1) { cv::resize(frame, frame, {}, scale, scale); + } else { + frame = frame.clone(); } - auto draw_bbox = [](cv::Mat& image, Bbox bbox, const cv::Scalar& color, float scale = 1) { + auto draw_bbox = [&](std::array bbox, const cv::Scalar& color) { std::for_each(bbox.begin(), bbox.end(), [&](auto& x) { x *= scale; }); - cv::Point p1(bbox[0], bbox[1]); - cv::Point p2(bbox[2], bbox[3]); - cv::rectangle(image, p1, p2, color); + cv::rectangle(frame, cv::Point2f(bbox[0], bbox[1]), cv::Point2f(bbox[2], bbox[3]), color); }; - const auto& targets = result.array(); - for (const auto& target : targets) { - auto bbox = from_value>(target["bbox"]); - auto kpts = from_value>(target["keypoints"]); - std::for_each(bbox.begin(), bbox.end(), [&](auto& x) { x *= scale; }); + for (const auto& r : result) { + vector kpts(&r.keypoints[0].x, &r.keypoints[0].x + r.keypoint_count * 2); + vector scores(r.scores, r.scores + r.keypoint_count); std::for_each(kpts.begin(), kpts.end(), [&](auto& x) { x *= scale; }); - auto scores = from_value>(target["scores"]); - if (0) { - draw_bbox(frame, bbox, cv::Scalar(0, 255, 0)); - } constexpr auto score_thr = .5f; vector used(kpts.size()); - for (int i = 0; i < skeleton.size(); ++i) { + for (size_t i = 0; i < skeleton.size(); ++i) { auto [u, v] = skeleton[i]; if (scores[u] > score_thr && scores[v] > score_thr) { used[u] = used[v] = 1; - cv::Point p_u(kpts[u * 2], kpts[u * 2 + 1]); - cv::Point p_v(kpts[v * 2], kpts[v * 2 + 1]); - cv::line(frame, p_u, p_v, gPalette(link_color[i]), 1, cv::LINE_AA); + cv::Point2f p_u(kpts[u * 2], kpts[u * 2 + 1]); + cv::Point2f p_v(kpts[v * 2], kpts[v * 2 + 1]); + cv::line(frame, p_u, p_v, palette[link_color[i]], 1, cv::LINE_AA); } } - for (int i = 0; i < kpts.size(); i += 2) { + for (size_t i = 0; i < kpts.size(); i += 2) { if (used[i / 2]) { - cv::Point p(kpts[i], kpts[i + 1]); - cv::circle(frame, p, 1, gPalette(kpt_color[i / 2]), 2, cv::LINE_AA); + cv::Point2f p(kpts[i], kpts[i + 1]); + cv::circle(frame, p, 1, palette[point_color[i / 2]], 2, cv::LINE_AA); } } - } - if (0) { - for (auto bbox : pose_bboxes) { - draw_bbox(frame, bbox, {0, 255, 255}, scale); - } - for (auto bbox : pose_results) { - draw_bbox(frame, bbox, {0, 255, 0}, scale); + if (with_bbox) { + draw_bbox((std::array&)r.bbox, cv::Scalar(0, 255, 0)); } } - static int frame_id = 0; - cv::imwrite(fmt::format("pose_{}.jpg", frame_id++), frame, {cv::IMWRITE_JPEG_QUALITY, 90}); -} - -// ffmpeg -f image2 -i pose_%d.jpg -vcodec hevc -crf 30 pose.mp4 - -int main(int argc, char* argv[]) { - const auto device_name = argv[1]; - const auto det_model_path = argv[2]; - const auto pose_model_path = argv[3]; - const auto video_path = argv[4]; - Device device(device_name); - Context context(device); - Profiler profiler("pose_tracker.perf"); - context.Add(profiler); - PoseTracker tracker(Model(det_model_path), Model(pose_model_path), context); - TrackerParams params; - // coco - params.sigmas = {0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, - 0.062, 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089}; - params.pose_max_num_bboxes = 5; - params.det_interval = 5; - - auto state = tracker.CreateState(params); - - cv::Mat frame; - std::chrono::duration dt{}; - - int frame_id{}; - - cv::VideoCapture video(video_path); - while (true) { - video >> frame; - if (!frame.data) { - break; - } - auto t0 = std::chrono::high_resolution_clock::now(); - auto result = tracker.Track(frame, state); - auto t1 = std::chrono::high_resolution_clock::now(); - dt += t1 - t0; - ++frame_id; - - auto& pose_bboxes = state.get_ref().pose_bboxes; - auto& pose_results = state.get_ref().pose_results; - - Visualize(frame, result, pose_bboxes, pose_results, 1024); + if (output_dir.empty()) { + cv::imshow("pose_tracker", frame); + return cv::waitKey(1) != 'q'; } + auto join = [](const std::string& a, const std::string& b) { +#if _MSC_VER + return a + "\\" + b; +#else + return a + "/" + b; +#endif + }; + cv::imwrite(join(output_dir, std::to_string(frame_id) + ".jpg"), frame, + {cv::IMWRITE_JPEG_QUALITY, 90}); + return true; +} - MMDEPLOY_INFO("frames: {}, time {} ms", frame_id, dt.count()); +Args ParseArgs(int argc, char* argv[]) { + if (argc < 5 || argc > 6) { + std::cout << R"(Usage: pose_tracker device_name det_model pose_model video [output] + device_name device name for execution, e.g. "cpu", "cuda" + det_model object detection model path + pose_model pose estimation model path + video video path or camera index + output output directory, will cv::imshow if omitted +)"; + return {}; + } + Args args; + args.device = argv[1]; + args.det_model = argv[2]; + args.pose_model = argv[3]; + args.video = argv[4]; + if (argc == 6) { + args.output_dir = argv[5]; + } + return args; } diff --git a/demo/python/det_pose.py b/demo/python/det_pose.py new file mode 100644 index 0000000000..3422eee9fa --- /dev/null +++ b/demo/python/det_pose.py @@ -0,0 +1,85 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse + +import cv2 +import numpy as np +from mmdeploy_python import Detector, PoseDetector + + +def parse_args(): + parser = argparse.ArgumentParser( + description='show how to use SDK Python API') + parser.add_argument('device_name', help='name of device, cuda or cpu') + parser.add_argument( + 'det_model_path', + help='path of mmdeploy SDK model dumped by model converter') + parser.add_argument( + 'pose_model_path', + help='path of mmdeploy SDK model dumped by model converter') + parser.add_argument('image_path', help='path of input image') + args = parser.parse_args() + return args + + +def visualize(frame, keypoints, filename, thr=0.5, resize=1280): + skeleton = [(15, 13), (13, 11), (16, 14), (14, 12), (11, 12), (5, 11), + (6, 12), (5, 6), (5, 7), (6, 8), (7, 9), (8, 10), (1, 2), + (0, 1), (0, 2), (1, 3), (2, 4), (3, 5), (4, 6)] + palette = [(255, 128, 0), (255, 153, 51), (255, 178, 102), (230, 230, 0), + (255, 153, 255), (153, 204, 255), (255, 102, 255), + (255, 51, 255), (102, 178, 255), + (51, 153, 255), (255, 153, 153), (255, 102, 102), (255, 51, 51), + (153, 255, 153), (102, 255, 102), (51, 255, 51), (0, 255, 0), + (0, 0, 255), (255, 0, 0), (255, 255, 255)] + link_color = [ + 0, 0, 0, 0, 7, 7, 7, 9, 9, 9, 9, 9, 16, 16, 16, 16, 16, 16, 16 + ] + point_color = [16, 16, 16, 16, 16, 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0] + + scale = resize / max(frame.shape[0], frame.shape[1]) + + scores = keypoints[..., 2] + keypoints = (keypoints[..., :2] * scale).astype(int) + + img = cv2.resize(frame, (0, 0), fx=scale, fy=scale) + for kpts, score in zip(keypoints, scores): + show = [0] * len(kpts) + for (u, v), color in zip(skeleton, link_color): + if score[u] > thr and score[v] > thr: + cv2.line(img, kpts[u], tuple(kpts[v]), palette[color], 1, + cv2.LINE_AA) + show[u] = show[v] = 1 + for kpt, show, color in zip(kpts, show, point_color): + if show: + cv2.circle(img, kpt, 1, palette[color], 2, cv2.LINE_AA) + cv2.imwrite(filename, img) + + +def main(): + args = parse_args() + + # load image + img = cv2.imread(args.image_path) + + # create object detector + detector = Detector( + model_path=args.det_model_path, device_name=args.device_name) + # create pose detector + pose_detector = PoseDetector( + model_path=args.pose_model_path, device_name=args.device_name) + + # apply detector + bboxes, labels, _ = detector(img) + + # filter detections + keep = np.logical_and(labels == 0, bboxes[..., 4] > 0.6) + bboxes = bboxes[keep, :4] + + # apply pose detector + poses = pose_detector(img, bboxes) + + visualize(img, poses, 'det_pose_output.jpg', 0.5, 1280) + + +if __name__ == '__main__': + main() diff --git a/demo/python/pose_tracker.py b/demo/python/pose_tracker.py new file mode 100644 index 0000000000..9b1b8fa3d6 --- /dev/null +++ b/demo/python/pose_tracker.py @@ -0,0 +1,98 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os + +import cv2 +from mmdeploy_python import PoseTracker + + +def parse_args(): + parser = argparse.ArgumentParser( + description='show how to use SDK Python API') + parser.add_argument('device_name', help='name of device, cuda or cpu') + parser.add_argument( + 'det_model', + help='path of mmdeploy SDK model dumped by model converter') + parser.add_argument( + 'pose_model', + help='path of mmdeploy SDK model dumped by model converter') + parser.add_argument('video', help='video path or camera index') + parser.add_argument('--output_dir', help='output directory', default=None) + args = parser.parse_args() + if args.video.isnumeric(): + args.video = int(args.video) + return args + + +def visualize(frame, results, output_dir, frame_id, thr=0.5, resize=1280): + skeleton = [(15, 13), (13, 11), (16, 14), (14, 12), (11, 12), (5, 11), + (6, 12), (5, 6), (5, 7), (6, 8), (7, 9), (8, 10), (1, 2), + (0, 1), (0, 2), (1, 3), (2, 4), (3, 5), (4, 6)] + palette = [(255, 128, 0), (255, 153, 51), (255, 178, 102), (230, 230, 0), + (255, 153, 255), (153, 204, 255), (255, 102, 255), + (255, 51, 255), (102, 178, 255), + (51, 153, 255), (255, 153, 153), (255, 102, 102), (255, 51, 51), + (153, 255, 153), (102, 255, 102), (51, 255, 51), (0, 255, 0), + (0, 0, 255), (255, 0, 0), (255, 255, 255)] + link_color = [ + 0, 0, 0, 0, 7, 7, 7, 9, 9, 9, 9, 9, 16, 16, 16, 16, 16, 16, 16 + ] + point_color = [16, 16, 16, 16, 16, 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0] + scale = resize / max(frame.shape[0], frame.shape[1]) + keypoints, bboxes, _ = results + scores = keypoints[..., 2] + keypoints = (keypoints[..., :2] * scale).astype(int) + bboxes *= scale + img = cv2.resize(frame, (0, 0), fx=scale, fy=scale) + for kpts, score, bbox in zip(keypoints, scores, bboxes): + show = [0] * len(kpts) + for (u, v), color in zip(skeleton, link_color): + if score[u] > thr and score[v] > thr: + cv2.line(img, kpts[u], tuple(kpts[v]), palette[color], 1, + cv2.LINE_AA) + show[u] = show[v] = 1 + for kpt, show, color in zip(kpts, show, point_color): + if show: + cv2.circle(img, kpt, 1, palette[color], 2, cv2.LINE_AA) + if output_dir: + cv2.imwrite(f'{output_dir}/{str(frame_id).zfill(6)}.jpg', img) + else: + cv2.imshow('pose_tracker', img) + return cv2.waitKey(1) != 'q' + return True + + +def main(): + args = parse_args() + + video = cv2.VideoCapture(args.video) + + tracker = PoseTracker( + det_model=args.det_model, + pose_model=args.pose_model, + device_name=args.device_name) + + # optionally use OKS for keypoints similarity comparison + coco_sigmas = [ + 0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062, + 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089 + ] + state = tracker.create_state( + det_interval=1, det_min_bbox_size=100, keypoint_sigmas=coco_sigmas) + + if args.output_dir: + os.makedirs(args.output_dir, exist_ok=True) + + frame_id = 0 + while True: + success, frame = video.read() + if not success: + break + results = tracker(state, frame, detect=-1) + if not visualize(frame, results, args.output_dir, frame_id): + break + frame_id += 1 + + +if __name__ == '__main__': + main()