From 8d344ee1642decced07b8e112e9702c6021f3303 Mon Sep 17 00:00:00 2001
From: Chen Xin <irexyc@gmail.com>
Date: Fri, 13 Jan 2023 16:08:29 +0800
Subject: [PATCH 1/3] [Enhancement] Speedup TopDownAffine by CropResizePad
 (#1486)

* "use 'CropResizePad' to speed up topdownaffine"

* add missing header

(cherry picked from commit c458e2a5244c299f4a76e0d41fa63e8bdde86d20)
---
 .../apis/c/mmdeploy/pose_detector.cpp         |   2 +-
 csrc/mmdeploy/codebase/mmpose/CMakeLists.txt  |   8 +-
 .../mmpose/keypoints_from_heatmap.cpp         |   1 -
 .../mmpose/keypoints_from_regression.cpp      |   1 -
 .../codebase/mmpose/topdown_affine.cpp        | 163 ++++++++----------
 csrc/mmdeploy/operation/cpu/CMakeLists.txt    |   3 +-
 .../operation/cpu/crop_resize_pad.cpp         |  25 +++
 csrc/mmdeploy/operation/cuda/CMakeLists.txt   |   3 +-
 .../operation/cuda/crop_resize_pad.cpp        |  88 ++++++++++
 csrc/mmdeploy/operation/vision.cpp            |   1 +
 csrc/mmdeploy/operation/vision.h              |   9 +
 csrc/mmdeploy/utils/opencv/opencv_utils.cpp   |  12 ++
 csrc/mmdeploy/utils/opencv/opencv_utils.h     |  10 ++
 demo/csrc/c/det_pose.cpp                      |   3 +-
 14 files changed, 229 insertions(+), 100 deletions(-)
 create mode 100644 csrc/mmdeploy/operation/cpu/crop_resize_pad.cpp
 create mode 100644 csrc/mmdeploy/operation/cuda/crop_resize_pad.cpp

diff --git a/csrc/mmdeploy/apis/c/mmdeploy/pose_detector.cpp b/csrc/mmdeploy/apis/c/mmdeploy/pose_detector.cpp
index 5db936bde5..e4fffd4655 100644
--- a/csrc/mmdeploy/apis/c/mmdeploy/pose_detector.cpp
+++ b/csrc/mmdeploy/apis/c/mmdeploy/pose_detector.cpp
@@ -117,7 +117,7 @@ int mmdeploy_pose_detector_create_input(const mmdeploy_mat_t* mats, int mat_coun
       } else {
         b = {0, 0, img.width(), img.height(), 1.0};
       }
-      input_images.push_back({{"ori_img", img}, {"bbox", std::move(b)}, {"rotation", 0.f}});
+      input_images.push_back({{"ori_img", img}, {"bbox", std::move(b)}});
     };
 
     for (int i = 0; i < mat_count; ++i) {
diff --git a/csrc/mmdeploy/codebase/mmpose/CMakeLists.txt b/csrc/mmdeploy/codebase/mmpose/CMakeLists.txt
index 968b6e956c..86b244a662 100644
--- a/csrc/mmdeploy/codebase/mmpose/CMakeLists.txt
+++ b/csrc/mmdeploy/codebase/mmpose/CMakeLists.txt
@@ -2,13 +2,15 @@
 
 project(mmdeploy_mmpose)
 
+find_package(OpenCV REQUIRED)
 
-file(GLOB_RECURSE SRCS ${CMAKE_CURRENT_SOURCE_DIR} "*.cpp")
-mmdeploy_add_module(${PROJECT_NAME} "${SRCS}")
+aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR} MMPOSE_SRCS)
+
+mmdeploy_add_module(${PROJECT_NAME} ${MMPOSE_SRCS})
 target_link_libraries(${PROJECT_NAME} PRIVATE
         mmdeploy::transform
         mmdeploy_operation
-        mmdeploy_opencv_utils)
+        ${OpenCV_LIBS})
 add_library(mmdeploy::mmpose ALIAS ${PROJECT_NAME})
 
 set(MMDEPLOY_TASKS ${MMDEPLOY_TASKS} pose_detector CACHE INTERNAL "")
diff --git a/csrc/mmdeploy/codebase/mmpose/keypoints_from_heatmap.cpp b/csrc/mmdeploy/codebase/mmpose/keypoints_from_heatmap.cpp
index cf20173e69..a199b752e0 100644
--- a/csrc/mmdeploy/codebase/mmpose/keypoints_from_heatmap.cpp
+++ b/csrc/mmdeploy/codebase/mmpose/keypoints_from_heatmap.cpp
@@ -12,7 +12,6 @@
 #include "mmdeploy/core/value.h"
 #include "mmdeploy/experimental/module_adapter.h"
 #include "mmpose.h"
-#include "opencv_utils.h"
 
 namespace mmdeploy::mmpose {
 
diff --git a/csrc/mmdeploy/codebase/mmpose/keypoints_from_regression.cpp b/csrc/mmdeploy/codebase/mmpose/keypoints_from_regression.cpp
index 6810519575..06f372c788 100644
--- a/csrc/mmdeploy/codebase/mmpose/keypoints_from_regression.cpp
+++ b/csrc/mmdeploy/codebase/mmpose/keypoints_from_regression.cpp
@@ -11,7 +11,6 @@
 #include "mmdeploy/core/value.h"
 #include "mmdeploy/experimental/module_adapter.h"
 #include "mmpose.h"
-#include "opencv_utils.h"
 
 namespace mmdeploy::mmpose {
 
diff --git a/csrc/mmdeploy/codebase/mmpose/topdown_affine.cpp b/csrc/mmdeploy/codebase/mmpose/topdown_affine.cpp
index 75b93fa847..f04ab0f38e 100644
--- a/csrc/mmdeploy/codebase/mmpose/topdown_affine.cpp
+++ b/csrc/mmdeploy/codebase/mmpose/topdown_affine.cpp
@@ -1,5 +1,6 @@
 // Copyright (c) OpenMMLab. All rights reserved.
 
+#include <array>
 #include <set>
 
 #include "mmdeploy/archive/value_archive.h"
@@ -10,31 +11,18 @@
 #include "mmdeploy/operation/managed.h"
 #include "mmdeploy/operation/vision.h"
 #include "mmdeploy/preprocess/transform/transform.h"
-#include "opencv2/imgproc.hpp"
-#include "opencv_utils.h"
 
 using namespace std;
 
 namespace mmdeploy::mmpose {
 
-cv::Point2f operator*(const cv::Point2f& a, const cv::Point2f& b) {
-  cv::Point2f c;
-  c.x = a.x * b.x;
-  c.y = a.y * b.y;
-  return c;
-}
-
 class TopDownAffine : public transform::Transform {
  public:
   explicit TopDownAffine(const Value& args) noexcept {
-    use_udp_ = args.value("use_udp", use_udp_);
-    backend_ = args.contains("backend") && args["backend"].is_string()
-                   ? args["backend"].get<string>()
-                   : backend_;
-    stream_ = args["context"]["stream"].get<Stream>();
     assert(args.contains("image_size"));
     from_value(args["image_size"], image_size_);
-    warp_affine_ = operation::Managed<operation::WarpAffine>::Create("bilinear");
+    crop_resize_pad_ =
+        ::mmdeploy::operation::Managed<::mmdeploy::operation::CropResizePad>::Create();
   }
 
   ~TopDownAffine() override = default;
@@ -52,6 +40,7 @@ class TopDownAffine : public transform::Transform {
       // after mmpose v0.26.0
       from_value(data["center"], c);
       from_value(data["scale"], s);
+      from_value(data["bbox"], bbox);
     } else {
       // before mmpose v0.26.0
       from_value(data["bbox"], bbox);
@@ -59,28 +48,84 @@ class TopDownAffine : public transform::Transform {
     }
     // end prepare data
 
-    auto r = data["rotation"].get<float>();
-
     Tensor dst;
-    if (use_udp_) {
-      cv::Mat trans =
-          GetWarpMatrix(r, {c[0] * 2.f, c[1] * 2.f}, {image_size_[0] - 1.f, image_size_[1] - 1.f},
-                        {s[0] * 200.f, s[1] * 200.f});
-      OUTCOME_TRY(warp_affine_.Apply(img, dst, trans.ptr<float>(), image_size_[1], image_size_[0]));
-    } else {
-      cv::Mat trans =
-          GetAffineTransform({c[0], c[1]}, {s[0], s[1]}, r, {image_size_[0], image_size_[1]});
-      OUTCOME_TRY(warp_affine_.Apply(img, dst, trans.ptr<float>(), image_size_[1], image_size_[0]));
+    {
+      s[0] *= 200;
+      s[1] *= 200;
+      const std::array img_roi{0, 0, (int)img.shape(2), (int)img.shape(1)};
+      const std::array tmp_roi{0, 0, (int)image_size_[0], (int)image_size_[1]};
+      auto roi = round({c[0] - s[0] / 2.f, c[1] - s[1] / 2.f, s[0], s[1]});
+      auto src_roi = intersect(roi, img_roi);
+      // prior scale factor
+      auto factor = (float)image_size_[0] / s[0];
+      // rounded dst roi
+      auto dst_roi = round({(src_roi[0] - roi[0]) * factor,  //
+                            (src_roi[1] - roi[1]) * factor,  //
+                            src_roi[2] * factor,             //
+                            src_roi[3] * factor});
+      dst_roi = intersect(dst_roi, tmp_roi);
+      // exact scale factors
+      auto factor_x = (float)dst_roi[2] / src_roi[2];
+      auto factor_y = (float)dst_roi[3] / src_roi[3];
+      // center of src roi
+      auto c_src_x = src_roi[0] + (src_roi[2] - 1) / 2.f;
+      auto c_src_y = src_roi[1] + (src_roi[3] - 1) / 2.f;
+      // center of dst roi
+      auto c_dst_x = dst_roi[0] + (dst_roi[2] - 1) / 2.f;
+      auto c_dst_y = dst_roi[1] + (dst_roi[3] - 1) / 2.f;
+      // vector from c_dst to (w/2, h/2)
+      auto v_dst_x = image_size_[0] / 2.f - c_dst_x;
+      auto v_dst_y = image_size_[1] / 2.f - c_dst_y;
+      // vector from c_src to corrected center
+      auto v_src_x = v_dst_x / factor_x;
+      auto v_src_y = v_dst_y / factor_y;
+      // corrected center
+      c[0] = c_src_x + v_src_x;
+      c[1] = c_src_y + v_src_y;
+      // corrected scale
+      s[0] = image_size_[0] / factor_x / 200.f;
+      s[1] = image_size_[1] / factor_y / 200.f;
+
+      vector<int> crop_rect = {src_roi[1], src_roi[0], src_roi[1] + src_roi[3] - 1,
+                               src_roi[0] + src_roi[2] - 1};
+      vector<int> target_size = {dst_roi[2], dst_roi[3]};
+      vector<int> pad_rect = {dst_roi[1], dst_roi[0], image_size_[1] - dst_roi[3] - dst_roi[1],
+                              image_size_[0] - dst_roi[2] - dst_roi[0]};
+      crop_resize_pad_.Apply(img, crop_rect, target_size, pad_rect, dst);
     }
 
-    data["img_shape"] = {1, image_size_[1], image_size_[0], dst.shape(3)};
     data["img"] = std::move(dst);
+    data["img_shape"] = {1, image_size_[1], image_size_[0], img.shape(3)};
     data["center"] = to_value(c);
     data["scale"] = to_value(s);
     MMDEPLOY_DEBUG("output: {}", data);
     return success();
   }
 
+  static std::array<int, 4> round(const std::array<float, 4>& a) {
+    return {
+        static_cast<int>(std::round(a[0])),
+        static_cast<int>(std::round(a[1])),
+        static_cast<int>(std::round(a[2])),
+        static_cast<int>(std::round(a[3])),
+    };
+  }
+
+  // xywh
+  template <typename T>
+  static std::array<T, 4> intersect(std::array<T, 4> a, std::array<T, 4> b) {
+    auto x1 = std::max(a[0], b[0]);
+    auto y1 = std::max(a[1], b[1]);
+    a[2] = std::min(a[0] + a[2], b[0] + b[2]) - x1;
+    a[3] = std::min(a[1] + a[3], b[1] + b[3]) - y1;
+    a[0] = x1;
+    a[1] = y1;
+    if (a[2] <= 0 || a[3] <= 0) {
+      a = {};
+    }
+    return a;
+  }
+
   void Box2cs(vector<float>& box, vector<float>& center, vector<float>& scale) {
     // bbox_xywh2cs
     float x = box[0];
@@ -99,71 +144,9 @@ class TopDownAffine : public transform::Transform {
     scale.push_back(h / 200 * 1.25);
   }
 
-  cv::Mat GetWarpMatrix(float theta, cv::Size2f size_input, cv::Size2f size_dst,
-                        cv::Size2f size_target) {
-    theta = theta * 3.1415926 / 180;
-    float scale_x = size_dst.width / size_target.width;
-    float scale_y = size_dst.height / size_target.height;
-    cv::Mat matrix = cv::Mat(2, 3, CV_32F);
-    matrix.at<float>(0, 0) = std::cos(theta) * scale_x;
-    matrix.at<float>(0, 1) = -std::sin(theta) * scale_x;
-    matrix.at<float>(0, 2) =
-        scale_x * (-0.5f * size_input.width * std::cos(theta) +
-                   0.5f * size_input.height * std::sin(theta) + 0.5f * size_target.width);
-    matrix.at<float>(1, 0) = std::sin(theta) * scale_y;
-    matrix.at<float>(1, 1) = std::cos(theta) * scale_y;
-    matrix.at<float>(1, 2) =
-        scale_y * (-0.5f * size_input.width * std::sin(theta) -
-                   0.5f * size_input.height * std::cos(theta) + 0.5f * size_target.height);
-    return matrix;
-  }
-
-  cv::Mat GetAffineTransform(cv::Point2f center, cv::Point2f scale, float rot, cv::Size output_size,
-                             cv::Point2f shift = {0.f, 0.f}, bool inv = false) {
-    cv::Point2f scale_tmp = scale * 200;
-    float src_w = scale_tmp.x;
-    int dst_w = output_size.width;
-    int dst_h = output_size.height;
-    float rot_rad = 3.1415926 * rot / 180;
-    cv::Point2f src_dir = rotate_point({0.f, src_w * -0.5f}, rot_rad);
-    cv::Point2f dst_dir = {0.f, dst_w * -0.5f};
-
-    cv::Point2f src_points[3];
-    src_points[0] = center + scale_tmp * shift;
-    src_points[1] = center + src_dir + scale_tmp * shift;
-    src_points[2] = Get3rdPoint(src_points[0], src_points[1]);
-
-    cv::Point2f dst_points[3];
-    dst_points[0] = {dst_w * 0.5f, dst_h * 0.5f};
-    dst_points[1] = dst_dir + cv::Point2f(dst_w * 0.5f, dst_h * 0.5f);
-    dst_points[2] = Get3rdPoint(dst_points[0], dst_points[1]);
-
-    cv::Mat trans = inv ? cv::getAffineTransform(dst_points, src_points)
-                        : cv::getAffineTransform(src_points, dst_points);
-    trans.convertTo(trans, CV_32F);
-    return trans;
-  }
-
-  cv::Point2f rotate_point(cv::Point2f pt, float angle_rad) {
-    float sn = std::sin(angle_rad);
-    float cs = std::cos(angle_rad);
-    float new_x = pt.x * cs - pt.y * sn;
-    float new_y = pt.x * sn + pt.y * cs;
-    return {new_x, new_y};
-  }
-
-  cv::Point2f Get3rdPoint(cv::Point2f a, cv::Point2f b) {
-    cv::Point2f direction = a - b;
-    cv::Point2f third_pt = b + cv::Point2f(-direction.y, direction.x);
-    return third_pt;
-  }
-
  protected:
-  operation::Managed<operation::WarpAffine> warp_affine_;
-  bool use_udp_{false};
   vector<int> image_size_;
-  std::string backend_;
-  Stream stream_;
+  ::mmdeploy::operation::Managed<::mmdeploy::operation::CropResizePad> crop_resize_pad_;
 };
 
 MMDEPLOY_REGISTER_TRANSFORM(TopDownAffine);
diff --git a/csrc/mmdeploy/operation/cpu/CMakeLists.txt b/csrc/mmdeploy/operation/cpu/CMakeLists.txt
index d1310baaef..fa6a1de56c 100644
--- a/csrc/mmdeploy/operation/cpu/CMakeLists.txt
+++ b/csrc/mmdeploy/operation/cpu/CMakeLists.txt
@@ -10,7 +10,8 @@ set(SRCS resize.cpp
         normalize.cpp
         crop.cpp
         flip.cpp
-        warp_affine.cpp)
+        warp_affine.cpp
+        crop_resize_pad.cpp)
 
 mmdeploy_add_module(${PROJECT_NAME} "${SRCS}")
 
diff --git a/csrc/mmdeploy/operation/cpu/crop_resize_pad.cpp b/csrc/mmdeploy/operation/cpu/crop_resize_pad.cpp
new file mode 100644
index 0000000000..ee0dea65ab
--- /dev/null
+++ b/csrc/mmdeploy/operation/cpu/crop_resize_pad.cpp
@@ -0,0 +1,25 @@
+// Copyright (c) OpenMMLab. All rights reserved.
+
+#include "mmdeploy/operation/vision.h"
+#include "mmdeploy/utils/opencv/opencv_utils.h"
+
+namespace mmdeploy::operation::cpu {
+
+class CropResizePadImpl : public CropResizePad {
+ public:
+  CropResizePadImpl() = default;
+
+  Result<void> apply(const Tensor &src, const std::vector<int> &crop_rect,
+                     const std::vector<int> &target_size, const std::vector<int> &pad_rect,
+                     Tensor &dst) override {
+    auto src_mat = mmdeploy::cpu::Tensor2CVMat(src);
+    auto dst_mat = mmdeploy::cpu::CropResizePad(src_mat, crop_rect, target_size, pad_rect);
+    dst = mmdeploy::cpu::CVMat2Tensor(dst_mat);
+    return success();
+  }
+};
+
+MMDEPLOY_REGISTER_FACTORY_FUNC(CropResizePad, (cpu, 0),
+                               []() { return std::make_unique<CropResizePadImpl>(); });
+
+}  // namespace mmdeploy::operation::cpu
diff --git a/csrc/mmdeploy/operation/cuda/CMakeLists.txt b/csrc/mmdeploy/operation/cuda/CMakeLists.txt
index 5e04f640bd..8322b9d2ad 100644
--- a/csrc/mmdeploy/operation/cuda/CMakeLists.txt
+++ b/csrc/mmdeploy/operation/cuda/CMakeLists.txt
@@ -18,7 +18,8 @@ set(SRCS resize.cpp
         crop.cpp
         crop.cu
         flip.cpp
-        warp_affine.cpp)
+        warp_affine.cpp
+        crop_resize_pad.cpp)
 
 mmdeploy_add_module(${PROJECT_NAME} "${SRCS}")
 
diff --git a/csrc/mmdeploy/operation/cuda/crop_resize_pad.cpp b/csrc/mmdeploy/operation/cuda/crop_resize_pad.cpp
new file mode 100644
index 0000000000..8bf3232d03
--- /dev/null
+++ b/csrc/mmdeploy/operation/cuda/crop_resize_pad.cpp
@@ -0,0 +1,88 @@
+// Copyright (c) OpenMMLab. All rights reserved.
+
+#include "mmdeploy/operation/vision.h"
+#include "ppl/cv/cuda/resize.h"
+
+namespace mmdeploy::operation::cuda {
+
+class CropResizePadImpl : public CropResizePad {
+ public:
+  CropResizePadImpl() = default;
+
+  Result<void> apply(const Tensor &src, const std::vector<int> &crop_rect,
+                     const std::vector<int> &target_size, const std::vector<int> &pad_rect,
+                     Tensor &dst) override {
+    auto cuda_stream = GetNative<cudaStream_t>(stream());
+
+    int width = target_size[0] + pad_rect[1] + pad_rect[3];
+    int height = target_size[1] + pad_rect[0] + pad_rect[2];
+
+    TensorDesc desc{device(), src.data_type(), {1, height, width, src.shape(3)}, src.name()};
+    Tensor dst_tensor(desc);
+    cudaMemsetAsync(dst_tensor.data<uint8_t>(), 0, dst_tensor.byte_size(), cuda_stream);
+
+    if (src.data_type() == DataType::kINT8) {
+      OUTCOME_TRY(
+          ResizeDispatch<uint8_t>(src, crop_rect, target_size, pad_rect, dst_tensor, cuda_stream));
+    } else if (src.data_type() == DataType::kFLOAT) {
+      OUTCOME_TRY(
+          ResizeDispatch<float>(src, crop_rect, target_size, pad_rect, dst_tensor, cuda_stream));
+    } else {
+      MMDEPLOY_ERROR("unsupported data type {}", src.data_type());
+      return Status(eNotSupported);
+    }
+
+    dst = std::move(dst_tensor);
+    return success();
+  }
+
+ private:
+  template <typename T>
+  auto Select(int channels) -> decltype(&ppl::cv::cuda::Resize<T, 1>) {
+    switch (channels) {
+      case 1:
+        return &ppl::cv::cuda::Resize<T, 1>;
+      case 3:
+        return &ppl::cv::cuda::Resize<T, 3>;
+      case 4:
+        return &ppl::cv::cuda::Resize<T, 4>;
+      default:
+        MMDEPLOY_ERROR("unsupported channels {}", channels);
+        return nullptr;
+    }
+  }
+
+  template <class T>
+  Result<void> ResizeDispatch(const Tensor &src, const std::vector<int> &crop_rect,
+                              const std::vector<int> &target_size, const std::vector<int> &pad_rect,
+                              Tensor &dst, cudaStream_t stream) {
+    int in_height = crop_rect[2] - crop_rect[0] + 1;
+    int in_width = crop_rect[3] - crop_rect[1] + 1;
+    int in_width_stride = src.shape(2) * src.shape(3);
+    int in_offset = crop_rect[0] * in_width_stride + crop_rect[1] * src.shape(3);
+    int out_h = target_size[1];
+    int out_w = target_size[0];
+    int out_width_stride = dst.shape(2) * dst.shape(3);
+    int out_offset = pad_rect[0] * out_width_stride + pad_rect[1] * dst.shape(3);
+    auto interp = ppl::cv::InterpolationType::INTERPOLATION_LINEAR;
+
+    auto input = src.data<T>();
+    auto output = dst.data<T>();
+
+    ppl::common::RetCode ret = 0;
+
+    if (auto resize = Select<T>(src.shape(3)); resize) {
+      ret = resize(stream, in_height, in_width, in_width_stride, input + in_offset, out_h, out_w,
+                   out_width_stride, output + out_offset, interp);
+    } else {
+      return Status(eNotSupported);
+    }
+
+    return ret == 0 ? success() : Result<void>(Status(eFail));
+  }
+};
+
+MMDEPLOY_REGISTER_FACTORY_FUNC(CropResizePad, (cuda, 0),
+                               []() { return std::make_unique<CropResizePadImpl>(); });
+
+}  // namespace mmdeploy::operation::cuda
diff --git a/csrc/mmdeploy/operation/vision.cpp b/csrc/mmdeploy/operation/vision.cpp
index 35076e2bdb..18694f06a5 100644
--- a/csrc/mmdeploy/operation/vision.cpp
+++ b/csrc/mmdeploy/operation/vision.cpp
@@ -13,5 +13,6 @@ MMDEPLOY_DEFINE_REGISTRY(Normalize);
 MMDEPLOY_DEFINE_REGISTRY(Crop);
 MMDEPLOY_DEFINE_REGISTRY(Flip);
 MMDEPLOY_DEFINE_REGISTRY(WarpAffine);
+MMDEPLOY_DEFINE_REGISTRY(CropResizePad);
 
 }  // namespace mmdeploy::operation
diff --git a/csrc/mmdeploy/operation/vision.h b/csrc/mmdeploy/operation/vision.h
index 9b65dbaaac..10e699fed4 100644
--- a/csrc/mmdeploy/operation/vision.h
+++ b/csrc/mmdeploy/operation/vision.h
@@ -84,6 +84,15 @@ class WarpAffine : public Operation {
 };
 MMDEPLOY_DECLARE_REGISTRY(WarpAffine, unique_ptr<WarpAffine>(const string_view& interp));
 
+class CropResizePad : public Operation {
+ public:
+  virtual Result<void> apply(const Tensor& src, const std::vector<int>& crop_rect,
+                             const std::vector<int>& target_size, const std::vector<int>& pad_rect,
+                             Tensor& dst) = 0;
+};
+
+MMDEPLOY_DECLARE_REGISTRY(CropResizePad, unique_ptr<CropResizePad>());
+
 }  // namespace mmdeploy::operation
 
 #endif  // MMDEPLOY_CSRC_MMDEPLOY_PREPROCESS_OPERATION_RESIZE_H_
diff --git a/csrc/mmdeploy/utils/opencv/opencv_utils.cpp b/csrc/mmdeploy/utils/opencv/opencv_utils.cpp
index d410d5dcc8..17a87bbf9b 100644
--- a/csrc/mmdeploy/utils/opencv/opencv_utils.cpp
+++ b/csrc/mmdeploy/utils/opencv/opencv_utils.cpp
@@ -290,6 +290,18 @@ cv::Mat Pad(const cv::Mat& src, int top, int left, int bottom, int right, int bo
   return dst;
 }
 
+cv::Mat CropResizePad(const cv::Mat& src, const std::vector<int>& crop_rect,
+                      const std::vector<int>& target_size, const std::vector<int>& pad_rect) {
+  int width = target_size[0] + pad_rect[1] + pad_rect[3];
+  int height = target_size[1] + pad_rect[0] + pad_rect[2];
+  cv::Mat dst = cv::Mat::zeros(height, width, src.type());
+  cv::Rect roi1 = {crop_rect[1], crop_rect[0], crop_rect[3] - crop_rect[1] + 1,
+                   crop_rect[2] - crop_rect[0] + 1};
+  cv::Rect roi2 = {pad_rect[1], pad_rect[0], target_size[0], target_size[1]};
+  cv::resize(src(roi1), dst(roi2), {target_size[0], target_size[1]});
+  return dst;
+}
+
 bool Compare(const cv::Mat& src1, const cv::Mat& src2, float threshold) {
   cv::Mat _src1, _src2, diff;
   src1.convertTo(_src1, CV_32FC(src1.channels()));
diff --git a/csrc/mmdeploy/utils/opencv/opencv_utils.h b/csrc/mmdeploy/utils/opencv/opencv_utils.h
index 0c9646466d..9dd268e651 100644
--- a/csrc/mmdeploy/utils/opencv/opencv_utils.h
+++ b/csrc/mmdeploy/utils/opencv/opencv_utils.h
@@ -91,6 +91,16 @@ MMDEPLOY_API cv::Mat CvtColor(const cv::Mat& src, PixelFormat src_format, PixelF
 MMDEPLOY_API cv::Mat Pad(const cv::Mat& src, int top, int left, int bottom, int right,
                          int border_type, float val);
 
+/**
+ * @param src
+ * @param crop_rect t, l, b, r
+ * @param target_size w, h
+ * @param pad_rect t, l, b, r
+ */
+MMDEPLOY_API cv::Mat CropResizePad(const cv::Mat& src, const std::vector<int>& crop_rect,
+                                   const std::vector<int>& target_size,
+                                   const std::vector<int>& pad_rect);
+
 /**
  * @brief compare two images
  *
diff --git a/demo/csrc/c/det_pose.cpp b/demo/csrc/c/det_pose.cpp
index 30f80f1a77..a12a33ef7b 100644
--- a/demo/csrc/c/det_pose.cpp
+++ b/demo/csrc/c/det_pose.cpp
@@ -65,8 +65,7 @@ class AddBboxField {
       auto _box = from_value<std::vector<float>>(dets["bbox"]);
       rect = cv::Rect(cv::Rect2f(cv::Point2f(_box[0], _box[1]), cv::Point2f(_box[2], _box[3])));
     }
-    return Value{
-        {"ori_img", _img}, {"bbox", {rect.x, rect.y, rect.width, rect.height}}, {"rotation", 0.f}};
+    return Value{{"ori_img", _img}, {"bbox", {rect.x, rect.y, rect.width, rect.height}}};
   }
 };
 

From d91ba2f3c8f5ba1cba683c76e9a75836e6492204 Mon Sep 17 00:00:00 2001
From: Li Zhang <lzhang329@gmail.com>
Date: Fri, 20 Jan 2023 00:04:42 +0800
Subject: [PATCH 2/3] fix 'cvtcolor' error in the preprocessing of single
 channel images (#1666)

(cherry picked from commit 8bb3fcc6d856857dc8341fb4c7de42f6b0c23ab0)
---
 csrc/mmdeploy/operation/cuda/cvtcolor.cpp   | 9 +++++----
 csrc/mmdeploy/preprocess/transform/load.cpp | 2 +-
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/csrc/mmdeploy/operation/cuda/cvtcolor.cpp b/csrc/mmdeploy/operation/cuda/cvtcolor.cpp
index eaff21b1a5..1e35adc06b 100644
--- a/csrc/mmdeploy/operation/cuda/cvtcolor.cpp
+++ b/csrc/mmdeploy/operation/cuda/cvtcolor.cpp
@@ -90,10 +90,11 @@ class CvtColorImpl : public CvtColor {
 
     auto height = src.height();
     auto width = src.width();
-    auto channels = src.channel();
-    auto stride = width * channels;
+    auto src_channels = src.channel();
+    auto src_stride = width * src_channels;
 
     Mat dst_mat(height, width, dst_fmt, src.type(), device());
+    auto dst_stride = width * dst_mat.channel();
 
     auto convert = [&](auto type) -> Result<void> {
       using T = typename decltype(type)::type;
@@ -101,8 +102,8 @@ class CvtColorImpl : public CvtColor {
       if (!converter) {
         return Status(eNotSupported);
       }
-      auto ret =
-          converter(cuda_stream, height, width, stride, src.data<T>(), stride, dst_mat.data<T>());
+      auto ret = converter(cuda_stream, height, width, src_stride, src.data<T>(), dst_stride,
+                           dst_mat.data<T>());
       if (ret != ppl::common::RC_SUCCESS) {
         return Status(eFail);
       }
diff --git a/csrc/mmdeploy/preprocess/transform/load.cpp b/csrc/mmdeploy/preprocess/transform/load.cpp
index 57879d5b4b..c386957222 100644
--- a/csrc/mmdeploy/preprocess/transform/load.cpp
+++ b/csrc/mmdeploy/preprocess/transform/load.cpp
@@ -61,7 +61,7 @@ class PrepareImage : public Transform {
     if (color_type_ == "color" || color_type_ == "color_ignore_orientation") {
       OUTCOME_TRY(cvt_color_.Apply(src_mat, dst_mat, PixelFormat::kBGR));
     } else {
-      OUTCOME_TRY(cvt_color_.Apply(dst_mat, dst_mat, PixelFormat::kGRAYSCALE));
+      OUTCOME_TRY(cvt_color_.Apply(src_mat, dst_mat, PixelFormat::kGRAYSCALE));
     }
     auto tensor = to_tensor(dst_mat);
     if (to_float32_) {

From 7d9696914996d1543428a16dbb2c11a9842e7540 Mon Sep 17 00:00:00 2001
From: Li Zhang <lzhang329@gmail.com>
Date: Tue, 31 Jan 2023 11:24:24 +0800
Subject: [PATCH 3/3] [Feature] Pose tracker C/C++/Python API&demos (#1663)

* add PoseTracker API

* add mahalanobis distance, add det_pose demo

* simplify api

* simplify api

* fix cmake & fix `CropResizePad`

* ignore out of frame bboxes

* clean-up

* fix lint

* add c api docs

* add c++ api docs/comments

* fix gcc7 build

* fix gcc7+opencv3

* fix stupid lint

* fix ci

* add help info & webcam support for C++ pose tracker demo

* add webcam support for Python pose tracker demo

* fix lint

* minor

* minor

* fix MSVC build

* fix python binding

* simplify module adapter

* fix module adapter

* minor fix

(cherry picked from commit 3d425bbb9ffd652debb3ebb842c5048ec59051f8)
---
 csrc/mmdeploy/apis/c/CMakeLists.txt           |    4 +
 csrc/mmdeploy/apis/c/mmdeploy/classifier.cpp  |    8 +-
 csrc/mmdeploy/apis/c/mmdeploy/classifier.h    |    6 +-
 csrc/mmdeploy/apis/c/mmdeploy/common.cpp      |    6 +-
 .../apis/c/mmdeploy/common_internal.h         |    8 +-
 csrc/mmdeploy/apis/c/mmdeploy/detector.cpp    |    2 +-
 csrc/mmdeploy/apis/c/mmdeploy/detector.h      |    6 +-
 csrc/mmdeploy/apis/c/mmdeploy/executor.cpp    |    8 +-
 csrc/mmdeploy/apis/c/mmdeploy/executor.h      |    2 +-
 .../apis/c/mmdeploy/executor_internal.h       |    2 +-
 csrc/mmdeploy/apis/c/mmdeploy/model.cpp       |    4 +-
 csrc/mmdeploy/apis/c/mmdeploy/model.h         |    2 +-
 csrc/mmdeploy/apis/c/mmdeploy/pipeline.cpp    |    8 +-
 csrc/mmdeploy/apis/c/mmdeploy/pipeline.h      |    4 +-
 .../apis/c/mmdeploy/pose_detector.cpp         |    8 +-
 csrc/mmdeploy/apis/c/mmdeploy/pose_detector.h |    6 +-
 .../mmdeploy/apis/c/mmdeploy/pose_tracker.cpp |  225 ++++
 csrc/mmdeploy/apis/c/mmdeploy/pose_tracker.h  |  158 +++
 csrc/mmdeploy/apis/c/mmdeploy/restorer.cpp    |   10 +-
 csrc/mmdeploy/apis/c/mmdeploy/restorer.h      |    6 +-
 .../apis/c/mmdeploy/rotated_detector.cpp      |    8 +-
 .../apis/c/mmdeploy/rotated_detector.h        |    6 +-
 csrc/mmdeploy/apis/c/mmdeploy/segmentor.cpp   |    8 +-
 csrc/mmdeploy/apis/c/mmdeploy/segmentor.h     |    6 +-
 .../apis/c/mmdeploy/text_detector.cpp         |   10 +-
 csrc/mmdeploy/apis/c/mmdeploy/text_detector.h |    6 +-
 .../apis/c/mmdeploy/text_recognizer.cpp       |   10 +-
 .../apis/c/mmdeploy/text_recognizer.h         |    6 +-
 .../apis/c/mmdeploy/video_recognizer.cpp      |   10 +-
 .../apis/c/mmdeploy/video_recognizer.h        |    6 +-
 csrc/mmdeploy/apis/cxx/mmdeploy/common.hpp    |   38 +
 .../apis/cxx/mmdeploy/pose_tracker.hpp        |  151 +++
 csrc/mmdeploy/apis/python/pose_tracker.cpp    |  149 +++
 csrc/mmdeploy/codebase/mmpose/CMakeLists.txt  |    9 +-
 .../codebase/mmpose/pose_tracker/common.h     |   57 +
 .../codebase/mmpose/pose_tracker/pipeline.cpp |  117 ++
 .../mmpose/pose_tracker/pose_tracker.cpp      |  399 ++++++
 .../mmpose/pose_tracker/pose_tracker.h        |  101 ++
 .../mmpose/pose_tracker/smoothing_filter.cpp  |   49 +
 .../mmpose/pose_tracker/smoothing_filter.h    |   58 +
 .../codebase/mmpose/pose_tracker/track.cpp    |   70 +
 .../codebase/mmpose/pose_tracker/track.h      |   65 +
 .../mmpose/pose_tracker/tracking_filter.cpp   |  199 +++
 .../mmpose/pose_tracker/tracking_filter.h     |   50 +
 .../codebase/mmpose/pose_tracker/utils.cpp    |  136 ++
 .../codebase/mmpose/pose_tracker/utils.h      |   96 ++
 csrc/mmdeploy/experimental/module_adapter.h   |   91 +-
 csrc/mmdeploy/graph/inference.cpp             |    2 +-
 .../operation/cuda/crop_resize_pad.cpp        |   20 +-
 csrc/mmdeploy/utils/opencv/opencv_utils.cpp   |   10 +-
 demo/csrc/CMakeLists.txt                      |    4 +-
 demo/csrc/cpp/det_pose.cpp                    |  113 ++
 demo/csrc/cpp/pose_tracker.cpp                | 1144 ++---------------
 demo/python/det_pose.py                       |   85 ++
 demo/python/pose_tracker.py                   |   98 ++
 55 files changed, 2696 insertions(+), 1174 deletions(-)
 create mode 100644 csrc/mmdeploy/apis/c/mmdeploy/pose_tracker.cpp
 create mode 100644 csrc/mmdeploy/apis/c/mmdeploy/pose_tracker.h
 create mode 100644 csrc/mmdeploy/apis/cxx/mmdeploy/pose_tracker.hpp
 create mode 100644 csrc/mmdeploy/apis/python/pose_tracker.cpp
 create mode 100644 csrc/mmdeploy/codebase/mmpose/pose_tracker/common.h
 create mode 100644 csrc/mmdeploy/codebase/mmpose/pose_tracker/pipeline.cpp
 create mode 100644 csrc/mmdeploy/codebase/mmpose/pose_tracker/pose_tracker.cpp
 create mode 100644 csrc/mmdeploy/codebase/mmpose/pose_tracker/pose_tracker.h
 create mode 100644 csrc/mmdeploy/codebase/mmpose/pose_tracker/smoothing_filter.cpp
 create mode 100644 csrc/mmdeploy/codebase/mmpose/pose_tracker/smoothing_filter.h
 create mode 100644 csrc/mmdeploy/codebase/mmpose/pose_tracker/track.cpp
 create mode 100644 csrc/mmdeploy/codebase/mmpose/pose_tracker/track.h
 create mode 100644 csrc/mmdeploy/codebase/mmpose/pose_tracker/tracking_filter.cpp
 create mode 100644 csrc/mmdeploy/codebase/mmpose/pose_tracker/tracking_filter.h
 create mode 100644 csrc/mmdeploy/codebase/mmpose/pose_tracker/utils.cpp
 create mode 100644 csrc/mmdeploy/codebase/mmpose/pose_tracker/utils.h
 create mode 100644 demo/csrc/cpp/det_pose.cpp
 create mode 100644 demo/python/det_pose.py
 create mode 100644 demo/python/pose_tracker.py

diff --git a/csrc/mmdeploy/apis/c/CMakeLists.txt b/csrc/mmdeploy/apis/c/CMakeLists.txt
index a9dd03c06a..1d0533aff4 100644
--- a/csrc/mmdeploy/apis/c/CMakeLists.txt
+++ b/csrc/mmdeploy/apis/c/CMakeLists.txt
@@ -12,6 +12,9 @@ macro(add_object name)
         target_compile_options(${name} PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-fvisibility=hidden>)
     endif ()
     target_link_libraries(${name} PRIVATE mmdeploy::core)
+    target_include_directories(${name} PUBLIC
+            $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
+            $<INSTALL_INTERFACE:include>)
     set(CAPI_OBJS ${CAPI_OBJS} ${name})
     mmdeploy_export(${name})
 endmacro()
@@ -31,6 +34,7 @@ foreach (TASK ${COMMON_LIST})
     mmdeploy_add_library(${TARGET_NAME})
     target_link_libraries(${TARGET_NAME} PRIVATE ${OBJECT_NAME})
     target_include_directories(${TARGET_NAME} PUBLIC
+            $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
             $<INSTALL_INTERFACE:include>)
     install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/mmdeploy/${TASK}.h
             DESTINATION include/mmdeploy)
diff --git a/csrc/mmdeploy/apis/c/mmdeploy/classifier.cpp b/csrc/mmdeploy/apis/c/mmdeploy/classifier.cpp
index 42168ad547..6b177c9b47 100644
--- a/csrc/mmdeploy/apis/c/mmdeploy/classifier.cpp
+++ b/csrc/mmdeploy/apis/c/mmdeploy/classifier.cpp
@@ -1,17 +1,17 @@
 // Copyright (c) OpenMMLab. All rights reserved.
 
-#include "classifier.h"
+#include "mmdeploy/classifier.h"
 
 #include <numeric>
 
-#include "common_internal.h"
-#include "handle.h"
 #include "mmdeploy/archive/value_archive.h"
 #include "mmdeploy/codebase/mmcls/mmcls.h"
+#include "mmdeploy/common_internal.h"
 #include "mmdeploy/core/device.h"
 #include "mmdeploy/core/graph.h"
 #include "mmdeploy/core/utils/formatter.h"
-#include "pipeline.h"
+#include "mmdeploy/handle.h"
+#include "mmdeploy/pipeline.h"
 
 using namespace mmdeploy;
 using namespace std;
diff --git a/csrc/mmdeploy/apis/c/mmdeploy/classifier.h b/csrc/mmdeploy/apis/c/mmdeploy/classifier.h
index 0b16a68072..54e9d0215b 100644
--- a/csrc/mmdeploy/apis/c/mmdeploy/classifier.h
+++ b/csrc/mmdeploy/apis/c/mmdeploy/classifier.h
@@ -8,9 +8,9 @@
 #ifndef MMDEPLOY_CLASSIFIER_H
 #define MMDEPLOY_CLASSIFIER_H
 
-#include "common.h"
-#include "executor.h"
-#include "model.h"
+#include "mmdeploy/common.h"
+#include "mmdeploy/executor.h"
+#include "mmdeploy/model.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/csrc/mmdeploy/apis/c/mmdeploy/common.cpp b/csrc/mmdeploy/apis/c/mmdeploy/common.cpp
index 564ba5dd0d..1936be3353 100644
--- a/csrc/mmdeploy/apis/c/mmdeploy/common.cpp
+++ b/csrc/mmdeploy/apis/c/mmdeploy/common.cpp
@@ -1,9 +1,9 @@
-#include "common.h"
+#include "mmdeploy/common.h"
 
-#include "common_internal.h"
-#include "executor_internal.h"
+#include "mmdeploy/common_internal.h"
 #include "mmdeploy/core/mat.h"
 #include "mmdeploy/core/profiler.h"
+#include "mmdeploy/executor_internal.h"
 
 mmdeploy_value_t mmdeploy_value_copy(mmdeploy_value_t value) {
   if (!value) {
diff --git a/csrc/mmdeploy/apis/c/mmdeploy/common_internal.h b/csrc/mmdeploy/apis/c/mmdeploy/common_internal.h
index c7d22268de..a1ddecb54d 100644
--- a/csrc/mmdeploy/apis/c/mmdeploy/common_internal.h
+++ b/csrc/mmdeploy/apis/c/mmdeploy/common_internal.h
@@ -3,12 +3,12 @@
 #ifndef MMDEPLOY_CSRC_APIS_C_COMMON_INTERNAL_H_
 #define MMDEPLOY_CSRC_APIS_C_COMMON_INTERNAL_H_
 
-#include "common.h"
-#include "handle.h"
+#include "mmdeploy/common.h"
 #include "mmdeploy/core/mat.h"
 #include "mmdeploy/core/value.h"
-#include "model.h"
-#include "pipeline.h"
+#include "mmdeploy/handle.h"
+#include "mmdeploy/model.h"
+#include "mmdeploy/pipeline.h"
 
 using namespace mmdeploy;
 
diff --git a/csrc/mmdeploy/apis/c/mmdeploy/detector.cpp b/csrc/mmdeploy/apis/c/mmdeploy/detector.cpp
index 9086f3c950..7267e2d178 100644
--- a/csrc/mmdeploy/apis/c/mmdeploy/detector.cpp
+++ b/csrc/mmdeploy/apis/c/mmdeploy/detector.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) OpenMMLab. All rights reserved.
 
-#include "detector.h"
+#include "mmdeploy/detector.h"
 
 #include <deque>
 #include <numeric>
diff --git a/csrc/mmdeploy/apis/c/mmdeploy/detector.h b/csrc/mmdeploy/apis/c/mmdeploy/detector.h
index 5c6df5aed9..5c5ba2f356 100644
--- a/csrc/mmdeploy/apis/c/mmdeploy/detector.h
+++ b/csrc/mmdeploy/apis/c/mmdeploy/detector.h
@@ -8,9 +8,9 @@
 #ifndef MMDEPLOY_DETECTOR_H
 #define MMDEPLOY_DETECTOR_H
 
-#include "common.h"
-#include "executor.h"
-#include "model.h"
+#include "mmdeploy/common.h"
+#include "mmdeploy/executor.h"
+#include "mmdeploy/model.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/csrc/mmdeploy/apis/c/mmdeploy/executor.cpp b/csrc/mmdeploy/apis/c/mmdeploy/executor.cpp
index aa1966d30b..2fdfb9091f 100644
--- a/csrc/mmdeploy/apis/c/mmdeploy/executor.cpp
+++ b/csrc/mmdeploy/apis/c/mmdeploy/executor.cpp
@@ -1,11 +1,11 @@
 // Copyright (c) OpenMMLab. All rights reserved.
 
-#include "executor.h"
+#include "mmdeploy/executor.h"
 
-#include "common.h"
-#include "common_internal.h"
-#include "executor_internal.h"
+#include "mmdeploy/common.h"
+#include "mmdeploy/common_internal.h"
 #include "mmdeploy/execution/when_all_value.h"
+#include "mmdeploy/executor_internal.h"
 
 using namespace mmdeploy;
 
diff --git a/csrc/mmdeploy/apis/c/mmdeploy/executor.h b/csrc/mmdeploy/apis/c/mmdeploy/executor.h
index ecc4bf22fc..a2c8ffa387 100644
--- a/csrc/mmdeploy/apis/c/mmdeploy/executor.h
+++ b/csrc/mmdeploy/apis/c/mmdeploy/executor.h
@@ -3,7 +3,7 @@
 #ifndef MMDEPLOY_CSRC_APIS_C_EXECUTOR_H_
 #define MMDEPLOY_CSRC_APIS_C_EXECUTOR_H_
 
-#include "common.h"
+#include "mmdeploy/common.h"
 
 #if __cplusplus
 extern "C" {
diff --git a/csrc/mmdeploy/apis/c/mmdeploy/executor_internal.h b/csrc/mmdeploy/apis/c/mmdeploy/executor_internal.h
index c1a5c94526..95f39fe009 100644
--- a/csrc/mmdeploy/apis/c/mmdeploy/executor_internal.h
+++ b/csrc/mmdeploy/apis/c/mmdeploy/executor_internal.h
@@ -3,8 +3,8 @@
 #ifndef MMDEPLOY_CSRC_APIS_C_EXECUTOR_INTERNAL_H_
 #define MMDEPLOY_CSRC_APIS_C_EXECUTOR_INTERNAL_H_
 
-#include "executor.h"
 #include "mmdeploy/execution/schedulers/registry.h"
+#include "mmdeploy/executor.h"
 
 using namespace mmdeploy;
 
diff --git a/csrc/mmdeploy/apis/c/mmdeploy/model.cpp b/csrc/mmdeploy/apis/c/mmdeploy/model.cpp
index ffde1d05f1..6d202bce81 100644
--- a/csrc/mmdeploy/apis/c/mmdeploy/model.cpp
+++ b/csrc/mmdeploy/apis/c/mmdeploy/model.cpp
@@ -1,11 +1,11 @@
 // Copyright (c) OpenMMLab. All rights reserved.
 
 // clang-format off
-#include "model.h"
+#include "mmdeploy/model.h"
 
 #include <memory>
 
-#include "common_internal.h"
+#include "mmdeploy/common_internal.h"
 #include "mmdeploy/core/logger.h"
 #include "mmdeploy/core/model.h"
 // clang-format on
diff --git a/csrc/mmdeploy/apis/c/mmdeploy/model.h b/csrc/mmdeploy/apis/c/mmdeploy/model.h
index 32d3ef3b9c..394d2902c2 100644
--- a/csrc/mmdeploy/apis/c/mmdeploy/model.h
+++ b/csrc/mmdeploy/apis/c/mmdeploy/model.h
@@ -8,7 +8,7 @@
 #ifndef MMDEPLOY_SRC_APIS_C_MODEL_H_
 #define MMDEPLOY_SRC_APIS_C_MODEL_H_
 
-#include "common.h"
+#include "mmdeploy/common.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/csrc/mmdeploy/apis/c/mmdeploy/pipeline.cpp b/csrc/mmdeploy/apis/c/mmdeploy/pipeline.cpp
index 287896439e..5f7fb06e50 100644
--- a/csrc/mmdeploy/apis/c/mmdeploy/pipeline.cpp
+++ b/csrc/mmdeploy/apis/c/mmdeploy/pipeline.cpp
@@ -1,10 +1,10 @@
 // Copyright (c) OpenMMLab. All rights reserved.
 
-#include "pipeline.h"
+#include "mmdeploy/pipeline.h"
 
-#include "common_internal.h"
-#include "executor_internal.h"
-#include "handle.h"
+#include "mmdeploy/common_internal.h"
+#include "mmdeploy/executor_internal.h"
+#include "mmdeploy/handle.h"
 
 int mmdeploy_pipeline_create_v3(mmdeploy_value_t config, mmdeploy_context_t context,
                                 mmdeploy_pipeline_t* pipeline) {
diff --git a/csrc/mmdeploy/apis/c/mmdeploy/pipeline.h b/csrc/mmdeploy/apis/c/mmdeploy/pipeline.h
index d517b68077..b159aad86e 100644
--- a/csrc/mmdeploy/apis/c/mmdeploy/pipeline.h
+++ b/csrc/mmdeploy/apis/c/mmdeploy/pipeline.h
@@ -3,8 +3,8 @@
 #ifndef MMDEPLOY_CSRC_APIS_C_PIPELINE_H_
 #define MMDEPLOY_CSRC_APIS_C_PIPELINE_H_
 
-#include "common.h"
-#include "executor.h"
+#include "mmdeploy/common.h"
+#include "mmdeploy/executor.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/csrc/mmdeploy/apis/c/mmdeploy/pose_detector.cpp b/csrc/mmdeploy/apis/c/mmdeploy/pose_detector.cpp
index e4fffd4655..6a7a95d794 100644
--- a/csrc/mmdeploy/apis/c/mmdeploy/pose_detector.cpp
+++ b/csrc/mmdeploy/apis/c/mmdeploy/pose_detector.cpp
@@ -1,17 +1,17 @@
 // Copyright (c) OpenMMLab. All rights reserved.
 
-#include "pose_detector.h"
+#include "mmdeploy/pose_detector.h"
 
 #include <numeric>
 
-#include "common_internal.h"
-#include "handle.h"
 #include "mmdeploy/codebase/mmpose/mmpose.h"
+#include "mmdeploy/common_internal.h"
 #include "mmdeploy/core/device.h"
 #include "mmdeploy/core/graph.h"
 #include "mmdeploy/core/mat.h"
 #include "mmdeploy/core/utils/formatter.h"
-#include "pipeline.h"
+#include "mmdeploy/handle.h"
+#include "mmdeploy/pipeline.h"
 
 using namespace std;
 using namespace mmdeploy;
diff --git a/csrc/mmdeploy/apis/c/mmdeploy/pose_detector.h b/csrc/mmdeploy/apis/c/mmdeploy/pose_detector.h
index 9d467aa907..ff0987cee4 100644
--- a/csrc/mmdeploy/apis/c/mmdeploy/pose_detector.h
+++ b/csrc/mmdeploy/apis/c/mmdeploy/pose_detector.h
@@ -8,9 +8,9 @@
 #ifndef MMDEPLOY_SRC_APIS_C_POSE_DETECTOR_H_
 #define MMDEPLOY_SRC_APIS_C_POSE_DETECTOR_H_
 
-#include "common.h"
-#include "executor.h"
-#include "model.h"
+#include "mmdeploy/common.h"
+#include "mmdeploy/executor.h"
+#include "mmdeploy/model.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/csrc/mmdeploy/apis/c/mmdeploy/pose_tracker.cpp b/csrc/mmdeploy/apis/c/mmdeploy/pose_tracker.cpp
new file mode 100644
index 0000000000..113b520c39
--- /dev/null
+++ b/csrc/mmdeploy/apis/c/mmdeploy/pose_tracker.cpp
@@ -0,0 +1,225 @@
+// Copyright (c) OpenMMLab. All rights reserved.
+
+#include "mmdeploy/pose_tracker.h"
+
+#include "mmdeploy/archive/json_archive.h"
+#include "mmdeploy/archive/value_archive.h"
+#include "mmdeploy/codebase/mmpose/pose_tracker/common.h"
+#include "mmdeploy/common_internal.h"
+#include "mmdeploy/core/mpl/structure.h"
+#include "mmdeploy/pipeline.h"
+
+namespace mmdeploy {
+
+using namespace framework;
+
+}  // namespace mmdeploy
+
+using namespace mmdeploy;
+
+namespace {
+
+Value config_template() {
+  static const auto json = R"(
+{
+  "type": "Pipeline",
+  "input": ["img", "force_det", "state"],
+  "output": "targets",
+  "tasks": [
+    {
+      "type": "Task",
+      "name": "prepare",
+      "module": "pose_tracker::Prepare",
+      "input": ["img", "force_det", "state"],
+      "output": "use_det"
+    },
+    {
+      "type": "Task",
+      "module": "Transform",
+      "name": "preload",
+      "input": "img",
+      "output": "data",
+      "transforms": [ { "type": "LoadImageFromFile" } ]
+    },
+    {
+      "type": "Cond",
+      "name": "cond",
+      "input": ["use_det", "data"],
+      "output": "dets",
+      "body": {
+        "name": "detection",
+        "type": "Inference",
+        "params": { "model": "detection" }
+      }
+    },
+    {
+      "type": "Task",
+      "name": "process_bboxes",
+      "module": "pose_tracker::ProcessBboxes",
+      "input": ["dets", "data", "state"],
+      "output": ["rois", "track_ids"]
+    },
+    {
+      "input": "*rois",
+      "output": "*keypoints",
+      "name": "pose",
+      "type": "Inference",
+      "params": { "model": "pose" }
+    },
+    {
+      "type": "Task",
+      "name": "track_step",
+      "module": "pose_tracker::TrackStep",
+      "scheduler": "pool",
+      "input": ["keypoints", "track_ids", "state"],
+      "output": "targets"
+    }
+  ]
+}
+)"_json;
+  static const auto config = from_json<Value>(json);
+  return config;
+}
+
+}  // namespace
+
+int mmdeploy_pose_tracker_default_params(mmdeploy_pose_tracker_param_t* params) {
+  mmpose::_pose_tracker::SetDefaultParams(*params);
+  return 0;
+}
+
+int mmdeploy_pose_tracker_create(mmdeploy_model_t det_model, mmdeploy_model_t pose_model,
+                                 mmdeploy_context_t context, mmdeploy_pose_tracker_t* pipeline) {
+  mmdeploy_context_add(context, MMDEPLOY_TYPE_MODEL, "detection", det_model);
+  mmdeploy_context_add(context, MMDEPLOY_TYPE_MODEL, "pose", pose_model);
+  auto config = config_template();
+  return mmdeploy_pipeline_create_v3(Cast(&config), context, (mmdeploy_pipeline_t*)pipeline);
+}
+
+void mmdeploy_pose_tracker_destroy(mmdeploy_pose_tracker_t pipeline) {
+  mmdeploy_pipeline_destroy((mmdeploy_pipeline_t)pipeline);
+}
+
+int mmdeploy_pose_tracker_create_state(mmdeploy_pose_tracker_t pipeline,
+                                       const mmdeploy_pose_tracker_param_t* params,
+                                       mmdeploy_pose_tracker_state_t* state) {
+  try {
+    auto create_fn = gRegistry<Module>().Create("pose_tracker::Create", Value()).value();
+    *state = reinterpret_cast<mmdeploy_pose_tracker_state_t>(new Value(
+        create_fn->Process({const_cast<mmdeploy_pose_tracker_param_t*>(params)}).value()[0]));
+    return MMDEPLOY_SUCCESS;
+  } catch (const std::exception& e) {
+    MMDEPLOY_ERROR("unhandled exception: {}", e.what());
+  } catch (...) {
+    MMDEPLOY_ERROR("unknown exception caught");
+  }
+  return MMDEPLOY_E_FAIL;
+}
+
+void mmdeploy_pose_tracker_destroy_state(mmdeploy_pose_tracker_state_t state) {
+  delete reinterpret_cast<Value*>(state);
+}
+
+int mmdeploy_pose_tracker_create_input(mmdeploy_pose_tracker_state_t* states,
+                                       const mmdeploy_mat_t* frames, const int32_t* use_detect,
+                                       int batch_size, mmdeploy_value_t* value) {
+  try {
+    Value::Array images;
+    Value::Array use_dets;
+    Value::Array trackers;
+    for (int i = 0; i < batch_size; ++i) {
+      images.push_back({{"ori_img", Cast(frames[i])}});
+      use_dets.emplace_back(use_detect ? use_detect[i] : -1);
+      trackers.push_back(*reinterpret_cast<Value*>(states[i]));
+    }
+    *value = Take(Value{std::move(images), std::move(use_dets), std::move(trackers)});
+    return MMDEPLOY_SUCCESS;
+  } catch (const std::exception& e) {
+    MMDEPLOY_ERROR("unhandled exception: {}", e.what());
+  } catch (...) {
+    MMDEPLOY_ERROR("unknown exception caught");
+  }
+  return MMDEPLOY_E_FAIL;
+}
+
+using ResultType = mmdeploy::Structure<mmdeploy_pose_tracker_target_t, std::vector<int32_t>,
+                                       std::vector<mmpose::_pose_tracker::TrackerResult>>;
+
+int mmdeploy_pose_tracker_get_result(mmdeploy_value_t output,
+                                     mmdeploy_pose_tracker_target_t** results,
+                                     int32_t** result_count) {
+  if (!output || !results) {
+    return MMDEPLOY_E_INVALID_ARG;
+  }
+  try {
+    // convert result from Values
+    std::vector<mmpose::_pose_tracker::TrackerResult> res;
+    from_value(Cast(output)->front(), res);
+
+    size_t total = 0;
+    for (const auto& r : res) {
+      total += r.bboxes.size();
+    }
+
+    // preserve space for the output structure
+    ResultType result_type({total, 1, 1});
+    auto [result_data, result_cnt, result_holder] = result_type.pointers();
+
+    auto result_ptr = result_data;
+
+    result_holder->swap(res);
+
+    // build output structure
+    for (auto& r : *result_holder) {
+      for (int j = 0; j < r.bboxes.size(); ++j) {
+        auto& p = *result_ptr++;
+        p.keypoint_count = static_cast<int32_t>(r.keypoints[j].size());
+        p.keypoints = r.keypoints[j].data();
+        p.scores = r.scores[j].data();
+        p.bbox = r.bboxes[j];
+        p.target_id = r.track_ids[j];
+      }
+      result_cnt->push_back(r.bboxes.size());
+      // debug info
+      //  p.reserved0 = new std::vector(r.pose_input_bboxes);
+      //  p.reserved1 = new std::vector(r.pose_output_bboxes);
+    }
+
+    *results = result_data;
+    *result_count = result_cnt->data();
+    result_type.release();
+
+    return MMDEPLOY_SUCCESS;
+
+  } catch (const std::exception& e) {
+    MMDEPLOY_ERROR("unhandled exception: {}", e.what());
+  } catch (...) {
+    MMDEPLOY_ERROR("unknown exception caught");
+  }
+  return MMDEPLOY_E_FAIL;
+}
+
+int mmdeploy_pose_tracker_apply(mmdeploy_pose_tracker_t pipeline,
+                                mmdeploy_pose_tracker_state_t* states, const mmdeploy_mat_t* frames,
+                                const int32_t* use_detect, int32_t count,
+                                mmdeploy_pose_tracker_target_t** results, int32_t** result_count) {
+  wrapped<mmdeploy_value_t> input;
+  if (auto ec =
+          mmdeploy_pose_tracker_create_input(states, frames, use_detect, count, input.ptr())) {
+    return ec;
+  }
+  wrapped<mmdeploy_value_t> output;
+  if (auto ec = mmdeploy_pipeline_apply((mmdeploy_pipeline_t)pipeline, input, output.ptr())) {
+    return ec;
+  }
+  if (auto ec = mmdeploy_pose_tracker_get_result(output, results, result_count)) {
+    return ec;
+  }
+  return MMDEPLOY_SUCCESS;
+}
+
+void mmdeploy_pose_tracker_release_result(mmdeploy_pose_tracker_target_t* results,
+                                          const int32_t* result_count, int count) {
+  auto total = std::accumulate(result_count, result_count + count, 0);
+  ResultType deleter({static_cast<size_t>(total), 1, 1}, results);
+}
diff --git a/csrc/mmdeploy/apis/c/mmdeploy/pose_tracker.h b/csrc/mmdeploy/apis/c/mmdeploy/pose_tracker.h
new file mode 100644
index 0000000000..f895af4150
--- /dev/null
+++ b/csrc/mmdeploy/apis/c/mmdeploy/pose_tracker.h
@@ -0,0 +1,158 @@
+// Copyright (c) OpenMMLab. All rights reserved.
+
+/**
+ * @file pose_tracker.h
+ * @brief Pose tracker C API
+ */
+
+#ifndef MMDEPLOY_POSE_TRACKER_H
+#define MMDEPLOY_POSE_TRACKER_H
+
+#include "mmdeploy/common.h"
+#include "mmdeploy/detector.h"
+#include "mmdeploy/model.h"
+#include "mmdeploy/pose_detector.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct mmdeploy_pose_tracker* mmdeploy_pose_tracker_t;
+typedef struct mmdeploy_pose_tracker_state* mmdeploy_pose_tracker_state_t;
+
+typedef struct mmdeploy_pose_tracker_param_t {
+  // detection interval, default = 1
+  int32_t det_interval;
+  // detection label use for pose estimation, default = 0
+  int32_t det_label;
+  // detection score threshold, default = 0.5
+  float det_thr;
+  // detection minimum bbox size (compute as sqrt(area)), default = -1
+  float det_min_bbox_size;
+  // nms iou threshold for merging detected bboxes and bboxes from tracked targets, default = 0.7
+  float det_nms_thr;
+
+  // max number of bboxes used for pose estimation per frame, default = -1
+  int32_t pose_max_num_bboxes;
+  // threshold for visible key-points, default = 0.5
+  float pose_kpt_thr;
+  // min number of key-points for valid poses, default = -1
+  int32_t pose_min_keypoints;
+  // scale for expanding key-points to bbox, default = 1.25
+  float pose_bbox_scale;
+  // min pose bbox size, tracks with bbox size smaller than the threshold will be dropped,
+  // default = -1
+  float pose_min_bbox_size;
+  // nms oks/iou threshold for suppressing overlapped poses, useful when multiple pose estimations
+  // collapse to the same target, default = 0.5
+  float pose_nms_thr;
+  // keypoint sigmas for computing OKS, will use IOU if not set, default = nullptr
+  float* keypoint_sigmas;
+  // size of keypoint sigma array, must be consistent with the number of key-points, default = 0
+  int32_t keypoint_sigmas_size;
+
+  // iou threshold for associating missing tracks, default = 0.4
+  float track_iou_thr;
+  // max number of missing frames before a missing tracks is removed, default = 10
+  int32_t track_max_missing;
+  // track history size, default = 1
+  int32_t track_history_size;
+
+  // weight of position for setting covariance matrices of kalman filters, default = 0.05
+  float std_weight_position;
+  // weight of velocity for setting covariance matrices of kalman filters, default = 0.00625
+  float std_weight_velocity;
+
+  // params for the one-euro filter for smoothing the outputs - (beta, fc_min, fc_derivative)
+  // default = (0.007, 1, 1)
+  float smooth_params[3];
+} mmdeploy_pose_tracker_param_t;
+
+typedef struct mmdeploy_pose_tracker_target_t {
+  mmdeploy_point_t* keypoints;  // key-points of the target
+  int32_t keypoint_count;       // size of `keypoints` array
+  float* scores;                // scores of each key-point
+  mmdeploy_rect_t bbox;         // estimated bbox from key-points
+  uint32_t target_id;           // target id from internal tracker
+} mmdeploy_pose_tracker_target_t;
+
+/**
+ * @brief Fill params with default parameters
+ * @param[in,out] params
+ * @return status of the operation
+ */
+MMDEPLOY_API int mmdeploy_pose_tracker_default_params(mmdeploy_pose_tracker_param_t* params);
+
+/**
+ * @brief Create pose tracker pipeline
+ * @param[in] det_model detection model object, created by \ref mmdeploy_model_create
+ * @param[in] pose_model pose model object
+ * @param[in] context context object describing execution environment (device, profiler, etc...),
+ * created by \ref mmdeploy_context_create
+ * @param[out] pipeline handle of the created pipeline
+ * @return status of the operation
+ */
+MMDEPLOY_API int mmdeploy_pose_tracker_create(mmdeploy_model_t det_model,
+                                              mmdeploy_model_t pose_model,
+                                              mmdeploy_context_t context,
+                                              mmdeploy_pose_tracker_t* pipeline);
+
+/**
+ * @brief Destroy pose tracker pipeline
+ * @param[in] pipeline
+ */
+MMDEPLOY_API void mmdeploy_pose_tracker_destroy(mmdeploy_pose_tracker_t pipeline);
+
+/**
+ * @brief Create a tracker state handle corresponds to a video stream
+ * @param[in] pipeline handle of a pose tracker pipeline
+ * @param[in] params params for creating the tracker state
+ * @param[out] state handle of the created tracker state
+ * @return status of the operation
+ */
+MMDEPLOY_API int mmdeploy_pose_tracker_create_state(mmdeploy_pose_tracker_t pipeline,
+                                                    const mmdeploy_pose_tracker_param_t* params,
+                                                    mmdeploy_pose_tracker_state_t* state);
+
+/**
+ * @brief Destroy tracker state
+ * @param[in] state handle of the tracker state
+ */
+MMDEPLOY_API void mmdeploy_pose_tracker_destroy_state(mmdeploy_pose_tracker_state_t state);
+
+/**
+ * @brief Apply pose tracker pipeline, notice that this function supports batch operation by feeding
+ * arrays of size \p count to \p states, \p frames and \p use_detect
+ * @param[in] pipeline handle of a pose tracker pipeline
+ * @param[in] states tracker states handles, array of size \p count
+ * @param[in] frames input frames of size \p count
+ * @param[in] use_detect control the use of detector, array of size \p count
+ *   -1: use params.det_interval, 0: don't use detector, 1: force use detector
+ * @param[in] count batch size
+ * @param[out] results a linear buffer contains the tracked targets of input frames. Should be
+ * released by \ref mmdeploy_pose_tracker_release_result
+ * @param[out] result_count a linear buffer of size \p count contains the number of tracked
+ * targets of the frames. Should be released by \ref mmdeploy_pose_tracker_release_result
+ * @return status of the operation
+ */
+MMDEPLOY_API int mmdeploy_pose_tracker_apply(mmdeploy_pose_tracker_t pipeline,
+                                             mmdeploy_pose_tracker_state_t* states,
+                                             const mmdeploy_mat_t* frames,
+                                             const int32_t* use_detect, int32_t count,
+                                             mmdeploy_pose_tracker_target_t** results,
+                                             int32_t** result_count);
+
+/**
+ * @brief Release result objects
+ * @param[in] results
+ * @param[in] result_count
+ * @param[in] count
+ */
+MMDEPLOY_API void mmdeploy_pose_tracker_release_result(mmdeploy_pose_tracker_target_t* results,
+                                                       const int32_t* result_count, int count);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // MMDEPLOY_POSE_TRACKER_H
diff --git a/csrc/mmdeploy/apis/c/mmdeploy/restorer.cpp b/csrc/mmdeploy/apis/c/mmdeploy/restorer.cpp
index 14b5075006..e5b8391d6c 100644
--- a/csrc/mmdeploy/apis/c/mmdeploy/restorer.cpp
+++ b/csrc/mmdeploy/apis/c/mmdeploy/restorer.cpp
@@ -1,16 +1,16 @@
 // Copyright (c) OpenMMLab. All rights reserved.
 
-#include "restorer.h"
+#include "mmdeploy/restorer.h"
 
-#include "common_internal.h"
-#include "executor_internal.h"
-#include "handle.h"
 #include "mmdeploy/codebase/mmedit/mmedit.h"
+#include "mmdeploy/common_internal.h"
 #include "mmdeploy/core/device.h"
 #include "mmdeploy/core/graph.h"
 #include "mmdeploy/core/mpl/structure.h"
 #include "mmdeploy/core/utils/formatter.h"
-#include "pipeline.h"
+#include "mmdeploy/executor_internal.h"
+#include "mmdeploy/handle.h"
+#include "mmdeploy/pipeline.h"
 
 using namespace mmdeploy;
 
diff --git a/csrc/mmdeploy/apis/c/mmdeploy/restorer.h b/csrc/mmdeploy/apis/c/mmdeploy/restorer.h
index b67c7b3d77..9ab529850f 100644
--- a/csrc/mmdeploy/apis/c/mmdeploy/restorer.h
+++ b/csrc/mmdeploy/apis/c/mmdeploy/restorer.h
@@ -8,9 +8,9 @@
 #ifndef MMDEPLOY_SRC_APIS_C_RESTORER_H_
 #define MMDEPLOY_SRC_APIS_C_RESTORER_H_
 
-#include "common.h"
-#include "executor.h"
-#include "model.h"
+#include "mmdeploy/common.h"
+#include "mmdeploy/executor.h"
+#include "mmdeploy/model.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/csrc/mmdeploy/apis/c/mmdeploy/rotated_detector.cpp b/csrc/mmdeploy/apis/c/mmdeploy/rotated_detector.cpp
index 278dcfa0fc..317b31c731 100644
--- a/csrc/mmdeploy/apis/c/mmdeploy/rotated_detector.cpp
+++ b/csrc/mmdeploy/apis/c/mmdeploy/rotated_detector.cpp
@@ -1,16 +1,16 @@
 // Copyright (c) OpenMMLab. All rights reserved.
 
-#include "rotated_detector.h"
+#include "mmdeploy/rotated_detector.h"
 
 #include <numeric>
 
-#include "common_internal.h"
-#include "handle.h"
 #include "mmdeploy/codebase/mmrotate/mmrotate.h"
+#include "mmdeploy/common_internal.h"
 #include "mmdeploy/core/graph.h"
 #include "mmdeploy/core/mat.h"
 #include "mmdeploy/core/utils/formatter.h"
-#include "pipeline.h"
+#include "mmdeploy/handle.h"
+#include "mmdeploy/pipeline.h"
 
 using namespace std;
 using namespace mmdeploy;
diff --git a/csrc/mmdeploy/apis/c/mmdeploy/rotated_detector.h b/csrc/mmdeploy/apis/c/mmdeploy/rotated_detector.h
index 52e2674b34..35125a74ff 100644
--- a/csrc/mmdeploy/apis/c/mmdeploy/rotated_detector.h
+++ b/csrc/mmdeploy/apis/c/mmdeploy/rotated_detector.h
@@ -8,9 +8,9 @@
 #ifndef MMDEPLOY_SRC_APIS_C_ROTATED_DETECTOR_H_
 #define MMDEPLOY_SRC_APIS_C_ROTATED_DETECTOR_H_
 
-#include "common.h"
-#include "executor.h"
-#include "model.h"
+#include "mmdeploy/common.h"
+#include "mmdeploy/executor.h"
+#include "mmdeploy/model.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/csrc/mmdeploy/apis/c/mmdeploy/segmentor.cpp b/csrc/mmdeploy/apis/c/mmdeploy/segmentor.cpp
index c93445b0a0..313a37dc04 100644
--- a/csrc/mmdeploy/apis/c/mmdeploy/segmentor.cpp
+++ b/csrc/mmdeploy/apis/c/mmdeploy/segmentor.cpp
@@ -1,17 +1,17 @@
 // Copyright (c) OpenMMLab. All rights reserved.
 
-#include "segmentor.h"
+#include "mmdeploy/segmentor.h"
 
-#include "common_internal.h"
-#include "handle.h"
 #include "mmdeploy/codebase/mmseg/mmseg.h"
+#include "mmdeploy/common_internal.h"
 #include "mmdeploy/core/device.h"
 #include "mmdeploy/core/graph.h"
 #include "mmdeploy/core/mat.h"
 #include "mmdeploy/core/mpl/structure.h"
 #include "mmdeploy/core/tensor.h"
 #include "mmdeploy/core/utils/formatter.h"
-#include "pipeline.h"
+#include "mmdeploy/handle.h"
+#include "mmdeploy/pipeline.h"
 
 using namespace std;
 using namespace mmdeploy;
diff --git a/csrc/mmdeploy/apis/c/mmdeploy/segmentor.h b/csrc/mmdeploy/apis/c/mmdeploy/segmentor.h
index 70a0f1488c..7ae77a03f1 100644
--- a/csrc/mmdeploy/apis/c/mmdeploy/segmentor.h
+++ b/csrc/mmdeploy/apis/c/mmdeploy/segmentor.h
@@ -8,9 +8,9 @@
 #ifndef MMDEPLOY_SEGMENTOR_H
 #define MMDEPLOY_SEGMENTOR_H
 
-#include "common.h"
-#include "executor.h"
-#include "model.h"
+#include "mmdeploy/common.h"
+#include "mmdeploy/executor.h"
+#include "mmdeploy/model.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/csrc/mmdeploy/apis/c/mmdeploy/text_detector.cpp b/csrc/mmdeploy/apis/c/mmdeploy/text_detector.cpp
index ae965860a5..2b68d98636 100644
--- a/csrc/mmdeploy/apis/c/mmdeploy/text_detector.cpp
+++ b/csrc/mmdeploy/apis/c/mmdeploy/text_detector.cpp
@@ -1,17 +1,17 @@
 // Copyright (c) OpenMMLab. All rights reserved.
 
-#include "text_detector.h"
+#include "mmdeploy/text_detector.h"
 
 #include <numeric>
 
-#include "common_internal.h"
-#include "executor_internal.h"
 #include "mmdeploy/codebase/mmocr/mmocr.h"
+#include "mmdeploy/common_internal.h"
 #include "mmdeploy/core/model.h"
 #include "mmdeploy/core/status_code.h"
 #include "mmdeploy/core/utils/formatter.h"
-#include "model.h"
-#include "pipeline.h"
+#include "mmdeploy/executor_internal.h"
+#include "mmdeploy/model.h"
+#include "mmdeploy/pipeline.h"
 
 using namespace std;
 using namespace mmdeploy;
diff --git a/csrc/mmdeploy/apis/c/mmdeploy/text_detector.h b/csrc/mmdeploy/apis/c/mmdeploy/text_detector.h
index ef3119bb20..a3c38dc6f6 100644
--- a/csrc/mmdeploy/apis/c/mmdeploy/text_detector.h
+++ b/csrc/mmdeploy/apis/c/mmdeploy/text_detector.h
@@ -8,9 +8,9 @@
 #ifndef MMDEPLOY_TEXT_DETECTOR_H
 #define MMDEPLOY_TEXT_DETECTOR_H
 
-#include "common.h"
-#include "executor.h"
-#include "model.h"
+#include "mmdeploy/common.h"
+#include "mmdeploy/executor.h"
+#include "mmdeploy/model.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/csrc/mmdeploy/apis/c/mmdeploy/text_recognizer.cpp b/csrc/mmdeploy/apis/c/mmdeploy/text_recognizer.cpp
index 4a29ddaa80..7644da9811 100644
--- a/csrc/mmdeploy/apis/c/mmdeploy/text_recognizer.cpp
+++ b/csrc/mmdeploy/apis/c/mmdeploy/text_recognizer.cpp
@@ -1,21 +1,21 @@
 // Copyright (c) OpenMMLab. All rights reserved.
 
-#include "text_recognizer.h"
+#include "mmdeploy/text_recognizer.h"
 
 #include <numeric>
 
-#include "common_internal.h"
-#include "executor_internal.h"
 #include "mmdeploy/archive/value_archive.h"
 #include "mmdeploy/codebase/mmocr/mmocr.h"
+#include "mmdeploy/common_internal.h"
 #include "mmdeploy/core/device.h"
 #include "mmdeploy/core/mat.h"
 #include "mmdeploy/core/model.h"
 #include "mmdeploy/core/status_code.h"
 #include "mmdeploy/core/utils/formatter.h"
 #include "mmdeploy/core/value.h"
-#include "model.h"
-#include "pipeline.h"
+#include "mmdeploy/executor_internal.h"
+#include "mmdeploy/model.h"
+#include "mmdeploy/pipeline.h"
 
 using namespace mmdeploy;
 
diff --git a/csrc/mmdeploy/apis/c/mmdeploy/text_recognizer.h b/csrc/mmdeploy/apis/c/mmdeploy/text_recognizer.h
index 638dda00ea..6c18928242 100644
--- a/csrc/mmdeploy/apis/c/mmdeploy/text_recognizer.h
+++ b/csrc/mmdeploy/apis/c/mmdeploy/text_recognizer.h
@@ -8,9 +8,9 @@
 #ifndef MMDEPLOY_SRC_APIS_C_TEXT_RECOGNIZER_H_
 #define MMDEPLOY_SRC_APIS_C_TEXT_RECOGNIZER_H_
 
-#include "common.h"
-#include "executor.h"
-#include "text_detector.h"
+#include "mmdeploy/common.h"
+#include "mmdeploy/executor.h"
+#include "mmdeploy/text_detector.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/csrc/mmdeploy/apis/c/mmdeploy/video_recognizer.cpp b/csrc/mmdeploy/apis/c/mmdeploy/video_recognizer.cpp
index 60d8b5c832..3e16480d91 100644
--- a/csrc/mmdeploy/apis/c/mmdeploy/video_recognizer.cpp
+++ b/csrc/mmdeploy/apis/c/mmdeploy/video_recognizer.cpp
@@ -1,22 +1,22 @@
 // Copyright (c) OpenMMLab. All rights reserved.
 
-#include "video_recognizer.h"
+#include "mmdeploy/video_recognizer.h"
 
 #include <numeric>
 #include <vector>
 
-#include "common_internal.h"
-#include "executor_internal.h"
 #include "mmdeploy/archive/value_archive.h"
 #include "mmdeploy/codebase/mmaction/mmaction.h"
+#include "mmdeploy/common_internal.h"
 #include "mmdeploy/core/device.h"
 #include "mmdeploy/core/mat.h"
 #include "mmdeploy/core/model.h"
 #include "mmdeploy/core/status_code.h"
 #include "mmdeploy/core/utils/formatter.h"
 #include "mmdeploy/core/value.h"
-#include "model.h"
-#include "pipeline.h"
+#include "mmdeploy/executor_internal.h"
+#include "mmdeploy/model.h"
+#include "mmdeploy/pipeline.h"
 
 using namespace mmdeploy;
 
diff --git a/csrc/mmdeploy/apis/c/mmdeploy/video_recognizer.h b/csrc/mmdeploy/apis/c/mmdeploy/video_recognizer.h
index 380da5d361..e98b2bd07e 100644
--- a/csrc/mmdeploy/apis/c/mmdeploy/video_recognizer.h
+++ b/csrc/mmdeploy/apis/c/mmdeploy/video_recognizer.h
@@ -8,9 +8,9 @@
 #ifndef MMDEPLOY_VIDEO_RECOGNIZER_H
 #define MMDEPLOY_VIDEO_RECOGNIZER_H
 
-#include "common.h"
-#include "executor.h"
-#include "model.h"
+#include "mmdeploy/common.h"
+#include "mmdeploy/executor.h"
+#include "mmdeploy/model.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/csrc/mmdeploy/apis/cxx/mmdeploy/common.hpp b/csrc/mmdeploy/apis/cxx/mmdeploy/common.hpp
index c0d920180e..221b78104d 100644
--- a/csrc/mmdeploy/apis/cxx/mmdeploy/common.hpp
+++ b/csrc/mmdeploy/apis/cxx/mmdeploy/common.hpp
@@ -28,6 +28,30 @@ namespace cxx {
 
 using Rect = mmdeploy_rect_t;
 
+template <typename T>
+class UniqueHandle : public NonCopyable {
+ public:
+  UniqueHandle() = default;
+  explicit UniqueHandle(T handle) : handle_(handle) {}
+
+  // derived class must destroy the object and reset `handle_`
+  ~UniqueHandle() { assert(handle_ == nullptr); }
+
+  UniqueHandle(UniqueHandle&& o) noexcept : handle_(std::exchange(o.handle_, nullptr)) {}
+  UniqueHandle& operator=(UniqueHandle&& o) noexcept {
+    if (this != &o) {
+      handle_ = std::exchange(o.handle_, nullptr);
+    }
+    return *this;
+  }
+
+  explicit operator T() const noexcept { return handle_; }
+  T operator->() const noexcept { return handle_; }
+
+ protected:
+  T handle_{};
+};
+
 class Model {
  public:
   explicit Model(const char* path) {
@@ -39,6 +63,8 @@ class Model {
     model_.reset(model, [](auto p) { mmdeploy_model_destroy(p); });
   }
 
+  explicit Model(const std::string& path) : Model(path.c_str()) {}
+
   Model(const void* buffer, size_t size) {
     mmdeploy_model_t model{};
     auto ec = mmdeploy_model_create(buffer, static_cast<int>(size), &model);
@@ -102,6 +128,8 @@ class Mat {
       mmdeploy_data_type_t type, uint8_t* data, mmdeploy_device_t device = nullptr)
       : desc_{data, height, width, channels, format, type, device} {}
 
+  Mat(const mmdeploy_mat_t& desc) : desc_(desc) {}  // NOLINT
+
   const mmdeploy_mat_t& desc() const noexcept { return desc_; }
 
 #if MMDEPLOY_CXX_USE_OPENCV
@@ -146,6 +174,16 @@ class Mat {
 template <typename T>
 class Result_ {
  public:
+  using value_type = T;
+  using size_type = size_t;
+  using difference_type = ptrdiff_t;
+  using reference = T&;
+  using const_reference = const T&;
+  using pointer = T*;
+  using const_pointer = const T*;
+  using iterator = T*;
+  using const_iterator = T*;
+
   Result_(size_t offset, size_t size, std::shared_ptr<T> data)
       : offset_(offset), size_(size), data_(std::move(data)) {}
 
diff --git a/csrc/mmdeploy/apis/cxx/mmdeploy/pose_tracker.hpp b/csrc/mmdeploy/apis/cxx/mmdeploy/pose_tracker.hpp
new file mode 100644
index 0000000000..077ec75700
--- /dev/null
+++ b/csrc/mmdeploy/apis/cxx/mmdeploy/pose_tracker.hpp
@@ -0,0 +1,151 @@
+// Copyright (c) OpenMMLab. All rights reserved.
+
+#ifndef MMDEPLOY_POSE_TRACKER_HPP
+#define MMDEPLOY_POSE_TRACKER_HPP
+
+#include "mmdeploy/common.hpp"
+#include "mmdeploy/pose_tracker.h"
+
+namespace mmdeploy {
+
+namespace cxx {
+
+class PoseTracker : public UniqueHandle<mmdeploy_pose_tracker_t> {
+ public:
+  using Result = Result_<mmdeploy_pose_tracker_target_t>;
+  class State;
+  class Params;
+
+ public:
+  /**
+   * @brief Create pose tracker pipeline
+   * @param detect object detection model
+   * @param pose pose estimation model
+   * @param context execution context
+   */
+  PoseTracker(const Model& detect, const Model& pose, const Context& context) {
+    auto ec = mmdeploy_pose_tracker_create(detect, pose, context, &handle_);
+    if (ec != MMDEPLOY_SUCCESS) {
+      throw_exception(static_cast<ErrorCode>(ec));
+    }
+  }
+  ~PoseTracker() {
+    if (handle_) {
+      mmdeploy_pose_tracker_destroy(handle_);
+      handle_ = {};
+    }
+  }
+  PoseTracker(PoseTracker&&) noexcept = default;
+
+  /**
+   * @brief Create a tracker state corresponds to a video stream
+   * @param params params for creating the tracker state
+   * @return created tracker state
+   */
+  State CreateState(const Params& params);
+
+  /**
+   * @brief Apply pose tracker pipeline
+   * @param state tracker state
+   * @param frame input video frame
+   * @param detect control the use of detector
+   *   -1: use params.det_interval, 0: don't use detector, 1: force use detector
+   * @return
+   */
+  Result Apply(State& state, const Mat& frame, int detect = -1);
+
+  /**
+   * @brief batched version of Apply
+   * @param states
+   * @param frames
+   * @param detects
+   * @return
+   */
+  std::vector<Result> Apply(const Span<State>& states, const Span<const Mat>& frames,
+                            const Span<const int>& detects = {});
+
+ public:
+  /**
+   * see \ref mmdeploy/pose_tracker.h for detail
+   */
+  class Params : public UniqueHandle<mmdeploy_pose_tracker_param_t*> {
+   public:
+    explicit Params() {
+      handle_ = new mmdeploy_pose_tracker_param_t{};
+      mmdeploy_pose_tracker_default_params(handle_);
+    }
+    ~Params() {
+      if (handle_) {
+        delete handle_;
+        handle_ = {};
+      }
+    }
+  };
+
+  class State : public UniqueHandle<mmdeploy_pose_tracker_state_t> {
+   public:
+    explicit State(mmdeploy_pose_tracker_t pipeline, const mmdeploy_pose_tracker_param_t* params) {
+      auto ec = mmdeploy_pose_tracker_create_state(pipeline, params, &handle_);
+      if (ec != MMDEPLOY_SUCCESS) {
+        throw_exception(static_cast<ErrorCode>(ec));
+      }
+    }
+    ~State() {
+      if (handle_) {
+        mmdeploy_pose_tracker_destroy_state(handle_);
+        handle_ = {};
+      }
+    }
+    State(State&&) noexcept = default;
+  };
+};
+
+inline PoseTracker::State PoseTracker::CreateState(const PoseTracker::Params& params) {
+  return State(handle_, static_cast<mmdeploy_pose_tracker_param_t*>(params));
+}
+
+inline std::vector<PoseTracker::Result> PoseTracker::Apply(const Span<State>& states,
+                                                           const Span<const Mat>& frames,
+                                                           const Span<const int32_t>& detects) {
+  if (frames.empty()) {
+    return {};
+  }
+  mmdeploy_pose_tracker_target_t* results{};
+  int32_t* result_count{};
+
+  auto ec = mmdeploy_pose_tracker_apply(
+      handle_, reinterpret_cast<mmdeploy_pose_tracker_state_t*>(states.data()),
+      reinterpret(frames.data()), detects.data(), static_cast<int32_t>(frames.size()), &results,
+      &result_count);
+  if (ec != MMDEPLOY_SUCCESS) {
+    throw_exception(static_cast<ErrorCode>(ec));
+  }
+
+  std::shared_ptr<mmdeploy_pose_tracker_target_t> data(
+      results, [result_count, count = frames.size()](auto p) {
+        mmdeploy_pose_tracker_release_result(p, result_count, count);
+      });
+
+  std::vector<Result> rets;
+  rets.reserve(frames.size());
+
+  size_t offset = 0;
+  for (size_t i = 0; i < frames.size(); ++i) {
+    offset += rets.emplace_back(offset, result_count[i], data).size();
+  }
+
+  return rets;
+}
+
+inline PoseTracker::Result PoseTracker::Apply(PoseTracker::State& state, const Mat& frame,
+                                              int32_t detect) {
+  return Apply(Span(&state, 1), Span{frame}, Span{detect})[0];
+}
+
+}  // namespace cxx
+
+using cxx::PoseTracker;
+
+}  // namespace mmdeploy
+
+#endif  // MMDEPLOY_POSE_TRACKER_HPP
diff --git a/csrc/mmdeploy/apis/python/pose_tracker.cpp b/csrc/mmdeploy/apis/python/pose_tracker.cpp
new file mode 100644
index 0000000000..fea6895ae8
--- /dev/null
+++ b/csrc/mmdeploy/apis/python/pose_tracker.cpp
@@ -0,0 +1,149 @@
+// Copyright (c) OpenMMLab. All rights reserved.
+
+#include "mmdeploy/pose_tracker.hpp"
+
+#include "common.h"
+#include "mmdeploy/common.hpp"
+
+namespace mmdeploy::python {
+
+namespace {
+
+std::vector<py::tuple> Apply(mmdeploy::PoseTracker* self,
+                             const std::vector<mmdeploy::PoseTracker::State*>& _states,
+                             const std::vector<PyImage>& _frames, std::vector<int> detect) {
+  std::vector<mmdeploy_pose_tracker_state_t> tmp;
+  for (const auto& s : _states) {
+    tmp.push_back(static_cast<mmdeploy_pose_tracker_state_t>(*s));
+  }
+  mmdeploy::Span states(reinterpret_cast<mmdeploy::PoseTracker::State*>(tmp.data()), tmp.size());
+  std::vector<mmdeploy::Mat> frames;
+  for (const auto& f : _frames) {
+    frames.emplace_back(GetMat(f));
+  }
+  if (detect.empty()) {
+    detect.resize(frames.size(), -1);
+  }
+  assert(states.size() == frames.size());
+  assert(states.size() == detect.size());
+  auto results = self->Apply(states, frames, detect);
+  std::vector<py::tuple> batch_ret;
+  batch_ret.reserve(frames.size());
+  for (const auto& rs : results) {
+    py::array_t<float> keypoints({static_cast<int>(rs.size()), rs[0].keypoint_count, 3});
+    py::array_t<float> bboxes({static_cast<int>(rs.size()), 4});
+    py::array_t<uint32_t> track_ids(static_cast<int>(rs.size()));
+    auto kpts_ptr = keypoints.mutable_data();
+    auto bbox_ptr = bboxes.mutable_data();
+    auto track_id_ptr = track_ids.mutable_data();
+    for (const auto& r : rs) {
+      for (int i = 0; i < r.keypoint_count; ++i) {
+        kpts_ptr[0] = r.keypoints[i].x;
+        kpts_ptr[1] = r.keypoints[i].y;
+        kpts_ptr[2] = r.scores[i];
+        kpts_ptr += 3;
+      }
+      {
+        auto tmp_bbox = (std::array<float, 4>&)r.bbox;
+        bbox_ptr[0] = tmp_bbox[0];
+        bbox_ptr[1] = tmp_bbox[1];
+        bbox_ptr[2] = tmp_bbox[2];
+        bbox_ptr[3] = tmp_bbox[3];
+        bbox_ptr += 4;
+      }
+      *track_id_ptr++ = r.target_id;
+    }
+    batch_ret.push_back(
+        py::make_tuple(std::move(keypoints), std::move(bboxes), std::move(track_ids)));
+  }
+  return batch_ret;
+}
+
+template <typename T, size_t N>
+void Copy(const py::handle& h, T (&a)[N]) {
+  auto array = h.cast<py::array_t<float>>();
+  assert(array.size() == N);
+  auto data = array.data();
+  for (int i = 0; i < N; ++i) {
+    a[i] = data[i];
+  }
+}
+
+void Parse(const py::dict& dict, PoseTracker::Params& params, py::array_t<float>& sigmas) {
+  for (const auto& [_name, value] : dict) {
+    auto name = _name.cast<std::string>();
+    if (name == "det_interval") {
+      params->det_interval = value.cast<int32_t>();
+    } else if (name == "det_label") {
+      params->det_label = value.cast<int32_t>();
+    } else if (name == "det_thr") {
+      params->det_thr = value.cast<float>();
+    } else if (name == "det_min_bbox_size") {
+      params->det_min_bbox_size = value.cast<float>();
+    } else if (name == "det_nms_thr") {
+      params->det_nms_thr = value.cast<float>();
+    } else if (name == "pose_max_num_bboxes") {
+      params->pose_max_num_bboxes = value.cast<int32_t>();
+    } else if (name == "pose_min_keypoints") {
+      params->pose_min_keypoints = value.cast<int32_t>();
+    } else if (name == "pose_min_bbox_size") {
+      params->pose_min_bbox_size = value.cast<float>();
+    } else if (name == "pose_nms_thr") {
+      params->pose_nms_thr = value.cast<float>();
+    } else if (name == "track_kpt_thr") {
+      params->pose_kpt_thr = value.cast<float>();
+    } else if (name == "track_iou_thr") {
+      params->track_iou_thr = value.cast<float>();
+    } else if (name == "pose_bbox_scale") {
+      params->pose_bbox_scale = value.cast<float>();
+    } else if (name == "track_max_missing") {
+      params->track_max_missing = value.cast<float>();
+    } else if (name == "track_history_size") {
+      params->track_history_size = value.cast<int32_t>();
+    } else if (name == "keypoint_sigmas") {
+      sigmas = value.cast<py::array_t<float>>();
+      params->keypoint_sigmas = const_cast<float*>(sigmas.data());
+      params->keypoint_sigmas_size = sigmas.size();
+    } else if (name == "std_weight_position") {
+      params->std_weight_position = value.cast<float>();
+    } else if (name == "std_weight_velocity") {
+      params->std_weight_velocity = value.cast<float>();
+    } else if (name == "smooth_params") {
+      Copy(value, params->smooth_params);
+    } else {
+      MMDEPLOY_ERROR("unused argument: {}", name);
+    }
+  }
+}
+
+}  // namespace
+
+static PythonBindingRegisterer register_pose_tracker{[](py::module& m) {
+  py::class_<mmdeploy::PoseTracker::State>(m, "PoseTracker.State");
+  py::class_<mmdeploy::PoseTracker>(m, "PoseTracker")
+      .def(py::init([](const char* det_model_path, const char* pose_model_path,
+                       const char* device_name, int device_id) {
+             return mmdeploy::PoseTracker(
+                 mmdeploy::Model(det_model_path), mmdeploy::Model(pose_model_path),
+                 mmdeploy::Context(mmdeploy::Device(device_name, device_id)));
+           }),
+           py::arg("det_model"), py::arg("pose_model"), py::arg("device_name"),
+           py::arg("device_id") = 0)
+      .def(
+          "__call__",
+          [](mmdeploy::PoseTracker* self, mmdeploy::PoseTracker::State* state, const PyImage& img,
+             int detect) { return Apply(self, {state}, {img}, {detect})[0]; },
+          py::arg("state"), py::arg("frame"), py::arg("detect") = -1)
+      .def("batch", &Apply, py::arg("states"), py::arg("frames"),
+           py::arg("detects") = std::vector<int>{})
+      .def("create_state", [](mmdeploy::PoseTracker* self, const py::kwargs& kwargs) {
+        PoseTracker::Params params;
+        py::array_t<float> sigmas;
+        if (kwargs) {
+          Parse(kwargs, params, sigmas);
+        }
+        return self->CreateState(params);
+      });
+}};
+
+}  // namespace mmdeploy::python
diff --git a/csrc/mmdeploy/codebase/mmpose/CMakeLists.txt b/csrc/mmdeploy/codebase/mmpose/CMakeLists.txt
index 86b244a662..4c165c0d10 100644
--- a/csrc/mmdeploy/codebase/mmpose/CMakeLists.txt
+++ b/csrc/mmdeploy/codebase/mmpose/CMakeLists.txt
@@ -5,12 +5,17 @@ project(mmdeploy_mmpose)
 find_package(OpenCV REQUIRED)
 
 aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR} MMPOSE_SRCS)
+aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR}/pose_tracker POSE_TRACKER_SRCS)
 
-mmdeploy_add_module(${PROJECT_NAME} ${MMPOSE_SRCS})
+mmdeploy_add_module(${PROJECT_NAME} ${MMPOSE_SRCS} ${POSE_TRACKER_SRCS})
 target_link_libraries(${PROJECT_NAME} PRIVATE
         mmdeploy::transform
         mmdeploy_operation
+        mmdeploy_opencv_utils
         ${OpenCV_LIBS})
+target_include_directories(${PROJECT_NAME} PRIVATE
+        ${CMAKE_CURRENT_SOURCE_DIR}
+        ${CMAKE_CURRENT_SOURCE_DIR}/../../apis/c)
 add_library(mmdeploy::mmpose ALIAS ${PROJECT_NAME})
 
-set(MMDEPLOY_TASKS ${MMDEPLOY_TASKS} pose_detector CACHE INTERNAL "")
+set(MMDEPLOY_TASKS ${MMDEPLOY_TASKS} pose_detector pose_tracker CACHE INTERNAL "")
diff --git a/csrc/mmdeploy/codebase/mmpose/pose_tracker/common.h b/csrc/mmdeploy/codebase/mmpose/pose_tracker/common.h
new file mode 100644
index 0000000000..ec24648148
--- /dev/null
+++ b/csrc/mmdeploy/codebase/mmpose/pose_tracker/common.h
@@ -0,0 +1,57 @@
+// Copyright (c) OpenMMLab. All rights reserved.
+
+#ifndef MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_COMMON_H
+#define MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_COMMON_H
+
+#include <array>
+#include <vector>
+
+#include "mmdeploy/core/mpl/type_traits.h"
+#include "mmdeploy/pose_tracker.h"
+
+namespace mmdeploy::mmpose::_pose_tracker {
+
+struct TrackerResult {
+  std::vector<std::vector<mmdeploy_point_t>> keypoints;
+  std::vector<std::vector<float>> scores;
+  std::vector<mmdeploy_rect_t> bboxes;
+  std::vector<uint32_t> track_ids;
+  // debug info
+  std::vector<std::array<float, 4>> pose_input_bboxes;
+  std::vector<std::array<float, 4>> pose_output_bboxes;
+};
+
+inline void SetDefaultParams(mmdeploy_pose_tracker_param_t& p) {
+  p.det_interval = 1;
+  p.det_label = 0;
+  p.det_min_bbox_size = -1;
+  p.det_thr = .5f;
+  p.det_nms_thr = .7f;
+  p.pose_max_num_bboxes = -1;
+  p.pose_min_keypoints = -1;
+  p.pose_min_bbox_size = 0;
+  p.pose_kpt_thr = .5f;
+  p.pose_nms_thr = .5f;
+  p.keypoint_sigmas = nullptr;
+  p.keypoint_sigmas_size = 0;
+  p.track_iou_thr = .4f;
+  p.pose_bbox_scale = 1.25f;
+  p.track_max_missing = 10;
+  p.track_history_size = 1;
+
+  p.std_weight_position = 1 / 20.f;
+  p.std_weight_velocity = 1 / 160.f;
+
+  (std::array<float, 3>&)p.smooth_params = {0.007, 1., 1.};
+}
+
+}  // namespace mmdeploy::mmpose::_pose_tracker
+
+namespace mmdeploy {
+
+MMDEPLOY_REGISTER_TYPE_ID(mmdeploy_pose_tracker_param_t*, 0x32bc6919d5287035);
+MMDEPLOY_REGISTER_TYPE_ID(mmpose::_pose_tracker::TrackerResult, 0xacb6ddb7dc1ffbca);
+
+}  // namespace mmdeploy
+
+#endif  // MMDEPLOY_COMMON_H
diff --git a/csrc/mmdeploy/codebase/mmpose/pose_tracker/pipeline.cpp b/csrc/mmdeploy/codebase/mmpose/pose_tracker/pipeline.cpp
new file mode 100644
index 0000000000..6ae6012b2f
--- /dev/null
+++ b/csrc/mmdeploy/codebase/mmpose/pose_tracker/pipeline.cpp
@@ -0,0 +1,117 @@
+// Copyright (c) OpenMMLab. All rights reserved.
+
+#include "mmdeploy/archive/value_archive.h"
+#include "mmdeploy/core/mat.h"
+#include "mmdeploy/core/module.h"
+#include "mmdeploy/experimental/module_adapter.h"
+#include "pose_tracker/common.h"
+#include "pose_tracker/pose_tracker.h"
+
+namespace mmdeploy {
+
+MMDEPLOY_REGISTER_TYPE_ID(mmpose::_pose_tracker::Tracker, 0xcfe87980aa895d3a);
+
+}
+
+namespace mmdeploy::mmpose::_pose_tracker {
+
+#define REGISTER_SIMPLE_MODULE(name, fn) \
+  MMDEPLOY_REGISTER_FACTORY_FUNC(Module, (name, 0), [](const Value&) { return CreateTask(fn); });
+
+Value Prepare(const Value& data, const Value& use_det, Value state) {
+  auto& tracker = state.get_ref<Tracker&>();
+  // set frame size for the first frame
+  if (tracker.frame_id() == 0) {
+    auto& frame = data["ori_img"].get_ref<const framework::Mat&>();
+    tracker.SetFrameSize(frame.height(), frame.width());
+  }
+  // use_det is set to non-auto mode
+  if (use_det.get<int>() != -1) {
+    return use_det;
+  }
+  auto interval = tracker.params().det_interval;
+  return interval > 0 && tracker.frame_id() % interval == 0;
+}
+
+REGISTER_SIMPLE_MODULE(pose_tracker::Prepare, Prepare);
+
+std::tuple<Value, Value> ProcessBboxes(const Value& det_val, const Value& data,
+                                       Value state) noexcept {
+  auto& tracker = state.get_ref<Tracker&>();
+
+  std::optional<Tracker::Detections> dets;
+
+  if (det_val.is_array()) {  // has detections
+    auto& [bboxes, scores, labels] = dets.emplace();
+    for (const auto& det : det_val.array()) {
+      bboxes.push_back(from_value<Bbox>(det["bbox"]));
+      scores.push_back(det["score"].get<float>());
+      labels.push_back(det["label_id"].get<int>());
+    }
+  }
+
+  auto [bboxes, ids] = tracker.ProcessBboxes(dets);
+
+  Value::Array bbox_array;
+  Value track_ids_array;
+  // attach bboxes to image data
+  for (auto& bbox : bboxes) {
+    cv::Rect rect(cv::Rect2f(cv::Point2f(bbox[0], bbox[1]), cv::Point2f(bbox[2], bbox[3])));
+    bbox_array.push_back({
+        {"img", data["img"]},                                 // img
+        {"bbox", {rect.x, rect.y, rect.width, rect.height}},  // bbox
+    });
+  }
+
+  track_ids_array = to_value(ids);
+  return {std::move(bbox_array), std::move(track_ids_array)};
+}
+REGISTER_SIMPLE_MODULE(pose_tracker::ProcessBboxes, ProcessBboxes);
+
+Value TrackStep(const Value& poses, const Value& track_indices, Value state) noexcept {
+  assert(poses.is_array());
+  vector<Points> keypoints;
+  vector<Scores> scores;
+  for (auto& output : poses.array()) {
+    auto& k = keypoints.emplace_back();
+    auto& s = scores.emplace_back();
+    float avg = 0.f;
+    for (auto& kpt : output["key_points"].array()) {
+      k.emplace_back(kpt["bbox"][0].get<float>(), kpt["bbox"][1].get<float>());
+      s.push_back(kpt["score"].get<float>());
+      avg += s.back();
+    }
+  }
+  vector<int64_t> track_ids;
+  from_value(track_indices, track_ids);
+  auto& tracker = state.get_ref<Tracker&>();
+  tracker.TrackStep(keypoints, scores, track_ids);
+  TrackerResult result;
+  for (const auto& track : tracker.tracks()) {
+    if (track->missing()) {
+      continue;
+    }
+    vector<mmdeploy_point_t> kpts;
+    kpts.reserve(track->smoothed_kpts().size());
+    for (const auto& kpt : track->smoothed_kpts()) {
+      kpts.push_back({kpt.x, kpt.y});
+    }
+    result.keypoints.push_back(std::move(kpts));
+    result.scores.push_back(track->scores());
+    auto& bbox = track->smoothed_bbox();
+    result.bboxes.push_back({bbox[0], bbox[1], bbox[2], bbox[3]});
+    result.track_ids.push_back(track->track_id());
+  }
+  result.pose_input_bboxes = tracker.pose_input_bboxes();
+  result.pose_output_bboxes = tracker.pose_output_bboxes();
+  return result;
+}
+REGISTER_SIMPLE_MODULE(pose_tracker::TrackStep, TrackStep);
+
+// MSVC toolset v143 keeps ICEing when using a lambda here
+static Value CreateTracker(mmdeploy_pose_tracker_param_t* param) {
+  return make_pointer(Tracker{*param});
+}
+REGISTER_SIMPLE_MODULE(pose_tracker::Create, CreateTracker);
+
+}  // namespace mmdeploy::mmpose::_pose_tracker
diff --git a/csrc/mmdeploy/codebase/mmpose/pose_tracker/pose_tracker.cpp b/csrc/mmdeploy/codebase/mmpose/pose_tracker/pose_tracker.cpp
new file mode 100644
index 0000000000..e1671dd7ac
--- /dev/null
+++ b/csrc/mmdeploy/codebase/mmpose/pose_tracker/pose_tracker.cpp
@@ -0,0 +1,399 @@
+// Copyright (c) OpenMMLab. All rights reserved.
+
+#include "pose_tracker/pose_tracker.h"
+
+#include <array>
+#include <cmath>
+#include <numeric>
+
+#include "mmdeploy/core/utils/formatter.h"
+#include "pose_tracker/utils.h"
+
+namespace mmdeploy::mmpose::_pose_tracker {
+
+Tracker::Tracker(const mmdeploy_pose_tracker_param_t& _params) : params_(_params) {
+  if (params_.keypoint_sigmas && params_.keypoint_sigmas_size) {
+    std::copy_n(params_.keypoint_sigmas, params_.keypoint_sigmas_size,
+                std::back_inserter(key_point_sigmas_));
+    params_.keypoint_sigmas = key_point_sigmas_.data();
+  }
+}
+
+void Tracker::SuppressOverlappingBoxes(const vector<Bbox>& bboxes,
+                                       vector<std::pair<int, float>>& ranks,
+                                       vector<int>& is_valid_bbox) const {
+  vector<float> iou(ranks.size() * ranks.size());
+  for (int i = 0; i < bboxes.size(); ++i) {
+    for (int j = 0; j < i; ++j) {
+      iou[i * bboxes.size() + j] = iou[j * bboxes.size() + i] =
+          intersection_over_union(bboxes[i], bboxes[j]);
+    }
+  }
+  suppress_non_maximum(ranks, iou, is_valid_bbox, params_.det_nms_thr);
+}
+
+void Tracker::SuppressOverlappingPoses(const vector<Points>& keypoints,
+                                       const vector<Scores>& scores, const vector<Bbox>& bboxes,
+                                       const vector<int64_t>& track_ids, vector<int>& is_valid,
+                                       float oks_thr) {
+  assert(keypoints.size() == is_valid.size());
+  assert(scores.size() == is_valid.size());
+  assert(bboxes.size() == is_valid.size());
+  const auto size = is_valid.size();
+  vector<float> similarity(size * size);
+  for (int i = 0; i < size; ++i) {
+    if (is_valid[i]) {
+      for (int j = 0; j < i; ++j) {
+        if (is_valid[j]) {
+          similarity[i * size + j] = similarity[j * size + i] =
+              GetPosePoseSimilarity(bboxes[i], keypoints[i], bboxes[j], keypoints[j]);
+        }
+      }
+    }
+  }
+  vector<std::pair<bool, float>> ranks;
+  ranks.reserve(size);
+  for (int i = 0; i < size; ++i) {
+    bool is_visible = false;
+    for (const auto& track : tracks_) {
+      if (track->track_id() == track_ids[i]) {
+        is_visible = track->missing() == 0;
+        break;
+      }
+    }
+    auto score = std::accumulate(scores[i].begin(), scores[i].end(), 0.f);
+    // prevents bboxes from missing tracks to suppress visible tracks
+    ranks.emplace_back(is_visible, score);
+  }
+  suppress_non_maximum(ranks, similarity, is_valid, oks_thr);
+}
+
+std::tuple<vector<Bbox>, vector<int64_t>> Tracker::ProcessBboxes(
+    const std::optional<Detections>& dets) {
+  vector<Bbox> bboxes;
+  vector<int64_t> prev_ids;
+
+  // 2 - visible tracks
+  // 1 - detection
+  // 0 - missing tracks
+  vector<int> types;
+
+  GetDetectedObjects(dets, bboxes, prev_ids, types);
+
+  GetTrackedObjects(bboxes, prev_ids, types);
+
+  vector<int> is_valid_bboxes(bboxes.size(), 1);
+
+  auto count = [&] {
+    std::array<int, 3> acc{};
+    for (size_t i = 0; i < is_valid_bboxes.size(); ++i) {
+      if (is_valid_bboxes[i]) {
+        ++acc[types[i]];
+      }
+    }
+    return acc;
+  };
+  POSE_TRACKER_DEBUG("frame {}, bboxes {}", frame_id_, count());
+
+  vector<std::pair<int, float>> ranks;
+  ranks.reserve(bboxes.size());
+  for (int i = 0; i < bboxes.size(); ++i) {
+    ranks.emplace_back(types[i], get_area(bboxes[i]));
+  }
+  SuppressOverlappingBoxes(bboxes, ranks, is_valid_bboxes);
+  POSE_TRACKER_DEBUG("frame {}, bboxes after nms: {}", frame_id_, count());
+
+  vector<int> idxs;
+  idxs.reserve(bboxes.size());
+  for (int i = 0; i < bboxes.size(); ++i) {
+    if (is_valid_bboxes[i]) {
+      idxs.push_back(i);
+    }
+  }
+
+  std::stable_sort(idxs.begin(), idxs.end(), [&](int i, int j) { return ranks[i] > ranks[j]; });
+  std::fill(is_valid_bboxes.begin(), is_valid_bboxes.end(), 0);
+  {
+    vector<Bbox> tmp_bboxes;
+    vector<int64_t> tmp_track_ids;
+    for (const auto& i : idxs) {
+      if (tmp_bboxes.size() >= params_.pose_max_num_bboxes) {
+        break;
+      }
+      tmp_bboxes.push_back(bboxes[i]);
+      tmp_track_ids.push_back(prev_ids[i]);
+      is_valid_bboxes[i] = 1;
+    }
+    bboxes = std::move(tmp_bboxes);
+    prev_ids = std::move(tmp_track_ids);
+  }
+
+  pose_input_bboxes_ = bboxes;
+
+  POSE_TRACKER_DEBUG("frame {}, bboxes after sort: {}", frame_id_, count());
+  return {bboxes, prev_ids};
+}
+
+void Tracker::TrackStep(vector<Points>& keypoints, vector<Scores>& scores,
+                        const vector<int64_t>& prev_ids) noexcept {
+  vector<Bbox> bboxes(keypoints.size());
+  vector<int> is_unused_bbox(keypoints.size(), 1);
+
+  // key-points to bboxes
+  for (size_t i = 0; i < keypoints.size(); ++i) {
+    if (auto bbox = KeypointsToBbox(keypoints[i], scores[i])) {
+      bboxes[i] = *bbox;
+    } else {
+      is_unused_bbox[i] = false;
+    }
+  }
+
+  pose_output_bboxes_ = bboxes;
+
+  SuppressOverlappingPoses(keypoints, scores, bboxes, prev_ids, is_unused_bbox,
+                           params_.pose_nms_thr);
+  assert(is_unused_bbox.size() == bboxes.size());
+
+  vector<float> similarity0;    // average mahalanobis dist - proportion of tracked key-points
+  vector<float> similarity1;    // iou
+  vector<vector<bool>> gating;  // key-point gating result
+  GetSimilarityMatrices(bboxes, keypoints, prev_ids, similarity0, similarity1, gating);
+
+  vector<int> is_unused_track(tracks_.size(), 1);
+  // disable missing tracks in the #1 assignment
+  for (int i = 0; i < tracks_.size(); ++i) {
+    if (tracks_[i]->missing()) {
+      is_unused_track[i] = 0;
+    }
+  }
+  const auto assignment_visible =
+      greedy_assignment(similarity0, is_unused_bbox, is_unused_track, -kInf / 10);
+  POSE_TRACKER_DEBUG("frame {}, assignment for visible {}", frame_id_, assignment_visible);
+
+  // enable missing tracks in the #2 assignment
+  for (int i = 0; i < tracks_.size(); ++i) {
+    if (tracks_[i]->missing()) {
+      is_unused_track[i] = 1;
+    }
+  }
+  const auto assignment_missing =
+      greedy_assignment(similarity1, is_unused_bbox, is_unused_track, params_.track_iou_thr);
+  POSE_TRACKER_DEBUG("frame {}, assignment for missing {}", frame_id_, assignment_missing);
+
+  // update assigned tracks
+  for (auto [i, j, _] : assignment_visible) {
+    tracks_[j]->UpdateVisible(bboxes[i], keypoints[i], scores[i], gating[i * tracks_.size() + j]);
+  }
+
+  // update recovered tracks
+  for (auto [i, j, _] : assignment_missing) {
+    tracks_[j]->UpdateRecovered(bboxes[i], keypoints[i], scores[i]);
+  }
+
+  // generating new tracks from detected bboxes
+  for (size_t i = 0; i < is_unused_bbox.size(); ++i) {
+    if (is_unused_bbox[i] && prev_ids[i] == -1) {
+      CreateTrack(bboxes[i], keypoints[i], scores[i]);
+    }
+  }
+
+  // update missing tracks
+  for (size_t i = 0; i < is_unused_track.size(); ++i) {
+    if (is_unused_track[i]) {
+      tracks_[i]->UpdateMissing();
+    }
+  }
+
+  // diagnostic for missing tracks
+  DiagnosticMissingTracks(is_unused_track, is_unused_bbox, similarity0, similarity1);
+
+  RemoveMissingTracks();
+
+  for (auto& track : tracks_) {
+    track->Predict();
+  }
+
+  ++frame_id_;
+
+  // print track summary
+  //  SummaryTracks();
+}
+
+void Tracker::GetTrackedObjects(vector<Bbox>& bboxes, vector<int64_t>& track_ids,
+                                vector<int>& types) const {
+  for (auto& track : tracks_) {
+    std::optional<Bbox> bbox;
+    if (track->missing()) {
+      bbox = track->predicted_bbox();
+    } else {
+      bbox = keypoints_to_bbox(track->predicted_kpts(), track->scores(), frame_h_, frame_w_,
+                               params_.pose_bbox_scale, params_.pose_kpt_thr,
+                               params_.pose_min_keypoints);
+    }
+    if (bbox) {
+      auto& b = *bbox;
+      cv::Rect_<float> img_rect(0, 0, frame_w_, frame_h_);
+      cv::Rect_<float> box_rect(b[0], b[1], b[2] - b[0], b[3] - b[1]);
+      auto roi = img_rect & box_rect;
+      if (roi.area() > 0 && get_area(b) > params_.pose_min_bbox_size * params_.pose_min_bbox_size) {
+        bboxes.push_back(*bbox);
+        track_ids.push_back(track->track_id());
+        types.push_back(track->missing() ? 0 : 2);
+      }
+    }
+  }
+}
+
+void Tracker::GetDetectedObjects(const std::optional<Detections>& dets, vector<Bbox>& _bboxes,
+                                 vector<int64_t>& track_ids, vector<int>& types) const {
+  if (dets) {
+    auto& [bboxes, scores, labels] = *dets;
+    for (size_t i = 0; i < bboxes.size(); ++i) {
+      if (labels[i] == params_.det_label && scores[i] > params_.det_thr &&
+          get_area(bboxes[i]) >= params_.det_min_bbox_size * params_.det_min_bbox_size) {
+        _bboxes.push_back(bboxes[i]);
+        track_ids.push_back(-1);
+        types.push_back(1);
+      }
+    }
+  }
+}
+
+std::tuple<float, float, vector<bool>> Tracker::GetTrackPoseSimilarity(Track& track,
+                                                                       const Bbox& bbox,
+                                                                       const Points& kpts) const {
+  static constexpr const std::array chi2inv95{0.f,     3.8415f, 5.9915f, 7.8147f, 9.4877f,
+                                              11.070f, 12.592f, 14.067f, 15.507f, 16.919f};
+  auto dists = track.KeyPointDistance(kpts);
+  vector<bool> gating;
+  gating.reserve(dists.size());
+  float dist = 0.f;
+  int count = 0;
+  for (const auto& d : dists) {
+    if (d < chi2inv95[2]) {
+      dist += d;
+      ++count;
+      gating.push_back(true);
+    } else {
+      gating.push_back(false);
+    }
+  }
+  auto count_thr =
+      params_.pose_min_keypoints >= 0 ? params_.pose_min_keypoints : (dists.size() + 1) / 2;
+  if (count >= count_thr) {
+    auto fcount = static_cast<float>(count);
+    dist = dist / fcount - fcount / static_cast<float>(dists.size());
+  } else {
+    dist = kInf;
+  }
+
+  auto iou = intersection_over_union(track.predicted_bbox(), bbox);
+  if (key_point_sigmas_.empty()) {
+    return {dist, iou, gating};
+  }
+
+  return {dist, iou, gating};
+}
+
+void Tracker::GetSimilarityMatrices(const vector<Bbox>& bboxes, const vector<Points>& keypoints,
+                                    const vector<int64_t>& prev_ids, vector<float>& similarity0,
+                                    vector<float>& similarity1, vector<vector<bool>>& gating) {
+  const auto n_rows = static_cast<int>(bboxes.size());
+  const auto n_cols = static_cast<int>(tracks_.size());
+
+  // generate similarity matrix
+  similarity0 = vector<float>(n_rows * n_cols, -kInf);
+  similarity1 = vector<float>(n_rows * n_cols, -kInf);
+  gating = vector<vector<bool>>(n_rows * n_cols);
+  for (size_t i = 0; i < n_rows; ++i) {
+    const auto& bbox = bboxes[i];
+    const auto& kpts = keypoints[i];
+    for (size_t j = 0; j < n_cols; ++j) {
+      auto& track = *tracks_[j];
+      if (prev_ids[i] != -1 && prev_ids[i] != track.track_id()) {
+        continue;
+      }
+      const auto index = i * n_cols + j;
+      auto&& [dist, iou, gate] = GetTrackPoseSimilarity(track, bbox, kpts);
+      similarity0[index] = -dist;
+      similarity1[index] = iou;
+      gating.push_back(std::move(gate));
+    }
+  }
+}
+
+float Tracker::GetPosePoseSimilarity(const Bbox& bbox0, const Points& kpts0, const Bbox& bbox1,
+                                     const Points& kpts1) {
+  if (key_point_sigmas_.empty()) {
+    return intersection_over_union(bbox0, bbox1);
+  }
+  // symmetric
+  return object_keypoint_similarity(kpts0, bbox0, kpts1, bbox1, key_point_sigmas_);
+}
+
+void Tracker::CreateTrack(const Bbox& bbox, const Points& kpts, const Scores& scores) {
+  *tracks_.emplace_back(std::make_unique<Track>(&params_, bbox, kpts, scores, next_id_++));
+}
+
+std::optional<Bbox> Tracker::KeypointsToBbox(const Points& kpts, const Scores& scores) const {
+  return keypoints_to_bbox(kpts, scores, frame_h_, frame_w_, params_.pose_bbox_scale,
+                           params_.pose_kpt_thr, params_.pose_min_keypoints);
+}
+
+void Tracker::RemoveMissingTracks() {
+  size_t count{};
+  for (auto& track : tracks_) {
+    if (track->missing() <= params_.track_max_missing) {
+      tracks_[count++] = std::move(track);
+    }
+  }
+  tracks_.resize(count);
+}
+
+void Tracker::DiagnosticMissingTracks(const vector<int>& is_unused_track,
+                                      const vector<int>& is_unused_bbox,
+                                      const vector<float>& similarity0,
+                                      const vector<float>& similarity1) {
+  int missing = 0;
+  const auto n_cols = static_cast<int>(is_unused_track.size());
+  const auto n_rows = static_cast<int>(is_unused_bbox.size());
+  for (int i = 0; i < is_unused_track.size(); ++i) {
+    if (is_unused_track[i]) {
+      float best_s0 = 0.f;
+      float best_s1 = 0.f;
+      for (int j = 0; j < is_unused_bbox.size(); ++j) {
+        if (is_unused_bbox[j]) {
+          best_s0 = std::max(similarity0[j * n_cols + i], best_s0);
+          best_s1 = std::max(similarity1[j * n_cols + i], best_s1);
+        }
+      }
+      POSE_TRACKER_DEBUG("frame {}: track missing {}, best_s0={}, best_s1={}", frame_id_,
+                         tracks_[i]->track_id(), best_s0, best_s1);
+      ++missing;
+    }
+  }
+  if (missing) {
+    std::stringstream ss;
+    ss << cv::Mat_<float>(n_rows, n_cols, const_cast<float*>(similarity0.data()));
+    POSE_TRACKER_DEBUG("frame {}, similarity#0: \n{}", frame_id_, ss.str());
+    ss = std::stringstream{};
+    ss << cv::Mat_<float>(n_rows, n_cols, const_cast<float*>(similarity1.data()));
+    POSE_TRACKER_DEBUG("frame {}, similarity#1: \n{}", frame_id_, ss.str());
+  }
+}
+
+void Tracker::SummaryTracks() {
+  vector<std::tuple<int64_t, int>> summary;
+  for (const auto& track : tracks_) {
+    summary.emplace_back(track->track_id(), track->missing());
+  }
+  POSE_TRACKER_DEBUG("frame {}, track summary {}", frame_id_, summary);
+  for (const auto& track : tracks_) {
+    if (!track->missing()) {
+      POSE_TRACKER_DEBUG("frame {}, track {}, scores {}", frame_id_, track->track_id(),
+                         track->scores());
+    }
+  }
+}
+
+}  // namespace mmdeploy::mmpose::_pose_tracker
diff --git a/csrc/mmdeploy/codebase/mmpose/pose_tracker/pose_tracker.h b/csrc/mmdeploy/codebase/mmpose/pose_tracker/pose_tracker.h
new file mode 100644
index 0000000000..c27ae0b85b
--- /dev/null
+++ b/csrc/mmdeploy/codebase/mmpose/pose_tracker/pose_tracker.h
@@ -0,0 +1,101 @@
+// Copyright (c) OpenMMLab. All rights reserved.
+
+#ifndef MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_HPP
+#define MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_HPP
+
+#include "mmdeploy/pose_tracker.h"
+#include "pose_tracker/common.h"
+#include "pose_tracker/track.h"
+
+namespace mmdeploy::mmpose::_pose_tracker {
+
+class Tracker {
+ public:
+  explicit Tracker(const mmdeploy_pose_tracker_param_t& _params);
+
+  Tracker(const Tracker&) { assert(0); }
+  Tracker(Tracker&& o) noexcept = default;
+
+  struct Detections {
+    Bboxes bboxes;
+    Scores scores;
+    vector<int> labels;
+  };
+
+  void SetFrameSize(int height, int width) {
+    frame_h_ = static_cast<float>(height);
+    frame_w_ = static_cast<float>(width);
+  }
+
+  const mmdeploy_pose_tracker_param_t& params() const noexcept { return params_; }
+
+  int64_t frame_id() const noexcept { return frame_id_; }
+
+  const vector<std::unique_ptr<Track>>& tracks() const noexcept { return tracks_; }
+
+  std::tuple<vector<Bbox>, vector<int64_t>> ProcessBboxes(const std::optional<Detections>& dets);
+
+  void TrackStep(vector<Points>& keypoints, vector<Scores>& scores,
+                 const vector<int64_t>& prev_ids) noexcept;
+
+ private:
+  void GetDetectedObjects(const std::optional<Detections>& dets, vector<Bbox>& _bboxes,
+                          vector<int64_t>& track_ids, vector<int>& types) const;
+
+  void GetTrackedObjects(vector<Bbox>& bboxes, vector<int64_t>& track_ids,
+                         vector<int>& types) const;
+
+  void SuppressOverlappingBoxes(const vector<Bbox>& bboxes, vector<std::pair<int, float>>& ranks,
+                                vector<int>& is_valid_bbox) const;
+
+  void SuppressOverlappingPoses(const vector<Points>& keypoints, const vector<Scores>& scores,
+                                const vector<Bbox>& bboxes, const vector<int64_t>& track_ids,
+                                vector<int>& is_valid, float oks_thr);
+
+  std::optional<Bbox> KeypointsToBbox(const Points& kpts, const Scores& scores) const;
+
+  float GetPosePoseSimilarity(const Bbox& bbox0, const Points& kpts0, const Bbox& bbox1,
+                              const Points& kpts1);
+
+  void GetSimilarityMatrices(const vector<Bbox>& bboxes, const vector<Points>& keypoints,
+                             const vector<int64_t>& prev_ids, vector<float>& similarity0,
+                             vector<float>& similarity1, vector<vector<bool>>& gating);
+
+  std::tuple<float, float, vector<bool>> GetTrackPoseSimilarity(Track& track, const Bbox& bbox,
+                                                                const Points& kpts) const;
+
+  void CreateTrack(const Bbox& bbox, const Points& kpts, const Scores& scores);
+
+  void RemoveMissingTracks();
+
+  void DiagnosticMissingTracks(const vector<int>& is_unused_track,
+                               const vector<int>& is_unused_bbox, const vector<float>& similarity0,
+                               const vector<float>& similarity1);
+
+  void SummaryTracks();
+
+ private:
+  static constexpr const auto kInf = 1000.f;
+
+  float frame_h_ = 0;
+  float frame_w_ = 0;
+
+  vector<std::unique_ptr<Track>> tracks_;
+  int64_t next_id_{0};
+
+  std::vector<float> key_point_sigmas_;
+  mmdeploy_pose_tracker_param_t params_;
+
+  vector<Bbox> pose_input_bboxes_;
+  vector<Bbox> pose_output_bboxes_;
+
+  int64_t frame_id_ = 0;
+
+ public:
+  const vector<Bbox>& pose_input_bboxes() const noexcept { return pose_input_bboxes_; }
+  const vector<Bbox>& pose_output_bboxes() const noexcept { return pose_output_bboxes_; }
+};
+
+}  // namespace mmdeploy::mmpose::_pose_tracker
+
+#endif  // MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_HPP
diff --git a/csrc/mmdeploy/codebase/mmpose/pose_tracker/smoothing_filter.cpp b/csrc/mmdeploy/codebase/mmpose/pose_tracker/smoothing_filter.cpp
new file mode 100644
index 0000000000..9c2711a4f5
--- /dev/null
+++ b/csrc/mmdeploy/codebase/mmpose/pose_tracker/smoothing_filter.cpp
@@ -0,0 +1,49 @@
+// Copyright (c) OpenMMLab. All rights reserved.
+
+#include "smoothing_filter.h"
+
+namespace mmdeploy::mmpose::_pose_tracker {
+
+SmoothingFilter::SmoothingFilter(const Bbox& bbox, const Points& pts,
+                                 const SmoothingFilter::Params& params)
+    : params_(params),
+      pts_v_(pts.size()),
+      pts_x_(pts),
+      center_v_{},
+      center_x_{get_center(bbox)},
+      scale_v_{},
+      scale_x_{get_scale(bbox)} {}
+
+std::pair<Bbox, Points> SmoothingFilter::Step(const Bbox& bbox, const Points& kpts) {
+  constexpr auto abs = [](const Point& p) { return std::sqrt(p.dot(p)); };
+
+  // filter key-points
+  step<Point>(pts_x_, pts_v_, kpts, params_, abs);
+
+  // filter bbox center
+  std::array c{get_center(bbox)};
+  step<Point>(center_x_, center_v_, c, params_, abs);
+
+  // filter bbox scales
+  auto s = get_scale(bbox);
+  step<float>(scale_x_, scale_v_, s, params_, [](auto x) { return x; });
+
+  return {get_bbox(center_x_[0], scale_x_), pts_x_};
+}
+
+void SmoothingFilter::Reset(const Bbox& bbox, const Points& pts) {
+  pts_v_ = Points(pts_v_.size());
+  center_v_ = {};
+  scale_v_ = {};
+  pts_x_ = pts;
+  center_v_ = {get_center(bbox)};
+  scale_v_ = get_scale(bbox);
+}
+
+float SmoothingFilter::smoothing_factor(float cutoff) {
+  static constexpr float kPi = 3.1415926;
+  auto r = 2.f * kPi * cutoff;
+  return r / (r + 1.f);
+}
+
+}  // namespace mmdeploy::mmpose::_pose_tracker
diff --git a/csrc/mmdeploy/codebase/mmpose/pose_tracker/smoothing_filter.h b/csrc/mmdeploy/codebase/mmpose/pose_tracker/smoothing_filter.h
new file mode 100644
index 0000000000..c136beb2d6
--- /dev/null
+++ b/csrc/mmdeploy/codebase/mmpose/pose_tracker/smoothing_filter.h
@@ -0,0 +1,58 @@
+// Copyright (c) OpenMMLab. All rights reserved.
+
+#ifndef MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_SMOOTHING_FILTER_H
+#define MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_SMOOTHING_FILTER_H
+
+#include "mmdeploy/core/mpl/span.h"
+#include "pose_tracker/utils.h"
+
+namespace mmdeploy::mmpose::_pose_tracker {
+
+template <typename T>
+using span = mmdeploy::Span<T>;
+
+class SmoothingFilter {
+ public:
+  struct Params {
+    float beta;
+    float fc_min;
+    float fc_v;
+  };
+  explicit SmoothingFilter(const Bbox& bbox, const Points& pts, const Params& params);
+
+  std::pair<Bbox, Points> Step(const Bbox& bbox, const Points& kpts);
+
+  void Reset(const Bbox& bbox, const Points& pts);
+
+ private:
+  static float smoothing_factor(float cutoff);
+
+  template <typename T, typename Norm>
+  static void step(span<T> x, span<T> v, span<const T> x1, const Params& params, Norm norm) {
+    auto a_v = smoothing_factor(params.fc_v);
+    for (int i = 0; i < v.size(); ++i) {
+      v[i] = smooth(a_v, v[i], x1[i] - x[i]);
+      auto fc = params.fc_min + params.beta * norm(v[i]);
+      auto a_x = smoothing_factor(fc);
+      x[i] = smooth(a_x, x[i], x1[i]);
+    }
+  }
+
+  template <typename T>
+  static T smooth(float a, const T& x0, const T& x1) {
+    return (1.f - a) * x0 + a * x1;
+  }
+
+ private:
+  Params params_;
+  std::vector<Point> pts_v_;
+  std::vector<Point> pts_x_;
+  std::array<Point, 1> center_v_;
+  std::array<Point, 1> center_x_;
+  std::array<float, 2> scale_v_;
+  std::array<float, 2> scale_x_;
+};
+
+}  // namespace mmdeploy::mmpose::_pose_tracker
+
+#endif  // MMDEPLOY_SMOOTHING_FILTER_H
diff --git a/csrc/mmdeploy/codebase/mmpose/pose_tracker/track.cpp b/csrc/mmdeploy/codebase/mmpose/pose_tracker/track.cpp
new file mode 100644
index 0000000000..8566433ba0
--- /dev/null
+++ b/csrc/mmdeploy/codebase/mmpose/pose_tracker/track.cpp
@@ -0,0 +1,70 @@
+// Copyright (c) OpenMMLab. All rights reserved.
+
+#include "pose_tracker/track.h"
+
+namespace mmdeploy::mmpose::_pose_tracker {
+
+Track::Track(const mmdeploy_pose_tracker_param_t* params, const Bbox& bbox, const Points& kpts,
+             const Scores& ss, int64_t id)
+    : params_(params),
+      filter_(CreateFilter(bbox, kpts)),
+      smoother_(CreateSmoother(bbox, kpts)),
+      track_id_(id) {
+  POSE_TRACKER_DEBUG("new track {}", track_id_);
+  Add(bbox, kpts, ss);
+}
+
+Track::~Track() { POSE_TRACKER_DEBUG("track lost {}", track_id_); }
+
+void Track::UpdateVisible(const Bbox& bbox, const Points& kpts, const Scores& scores,
+                          const vector<bool>& tracked) {
+  auto [bbox_corr, kpts_corr] = filter_.Correct(bbox, kpts, tracked);
+  Add(bbox_corr, kpts_corr, scores);
+}
+
+void Track::UpdateRecovered(const Bbox& bbox, const Points& kpts, const Scores& scores) {
+  POSE_TRACKER_DEBUG("track recovered {}", track_id_);
+  filter_ = CreateFilter(bbox, kpts);
+  smoother_.Reset(bbox, kpts);
+  Add(bbox, kpts, scores);
+  missing_ = 0;
+}
+
+void Track::UpdateMissing() {
+  missing_++;
+  if (missing_ <= params_->track_max_missing) {
+    // use predicted state to update the missing tracks
+    Add(bbox_predict_, kpts_predict_, vector<float>(kpts_predict_.size()));
+  }
+}
+
+void Track::Predict() {
+  // TODO: velocity decay for missing tracks
+  std::tie(bbox_predict_, kpts_predict_) = filter_.Predict();
+}
+
+void Track::Add(const Bbox& bbox, const Points& kpts, const Scores& ss) {
+  bboxes_.push_back(bbox);
+  keypoints_.push_back(kpts);
+  scores_.push_back(ss);
+  if (bboxes_.size() > params_->track_history_size) {
+    std::rotate(bboxes_.begin(), bboxes_.begin() + 1, bboxes_.end());
+    std::rotate(keypoints_.begin(), keypoints_.begin() + 1, keypoints_.end());
+    std::rotate(scores_.begin(), scores_.begin() + 1, scores_.end());
+    bboxes_.pop_back();
+    keypoints_.pop_back();
+    scores_.pop_back();
+  }
+  std::tie(bbox_smooth_, kpts_smooth_) = smoother_.Step(bbox, kpts);
+}
+
+TrackingFilter Track::CreateFilter(const Bbox& bbox, const Points& pts) {
+  return {bbox, pts, params_->std_weight_position, params_->std_weight_velocity};
+}
+
+SmoothingFilter Track::CreateSmoother(const Bbox& bbox, const Points& pts) {
+  return SmoothingFilter(
+      bbox, pts, {params_->smooth_params[0], params_->smooth_params[1], params_->smooth_params[2]});
+}
+
+}  // namespace mmdeploy::mmpose::_pose_tracker
diff --git a/csrc/mmdeploy/codebase/mmpose/pose_tracker/track.h b/csrc/mmdeploy/codebase/mmpose/pose_tracker/track.h
new file mode 100644
index 0000000000..5be73168ac
--- /dev/null
+++ b/csrc/mmdeploy/codebase/mmpose/pose_tracker/track.h
@@ -0,0 +1,65 @@
+//
+// Created by zhangli on 1/9/23.
+//
+
+#ifndef MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_TRACK_H
+#define MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_TRACK_H
+
+#include "pose_tracker/common.h"
+#include "pose_tracker/smoothing_filter.h"
+#include "pose_tracker/tracking_filter.h"
+#include "pose_tracker/utils.h"
+
+namespace mmdeploy::mmpose::_pose_tracker {
+
+class Track {
+ public:
+  Track(const mmdeploy_pose_tracker_param_t* params, const Bbox& bbox, const Points& kpts,
+        const Scores& ss, int64_t id);
+  ~Track();
+
+  void UpdateVisible(const Bbox& bbox, const Points& kpts, const Scores& scores,
+                     const vector<bool>& tracked);
+  void UpdateRecovered(const Bbox& bbox, const Points& kpts, const Scores& scores);
+  void UpdateMissing();
+  void Predict();
+
+  float BboxDistance(const Bbox& bbox) { return filter_.BboxDistance(bbox); }
+
+  vector<float> KeyPointDistance(const Points& kpts) { return filter_.KeyPointDistance(kpts); }
+
+  uint32_t track_id() const noexcept { return track_id_; }
+  uint32_t missing() const noexcept { return missing_; }
+
+  const Bbox& predicted_bbox() const noexcept { return bbox_predict_; }
+  const Bbox& smoothed_bbox() const noexcept { return bbox_smooth_; }
+
+  const Points& predicted_kpts() const noexcept { return kpts_predict_; }
+  const Points& smoothed_kpts() const noexcept { return kpts_smooth_; }
+
+  const Scores& scores() const noexcept { return scores_.back(); }
+
+ private:
+  void Add(const Bbox& bbox, const Points& kpts, const Scores& ss);
+
+  TrackingFilter CreateFilter(const Bbox& bbox, const Points& pts);
+  SmoothingFilter CreateSmoother(const Bbox& bbox, const Points& pts);
+
+ private:
+  const mmdeploy_pose_tracker_param_t* params_;
+  vector<Bbox> bboxes_;
+  vector<Points> keypoints_;
+  vector<Scores> scores_;
+  uint32_t track_id_{};
+  Bbox bbox_predict_{};
+  Bbox bbox_smooth_{};
+  Points kpts_predict_;
+  Points kpts_smooth_;
+  uint32_t missing_{0};
+  TrackingFilter filter_;
+  SmoothingFilter smoother_;
+};
+
+}  // namespace mmdeploy::mmpose::_pose_tracker
+
+#endif  // MMDEPLOY_TRACK_H
diff --git a/csrc/mmdeploy/codebase/mmpose/pose_tracker/tracking_filter.cpp b/csrc/mmdeploy/codebase/mmpose/pose_tracker/tracking_filter.cpp
new file mode 100644
index 0000000000..03745a3c8f
--- /dev/null
+++ b/csrc/mmdeploy/codebase/mmpose/pose_tracker/tracking_filter.cpp
@@ -0,0 +1,199 @@
+// Copyright (c) OpenMMLab. All rights reserved.
+
+#include "pose_tracker/tracking_filter.h"
+
+namespace mmdeploy::mmpose::_pose_tracker {
+
+float get_mean_scale(float scale_w, float scale_h) { return std::sqrt(scale_w * scale_h); }
+
+TrackingFilter::TrackingFilter(const Bbox& bbox, const vector<Point>& kpts,
+                               float std_weight_position, float std_weight_velocity)
+    : std_weight_position_(std_weight_position), std_weight_velocity_(std_weight_velocity) {
+  auto center = get_center(bbox);
+  auto scale = get_scale(bbox);
+
+  auto mean_scale = get_mean_scale(scale[0], scale[1]);
+
+  const auto n = kpts.size();
+  pt_filters_.resize(n);
+  for (int i = 0; i < n; ++i) {
+    auto& f = pt_filters_[i];
+    f.init(4, 2);
+    SetKeyPointTransitionMat(i);
+    SetKeyPointMeasurementMat(i);
+
+    ResetKeyPoint(i, kpts[i], mean_scale);
+  }
+
+  {
+    // [x, y, w, h, dx, dy, dw, dh]
+    auto& f = bbox_filter_;
+
+    f.init(8, 4);
+
+    SetBboxTransitionMat();
+    SetBboxMeasurementMat();
+
+    SetBboxErrorCov(2 * std_weight_position * mean_scale,  //
+                    10 * std_weight_velocity * mean_scale);
+
+    f.statePost.at<float>(0) = center.x;
+    f.statePost.at<float>(1) = center.y;
+    f.statePost.at<float>(2) = scale[0];
+    f.statePost.at<float>(3) = scale[1];
+  }
+}
+
+std::pair<Bbox, Points> TrackingFilter::Predict() {
+  auto mean_scale = get_mean_scale(bbox_filter_.statePost.at<float>(2),  //
+                                   bbox_filter_.statePost.at<float>(3));
+  const auto n = pt_filters_.size();
+  Points pts(n);
+  for (int i = 0; i < n; ++i) {
+    SetKeyPointProcessCov(i, std_weight_position_ * mean_scale, std_weight_velocity_ * mean_scale);
+    auto mat = pt_filters_[i].predict();
+    pts[i].x = mat.at<float>(0);
+    pts[i].y = mat.at<float>(1);
+  }
+  Bbox bbox;
+  {
+    SetBboxProcessCov(std_weight_position_ * mean_scale, std_weight_velocity_ * mean_scale);
+    auto mat = bbox_filter_.predict();
+    auto x = mat.ptr<float>();
+    bbox = get_bbox({x[0], x[1]}, {x[2], x[3]});
+  }
+  return {bbox, pts};
+}
+
+std::pair<Bbox, Points> TrackingFilter::Correct(const Bbox& bbox, const Points& kpts,
+                                                const vector<bool>& tracked) {
+  auto mean_scale = get_mean_scale(bbox_filter_.statePre.at<float>(2),  //
+                                   bbox_filter_.statePre.at<float>(3));
+  const auto n = pt_filters_.size();
+  Points corr_kpts(n);
+  for (int i = 0; i < n; ++i) {
+    if (!tracked.empty() && tracked[i]) {
+      SetKeyPointMeasurementCov(i, std_weight_position_ * mean_scale);
+      std::array<float, 2> m{kpts[i].x, kpts[i].y};
+      auto mat = pt_filters_[i].correct(as_mat(m));
+      corr_kpts[i].x = mat.at<float>(0);
+      corr_kpts[i].y = mat.at<float>(1);
+    } else {
+      ResetKeyPoint(i, kpts[i], mean_scale);
+      corr_kpts[i] = kpts[i];
+    }
+  }
+  Bbox corr_bbox;
+  {
+    SetBboxMeasurementCov(std_weight_position_ * mean_scale);
+    auto c = get_center(bbox);
+    auto s = get_scale(bbox);
+    std::array<float, 4> m{c.x, c.y, s[0], s[1]};
+    auto mat = bbox_filter_.correct(as_mat(m));
+    auto x = mat.ptr<float>();
+    corr_bbox = get_bbox({x[0], x[1]}, {x[2], x[3]});
+  }
+  return {corr_bbox, corr_kpts};
+}
+
+float TrackingFilter::BboxDistance(const Bbox& bbox) {
+  auto mean_scale = get_mean_scale(bbox_filter_.statePre.at<float>(2),  //
+                                   bbox_filter_.statePre.at<float>(3));
+  SetBboxMeasurementCov(std_weight_position_ * mean_scale);
+  auto c = get_center(bbox);
+  auto s = get_scale(bbox);
+  std::array<float, 4> m{c.x, c.y, s[0], s[1]};
+  cv::Mat z = as_mat(m);
+  auto& f = bbox_filter_;
+  cv::Mat sigma;
+  cv::gemm(f.measurementMatrix * f.errorCovPre, f.measurementMatrix, 1, f.measurementNoiseCov, 1,
+           sigma, cv::GEMM_2_T);
+  cv::Mat r = z - f.measurementMatrix * f.statePre;
+  // ignore contribution of scales as it is unstable when inferred from key-points
+  r.at<float>(2) = 0;
+  r.at<float>(3) = 0;
+  cv::Mat d = r.t() * sigma.inv() * r;
+  return d.at<float>();
+}
+
+vector<float> TrackingFilter::KeyPointDistance(const Points& kpts) {
+  auto mean_scale = get_mean_scale(bbox_filter_.statePre.at<float>(2),  //
+                                   bbox_filter_.statePre.at<float>(3));
+
+  const auto n = pt_filters_.size();
+  vector<float> dists(n);
+  for (int i = 0; i < n; ++i) {
+    SetKeyPointMeasurementCov(i, std_weight_position_ * mean_scale);
+    std::array<float, 2> m{kpts[i].x, kpts[i].y};
+    cv::Mat z = as_mat(m);
+    auto& f = pt_filters_[i];
+    cv::Mat sigma;
+    cv::gemm(f.measurementMatrix * f.errorCovPre, f.measurementMatrix, 1, f.measurementNoiseCov, 1,
+             sigma, cv::GEMM_2_T);
+    cv::Mat r = z - f.measurementMatrix * f.statePre;
+    cv::Mat d = r.t() * sigma.inv() * r;
+    dists[i] = d.at<float>();
+  }
+  return dists;
+}
+
+void TrackingFilter::SetBboxProcessCov(float sigma_p, float sigma_v) {
+  auto& m = bbox_filter_.processNoiseCov;
+  cv::setIdentity(m(cv::Rect(0, 0, 4, 4)), sigma_p * sigma_p);
+  cv::setIdentity(m(cv::Rect(4, 4, 4, 4)), sigma_v * sigma_v);
+}
+void TrackingFilter::SetBboxMeasurementCov(float sigma_p) {
+  auto& m = bbox_filter_.measurementNoiseCov;
+  cv::setIdentity(m, sigma_p * sigma_p);
+}
+void TrackingFilter::SetBboxErrorCov(float sigma_p, float sigma_v) {
+  auto& m = bbox_filter_.errorCovPost;
+  cv::setIdentity(m(cv::Rect(0, 0, 4, 4)), sigma_p * sigma_p);
+  cv::setIdentity(m(cv::Rect(4, 4, 4, 4)), sigma_v * sigma_v);
+}
+void TrackingFilter::SetBboxTransitionMat() {
+  auto& m = bbox_filter_.transitionMatrix;
+  cv::setIdentity(m(cv::Rect(4, 0, 4, 4)));  // with scale velocity
+  //  cv::setIdentity(m(cv::Rect(4, 0, 2, 2)));  // w/o scale velocity
+}
+void TrackingFilter::SetBboxMeasurementMat() {
+  auto& m = bbox_filter_.measurementMatrix;
+  cv::setIdentity(m(cv::Rect(0, 0, 4, 4)));
+}
+
+void TrackingFilter::SetKeyPointProcessCov(int index, float sigma_p, float sigma_v) {
+  auto& m = pt_filters_[index].processNoiseCov;
+  m.at<float>(0, 0) = sigma_p * sigma_p;
+  m.at<float>(1, 1) = sigma_p * sigma_p;
+  m.at<float>(2, 2) = sigma_v * sigma_v;
+  m.at<float>(3, 3) = sigma_v * sigma_v;
+}
+void TrackingFilter::SetKeyPointMeasurementCov(int index, float sigma_p) {
+  auto& m = pt_filters_[index].measurementNoiseCov;
+  m.at<float>(0, 0) = sigma_p * sigma_p;
+  m.at<float>(1, 1) = sigma_p * sigma_p;
+}
+void TrackingFilter::SetKeyPointErrorCov(int index, float sigma_p, float sigma_v) {
+  auto& m = pt_filters_[index].errorCovPost;
+  m.at<float>(0, 0) = sigma_p * sigma_p;
+  m.at<float>(1, 1) = sigma_p * sigma_p;
+  m.at<float>(2, 2) = sigma_v * sigma_v;
+  m.at<float>(3, 3) = sigma_v * sigma_v;
+}
+void TrackingFilter::SetKeyPointTransitionMat(int index) {
+  auto& m = pt_filters_[index].transitionMatrix;
+  cv::setIdentity(m(cv::Rect(2, 0, 2, 2)));
+}
+void TrackingFilter::SetKeyPointMeasurementMat(int index) {
+  auto& m = pt_filters_[index].measurementMatrix;
+  cv::setIdentity(m(cv::Rect(0, 0, 2, 2)));
+}
+
+void TrackingFilter::ResetKeyPoint(int index, const Point& kpt, float scale) {
+  auto& f = pt_filters_[index];
+  SetKeyPointErrorCov(index, 2 * std_weight_position_ * scale, 10 * std_weight_velocity_ * scale);
+  f.statePost.at<float>(0) = kpt.x;
+  f.statePost.at<float>(1) = kpt.y;
+}
+
+}  // namespace mmdeploy::mmpose::_pose_tracker
diff --git a/csrc/mmdeploy/codebase/mmpose/pose_tracker/tracking_filter.h b/csrc/mmdeploy/codebase/mmpose/pose_tracker/tracking_filter.h
new file mode 100644
index 0000000000..593fddc369
--- /dev/null
+++ b/csrc/mmdeploy/codebase/mmpose/pose_tracker/tracking_filter.h
@@ -0,0 +1,50 @@
+// Copyright (c) OpenMMLab. All rights reserved.
+
+#ifndef MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_TRACKING_FILTER_H
+#define MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_TRACKING_FILTER_H
+
+#include "opencv2/video/video.hpp"
+#include "pose_tracker/utils.h"
+
+namespace mmdeploy::mmpose::_pose_tracker {
+
+// use Kalman filter to estimate and predict true states
+class TrackingFilter {
+ public:
+  TrackingFilter(const Bbox& bbox, const vector<Point>& kpts, float std_weight_position,
+                 float std_weight_velocity);
+
+  std::pair<Bbox, Points> Predict();
+
+  vector<float> KeyPointDistance(const Points& kpts);
+
+  float BboxDistance(const Bbox& bbox);
+
+  std::pair<Bbox, Points> Correct(const Bbox& bbox, const Points& kpts,
+                                  const vector<bool>& tracked);
+
+ private:
+  void SetBboxProcessCov(float sigma_p, float sigma_v);
+  void SetBboxMeasurementCov(float sigma_p);
+  void SetBboxErrorCov(float sigma_p, float sigma_v);
+  void SetBboxTransitionMat();
+  void SetBboxMeasurementMat();
+
+  void SetKeyPointProcessCov(int index, float sigma_p, float sigma_v);
+  void SetKeyPointMeasurementCov(int index, float sigma_p);
+  void SetKeyPointErrorCov(int index, float sigma_p, float sigma_v);
+  void SetKeyPointTransitionMat(int index);
+  void SetKeyPointMeasurementMat(int index);
+
+  void ResetKeyPoint(int index, const Point& kpt, float scale);
+
+ private:
+  std::vector<cv::KalmanFilter> pt_filters_;
+  cv::KalmanFilter bbox_filter_;
+  float std_weight_position_;
+  float std_weight_velocity_;
+};
+
+}  // namespace mmdeploy::mmpose::_pose_tracker
+
+#endif  // MMDEPLOY_TRACKING_FILTER_H
diff --git a/csrc/mmdeploy/codebase/mmpose/pose_tracker/utils.cpp b/csrc/mmdeploy/codebase/mmpose/pose_tracker/utils.cpp
new file mode 100644
index 0000000000..e264a7b4f1
--- /dev/null
+++ b/csrc/mmdeploy/codebase/mmpose/pose_tracker/utils.cpp
@@ -0,0 +1,136 @@
+// Copyright (c) OpenMMLab. All rights reserved.
+
+#include "pose_tracker/utils.h"
+
+namespace mmdeploy::mmpose::_pose_tracker {
+
+vector<std::tuple<int, int, float>> greedy_assignment(const vector<float>& scores,
+                                                      vector<int>& is_valid_row,
+                                                      vector<int>& is_valid_col, float thr) {
+  const auto n_rows = is_valid_row.size();
+  const auto n_cols = is_valid_col.size();
+  vector<std::tuple<int, int, float>> assignment;
+  assignment.reserve(std::max(n_rows, n_cols));
+  while (true) {
+    auto max_score = std::numeric_limits<float>::lowest();
+    int max_row = -1;
+    int max_col = -1;
+    for (int i = 0; i < n_rows; ++i) {
+      if (is_valid_row[i]) {
+        for (int j = 0; j < n_cols; ++j) {
+          if (is_valid_col[j]) {
+            if (scores[i * n_cols + j] > max_score) {
+              max_score = scores[i * n_cols + j];
+              max_row = i;
+              max_col = j;
+            }
+          }
+        }
+      }
+    }
+    if (max_score < thr) {
+      break;
+    }
+    is_valid_row[max_row] = 0;
+    is_valid_col[max_col] = 0;
+    assignment.emplace_back(max_row, max_col, max_score);
+  }
+  return assignment;
+}
+
+float intersection_over_union(const Bbox& a, const Bbox& b) {
+  auto x1 = std::max(a[0], b[0]);
+  auto y1 = std::max(a[1], b[1]);
+  auto x2 = std::min(a[2], b[2]);
+  auto y2 = std::min(a[3], b[3]);
+
+  auto inter_area = std::max(0.f, x2 - x1) * std::max(0.f, y2 - y1);
+
+  auto a_area = get_area(a);
+  auto b_area = get_area(b);
+  auto union_area = a_area + b_area - inter_area;
+
+  if (union_area == 0.f) {
+    return 0;
+  }
+
+  return inter_area / union_area;
+}
+
+float object_keypoint_similarity(const Points& pts_a, const Bbox& box_a, const Points& pts_b,
+                                 const Bbox& box_b, const vector<float>& sigmas) {
+  assert(pts_a.size() == sigmas.size());
+  assert(pts_b.size() == sigmas.size());
+  auto scale = [](const Bbox& bbox) -> float {
+    auto a = bbox[2] - bbox[0];
+    auto b = bbox[3] - bbox[1];
+    return std::sqrt(a * a + b * b);
+  };
+  auto oks = [](const Point& pa, const Point& pb, float s, float k) {
+    return std::exp(-(pa - pb).dot(pa - pb) / (2.f * s * s * k * k));
+  };
+  auto sum = 0.f;
+  const auto s = .5f * (scale(box_a) + scale(box_b));
+  for (int i = 0; i < sigmas.size(); ++i) {
+    sum += oks(pts_a[i], pts_b[i], s, sigmas[i]);
+  }
+  sum /= static_cast<float>(sigmas.size());
+  return sum;
+}
+
+std::optional<Bbox> keypoints_to_bbox(const Points& keypoints, const Scores& scores, float img_h,
+                                      float img_w, float scale, float kpt_thr, int min_keypoints) {
+  int valid = 0;
+  auto x1 = static_cast<float>(img_w);
+  auto y1 = static_cast<float>(img_h);
+  auto x2 = 0.f;
+  auto y2 = 0.f;
+  for (size_t i = 0; i < keypoints.size(); ++i) {
+    auto& kpt = keypoints[i];
+    if (scores[i] >= kpt_thr) {
+      x1 = std::min(x1, kpt.x);
+      y1 = std::min(y1, kpt.y);
+      x2 = std::max(x2, kpt.x);
+      y2 = std::max(y2, kpt.y);
+      ++valid;
+    }
+  }
+  if (min_keypoints < 0) {
+    min_keypoints = (static_cast<int>(scores.size()) + 1) / 2;
+  }
+  if (valid < min_keypoints) {
+    return std::nullopt;
+  }
+  auto xc = .5f * (x1 + x2);
+  auto yc = .5f * (y1 + y2);
+  auto w = (x2 - x1) * scale;
+  auto h = (y2 - y1) * scale;
+
+  return std::array<float, 4>{
+      std::max(0.f, std::min(img_w, xc - .5f * w)),
+      std::max(0.f, std::min(img_h, yc - .5f * h)),
+      std::max(0.f, std::min(img_w, xc + .5f * w)),
+      std::max(0.f, std::min(img_h, yc + .5f * h)),
+  };
+}
+
+Bbox map_bbox(const Bbox& box) {
+  Point p0(box[0], box[1]);
+  Point p1(box[2], box[3]);
+  auto c = .5f * (p0 + p1);
+  auto s = p1 - p0;
+  static constexpr std::array image_size{192.f, 256.f};
+  float aspect_ratio = image_size[0] * 1.0 / image_size[1];
+  if (s.x > aspect_ratio * s.y) {
+    s.y = s.x / aspect_ratio;
+  } else if (s.x < aspect_ratio * s.y) {
+    s.x = s.y * aspect_ratio;
+  }
+  s.x *= 1.25f;
+  s.y *= 1.25f;
+  p0 = c - .5f * s;
+  p1 = c + .5f * s;
+  return {p0.x, p0.y, p1.x, p1.y};
+}
+
+}  // namespace mmdeploy::mmpose::_pose_tracker
diff --git a/csrc/mmdeploy/codebase/mmpose/pose_tracker/utils.h b/csrc/mmdeploy/codebase/mmpose/pose_tracker/utils.h
new file mode 100644
index 0000000000..3643c99387
--- /dev/null
+++ b/csrc/mmdeploy/codebase/mmpose/pose_tracker/utils.h
@@ -0,0 +1,96 @@
+// Copyright (c) OpenMMLab. All rights reserved.
+
+#ifndef MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_UTILS_H
+#define MMDEPLOY_CODEBASE_MMPOSE_POSE_TRACKER_UTILS_H
+
+#include <array>
+#include <numeric>
+#include <optional>
+#include <vector>
+
+#include "mmdeploy/core/utils/formatter.h"
+#include "opencv2/core/core.hpp"
+#include "pose_tracker/common.h"
+
+namespace mmdeploy::mmpose::_pose_tracker {
+
+using std::vector;
+using Bbox = std::array<float, 4>;
+using Bboxes = vector<Bbox>;
+using Point = cv::Point2f;
+using Points = vector<cv::Point2f>;
+using Score = float;
+using Scores = vector<float>;
+
+#define POSE_TRACKER_DEBUG(...) MMDEPLOY_INFO(__VA_ARGS__)
+
+// opencv3 can't construct cv::Mat from std::array
+template <size_t N>
+cv::Mat as_mat(const std::array<float, N>& a) {
+  return cv::Mat_<float>(a.size(), 1, const_cast<float*>(a.data()));
+}
+
+// scale = 1.5, kpt_thr = 0.3
+std::optional<Bbox> keypoints_to_bbox(const Points& keypoints, const Scores& scores, float img_h,
+                                      float img_w, float scale, float kpt_thr, int min_keypoints);
+
+// xyxy format
+float intersection_over_union(const Bbox& a, const Bbox& b);
+
+float object_keypoint_similarity(const Points& pts_a, const Bbox& box_a, const Points& pts_b,
+                                 const Bbox& box_b, const vector<float>& sigmas);
+
+template <typename T>
+void suppress_non_maximum(const vector<T>& scores, const vector<float>& similarities,
+                          vector<int>& is_valid, float thresh);
+
+inline float get_area(const Bbox& bbox) { return (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]); }
+
+inline Point get_center(const Bbox& bbox) {
+  return {.5f * (bbox[0] + bbox[2]), .5f * (bbox[1] + bbox[3])};
+}
+
+inline std::array<float, 2> get_scale(const Bbox& bbox) {
+  return {bbox[2] - bbox[0], bbox[3] - bbox[1]};
+}
+
+inline Bbox get_bbox(const Point& center, const std::array<float, 2>& scale) {
+  return {
+      center.x - .5f * scale[0],
+      center.y - .5f * scale[1],
+      center.x + .5f * scale[0],
+      center.y + .5f * scale[1],
+  };
+}
+
+vector<std::tuple<int, int, float>> greedy_assignment(const vector<float>& scores,
+                                                      vector<int>& is_valid_row,
+                                                      vector<int>& is_valid_col, float thr);
+
+template <typename T>
+inline void suppress_non_maximum(const vector<T>& scores, const vector<float>& similarities,
+                                 vector<int>& is_valid, float thresh) {
+  assert(is_valid.size() == scores.size());
+  vector<int> indices(scores.size());
+  std::iota(indices.begin(), indices.end(), 0);
+  std::sort(indices.begin(), indices.end(), [&](int i, int j) { return scores[i] > scores[j]; });
+  // suppress similar samples
+  for (int i = 0; i < indices.size(); ++i) {
+    if (auto u = indices[i]; is_valid[u]) {
+      for (int j = i + 1; j < indices.size(); ++j) {
+        if (auto v = indices[j]; is_valid[v]) {
+          if (similarities[u * scores.size() + v] >= thresh) {
+            is_valid[v] = false;
+          }
+        }
+      }
+    }
+  }
+}
+
+// TopDownAffine's internal logic for mapping pose model inputs
+Bbox map_bbox(const Bbox& box);
+
+}  // namespace mmdeploy::mmpose::_pose_tracker
+
+#endif  // MMDEPLOY_UTILS_H
diff --git a/csrc/mmdeploy/experimental/module_adapter.h b/csrc/mmdeploy/experimental/module_adapter.h
index 74dd9d9b86..763a71ba6b 100644
--- a/csrc/mmdeploy/experimental/module_adapter.h
+++ b/csrc/mmdeploy/experimental/module_adapter.h
@@ -9,7 +9,7 @@
 
 namespace mmdeploy {
 
-namespace module_detail {
+namespace module_adapter {
 
 template <typename T>
 struct is_tuple : std::false_type {};
@@ -20,16 +20,13 @@ struct is_tuple<std::tuple<Ts...>> : std::true_type {};
 template <typename T>
 inline constexpr auto is_tuple_v = is_tuple<T>::value;
 
-template <typename Ret, typename... Args>
+template <typename... Args>
 struct InvokeImpl {
-  template <typename F, typename... Ts>
-  static Result<Value> apply(F&& f, const Value& params, Ts&&... ts) {
-    std::tuple<uncvref_t<Args>...> args;
+  template <typename F>
+  static Result<Value> apply(F&& f, const Value& args) {
     try {
-      from_value(params, args);
-      auto ret = apply_impl(std::forward<F>(f), std::move(args), std::index_sequence_for<Args...>{},
-                            std::forward<Ts>(ts)...);
-      return make_ret_val(std::move(ret));
+      using ArgsType = std::tuple<uncvref_t<Args>...>;
+      return make_ret_val(std::apply((F &&) f, from_value<ArgsType>(args)));
     } catch (const std::exception& e) {
       MMDEPLOY_ERROR("unhandled exception: {}", e.what());
       return Status(eFail);
@@ -38,15 +35,9 @@ struct InvokeImpl {
     }
   }
 
-  template <typename F, typename Tuple, size_t... Is, typename... Ts>
-  static decltype(auto) apply_impl(F&& f, Tuple&& tuple, std::index_sequence<Is...>, Ts&&... ts) {
-    return std::invoke(std::forward<F>(f), std::forward<Ts>(ts)...,
-                       std::get<Is>(std::forward<Tuple>(tuple))...);
-  }
-
   template <typename T, typename T0 = uncvref_t<T>>
   static Result<Value> make_ret_val(T&& ret) {
-    if constexpr (module_detail::is_tuple_v<T0>) {
+    if constexpr (is_tuple_v<T0>) {
       return to_value(std::forward<T>(ret));
     } else if constexpr (is_result_v<T0>) {
       return ret ? make_ret_val(std::forward<T>(ret).value()) : std::forward<T>(ret).as_failure();
@@ -56,60 +47,49 @@ struct InvokeImpl {
   }
 };
 
-// function pointer
+// match function pointer
 template <typename Ret, typename... Args>
 Result<Value> Invoke(Ret (*f)(Args...), const Value& args) {
-  return InvokeImpl<Ret, Args...>::apply(f, args);
+  return InvokeImpl<Args...>::apply(f, args);
 }
 
-// member function pointer
-template <typename Ret, typename C0, typename C1, typename... Args>
-Result<Value> Invoke(Ret (C0::*f)(Args...) const, C1* inst, const Value& args) {
-  return InvokeImpl<Ret, Args...>::apply(f, args, inst);
+// match member function pointer `&C::operator()`
+template <typename Ret, typename C, typename F, typename... Args>
+Result<Value> Invoke(Ret (C::*)(Args...) const, const F& f, const Value& args) {
+  return InvokeImpl<Args...>::apply(f, args);
 }
-template <typename Ret, typename C0, typename C1, typename... Args>
-Result<Value> Invoke(Ret (C0::*f)(Args...), C1* inst, const Value& args) {
-  return InvokeImpl<Ret, Args...>::apply(f, args, inst);
+template <typename Ret, typename C, typename F, typename... Args>
+Result<Value> Invoke(Ret (C::*)(Args...), F& f, const Value& args) {
+  return InvokeImpl<Args...>::apply(f, args);
 }
 
-// function object
-template <typename T, typename C = std::remove_reference_t<T>,
+// match function object
+template <typename F, typename C = std::remove_reference_t<F>,
           typename = std::void_t<decltype(&C::operator())>>
-Result<Value> Invoke(T&& t, const Value& args) {
-  return Invoke(&C::operator(), &t, args);
+Result<Value> Invoke(F&& f, const Value& args) {
+  return Invoke(&C::operator(), (F &&) f, args);
 }
 
-template <typename T>
-struct IsPointer : std::false_type {};
-template <typename R, typename... Args>
-struct IsPointer<R (*)(Args...)> : std::false_type {};
-template <typename T>
-struct IsPointer<std::shared_ptr<T>> : std::true_type {};
-template <typename T, typename D>
-struct IsPointer<std::unique_ptr<T, D>> : std::true_type {};
-template <typename T>
-struct IsPointer<T*> : std::true_type {};
-
-template <typename T, typename SFINAE = void>
-struct AccessPolicy {
-  static constexpr auto apply = [](auto& x) -> decltype(auto) { return x; };
-};
-template <typename T>
-struct AccessPolicy<T, std::enable_if_t<IsPointer<T>::value>> {
-  static constexpr auto apply = [](auto& x) -> decltype(auto) { return *x; };
-};
+template <typename F>
+Result<Value> Invoke(const std::unique_ptr<F>& f, const Value& args) {
+  return Invoke(*f, args);
+}
+template <typename F>
+Result<Value> Invoke(const std::shared_ptr<F>& f, const Value& args) {
+  return Invoke(*f, args);
+}
 
-template <typename T, typename A = AccessPolicy<T>>
+template <typename Func>
 class Task : public Module {
  public:
-  explicit Task(T task) : task_(std::move(task)) {}
+  explicit Task(Func func) : func_(std::move(func)) {}
 
   Result<Value> Process(const Value& arg) override {
-    return module_detail::Invoke(A::apply(task_), arg);
+    return ::mmdeploy::module_adapter::Invoke(func_, arg);
   }
 
  private:
-  T task_;
+  Func func_;
 };
 
 template <typename T>
@@ -122,11 +102,10 @@ auto MakeTask(T&& x) {
   return Task(std::forward<T>(x));
 }
 
-}  // namespace module_detail
-
-using module_detail::CreateTask;
+}  // namespace module_adapter
 
-using module_detail::MakeTask;
+using module_adapter::CreateTask;
+using module_adapter::MakeTask;
 
 }  // namespace mmdeploy
 
diff --git a/csrc/mmdeploy/graph/inference.cpp b/csrc/mmdeploy/graph/inference.cpp
index ec6cb59076..9f722d0e62 100644
--- a/csrc/mmdeploy/graph/inference.cpp
+++ b/csrc/mmdeploy/graph/inference.cpp
@@ -41,7 +41,7 @@ Result<unique_ptr<Node>> InferenceBuilder::BuildImpl() {
   }
   pipeline_config["context"] = context;
 
-  MMDEPLOY_INFO("{}", pipeline_config);
+  MMDEPLOY_DEBUG("{}", pipeline_config);
 
   OUTCOME_TRY(auto pipeline_builder, Builder::CreateFromConfig(pipeline_config));
   OUTCOME_TRY(auto node, pipeline_builder->Build());
diff --git a/csrc/mmdeploy/operation/cuda/crop_resize_pad.cpp b/csrc/mmdeploy/operation/cuda/crop_resize_pad.cpp
index 8bf3232d03..e12ae06a1e 100644
--- a/csrc/mmdeploy/operation/cuda/crop_resize_pad.cpp
+++ b/csrc/mmdeploy/operation/cuda/crop_resize_pad.cpp
@@ -21,15 +21,17 @@ class CropResizePadImpl : public CropResizePad {
     Tensor dst_tensor(desc);
     cudaMemsetAsync(dst_tensor.data<uint8_t>(), 0, dst_tensor.byte_size(), cuda_stream);
 
-    if (src.data_type() == DataType::kINT8) {
-      OUTCOME_TRY(
-          ResizeDispatch<uint8_t>(src, crop_rect, target_size, pad_rect, dst_tensor, cuda_stream));
-    } else if (src.data_type() == DataType::kFLOAT) {
-      OUTCOME_TRY(
-          ResizeDispatch<float>(src, crop_rect, target_size, pad_rect, dst_tensor, cuda_stream));
-    } else {
-      MMDEPLOY_ERROR("unsupported data type {}", src.data_type());
-      return Status(eNotSupported);
+    if (crop_rect[2] - crop_rect[0] + 1 > 0 && crop_rect[3] - crop_rect[1] + 1 > 0) {
+      if (src.data_type() == DataType::kINT8) {
+        OUTCOME_TRY(ResizeDispatch<uint8_t>(src, crop_rect, target_size, pad_rect, dst_tensor,
+                                            cuda_stream));
+      } else if (src.data_type() == DataType::kFLOAT) {
+        OUTCOME_TRY(
+            ResizeDispatch<float>(src, crop_rect, target_size, pad_rect, dst_tensor, cuda_stream));
+      } else {
+        MMDEPLOY_ERROR("unsupported data type {}", src.data_type());
+        return Status(eNotSupported);
+      }
     }
 
     dst = std::move(dst_tensor);
diff --git a/csrc/mmdeploy/utils/opencv/opencv_utils.cpp b/csrc/mmdeploy/utils/opencv/opencv_utils.cpp
index 17a87bbf9b..d3cd1ad87b 100644
--- a/csrc/mmdeploy/utils/opencv/opencv_utils.cpp
+++ b/csrc/mmdeploy/utils/opencv/opencv_utils.cpp
@@ -295,10 +295,12 @@ cv::Mat CropResizePad(const cv::Mat& src, const std::vector<int>& crop_rect,
   int width = target_size[0] + pad_rect[1] + pad_rect[3];
   int height = target_size[1] + pad_rect[0] + pad_rect[2];
   cv::Mat dst = cv::Mat::zeros(height, width, src.type());
-  cv::Rect roi1 = {crop_rect[1], crop_rect[0], crop_rect[3] - crop_rect[1] + 1,
-                   crop_rect[2] - crop_rect[0] + 1};
-  cv::Rect roi2 = {pad_rect[1], pad_rect[0], target_size[0], target_size[1]};
-  cv::resize(src(roi1), dst(roi2), {target_size[0], target_size[1]});
+  if (crop_rect[2] - crop_rect[0] + 1 > 0 && crop_rect[3] - crop_rect[1] + 1 > 0) {
+    cv::Rect roi1 = {crop_rect[1], crop_rect[0], crop_rect[3] - crop_rect[1] + 1,
+                     crop_rect[2] - crop_rect[0] + 1};
+    cv::Rect roi2 = {pad_rect[1], pad_rect[0], target_size[0], target_size[1]};
+    cv::resize(src(roi1), dst(roi2), {target_size[0], target_size[1]});
+  }
   return dst;
 }
 
diff --git a/demo/csrc/CMakeLists.txt b/demo/csrc/CMakeLists.txt
index 85ffc6f32c..8d25e43597 100644
--- a/demo/csrc/CMakeLists.txt
+++ b/demo/csrc/CMakeLists.txt
@@ -41,7 +41,6 @@ add_example(rotated_detector c rotated_object_detection)
 add_example(video_recognizer c video_recognition)
 # TODO: figure out a better way
 # add_example("" c det_cls)
-# add_example("" c det_pose)
 
 if (MMDEPLOY_BUILD_SDK_CXX_API)
     add_example(classifier cpp classifier)
@@ -52,6 +51,7 @@ if (MMDEPLOY_BUILD_SDK_CXX_API)
     add_example(text_detector cpp text_det_recog)
     add_example(pose_detector cpp pose_detector)
     add_example(rotated_detector cpp rotated_detector)
-    add_example(pose_detector cpp pose_tracker)
+    add_example(pose_tracker cpp pose_tracker)
+    add_example(pose_detector cpp det_pose)
     add_example(video_recognizer cpp video_cls)
 endif ()
diff --git a/demo/csrc/cpp/det_pose.cpp b/demo/csrc/cpp/det_pose.cpp
new file mode 100644
index 0000000000..a12dd3c0b1
--- /dev/null
+++ b/demo/csrc/cpp/det_pose.cpp
@@ -0,0 +1,113 @@
+// Copyright (c) OpenMMLab. All rights reserved.
+
+#include <iostream>
+
+#include "mmdeploy/detector.hpp"
+#include "mmdeploy/pose_detector.hpp"
+#include "opencv2/imgcodecs/imgcodecs.hpp"
+#include "opencv2/imgproc/imgproc.hpp"
+
+using std::vector;
+
+cv::Mat Visualize(cv::Mat frame, const std::vector<mmdeploy_pose_detection_t>& poses, int size);
+
+int main(int argc, char* argv[]) {
+  const auto device_name = argv[1];
+  const auto det_model_path = argv[2];
+  const auto pose_model_path = argv[3];
+  const auto image_path = argv[4];
+
+  if (argc != 5) {
+    std::cerr << "usage:\n\tpose_tracker device_name det_model_path pose_model_path image_path\n";
+    return -1;
+  }
+  auto img = cv::imread(image_path);
+  if (!img.data) {
+    std::cerr << "failed to load image: " << image_path << "\n";
+    return -1;
+  }
+
+  using namespace mmdeploy;
+
+  Context context(Device{device_name});  // create context for holding the device handle
+  Detector detector(Model(det_model_path), context);   // create object detector
+  PoseDetector pose(Model(pose_model_path), context);  // create pose detector
+
+  // apply detector
+  auto dets = detector.Apply(img);
+
+  // filter detections and extract bboxes for pose model
+  std::vector<mmdeploy_rect_t> bboxes;
+  for (const auto& det : dets) {
+    if (det.label_id == 0 && det.score > .6f) {
+      bboxes.push_back(det.bbox);
+    }
+  }
+  // apply pose detector
+  auto poses = pose.Apply(img, bboxes);
+
+  // visualize
+  auto vis = Visualize(img, {poses.begin(), poses.end()}, 1280);
+  cv::imwrite("det_pose_output.jpg", vis);
+
+  return 0;
+}
+
+struct Skeleton {
+  vector<std::pair<int, int>> skeleton;
+  vector<cv::Scalar> palette;
+  vector<int> link_color;
+  vector<int> point_color;
+};
+
+const Skeleton& gCocoSkeleton() {
+  static const Skeleton inst{
+      {
+          {15, 13}, {13, 11}, {16, 14}, {14, 12}, {11, 12}, {5, 11}, {6, 12},
+          {5, 6},   {5, 7},   {6, 8},   {7, 9},   {8, 10},  {1, 2},  {0, 1},
+          {0, 2},   {1, 3},   {2, 4},   {3, 5},   {4, 6},
+      },
+      {
+          {255, 128, 0},   {255, 153, 51},  {255, 178, 102}, {230, 230, 0},   {255, 153, 255},
+          {153, 204, 255}, {255, 102, 255}, {255, 51, 255},  {102, 178, 255}, {51, 153, 255},
+          {255, 153, 153}, {255, 102, 102}, {255, 51, 51},   {153, 255, 153}, {102, 255, 102},
+          {51, 255, 51},   {0, 255, 0},     {0, 0, 255},     {255, 0, 0},     {255, 255, 255},
+      },
+      {0, 0, 0, 0, 7, 7, 7, 9, 9, 9, 9, 9, 16, 16, 16, 16, 16, 16, 16},
+      {16, 16, 16, 16, 16, 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0},
+  };
+  return inst;
+}
+
+cv::Mat Visualize(cv::Mat frame, const vector<mmdeploy_pose_detection_t>& poses, int size) {
+  auto& [skeleton, palette, link_color, point_color] = gCocoSkeleton();
+  auto scale = (float)size / (float)std::max(frame.cols, frame.rows);
+  if (scale != 1) {
+    cv::resize(frame, frame, {}, scale, scale);
+  } else {
+    frame = frame.clone();
+  }
+  for (const auto& pose : poses) {
+    vector<float> kpts(&pose.point[0].x, &pose.point[pose.length - 1].y + 1);
+    vector<float> scores(pose.score, pose.score + pose.length);
+    std::for_each(kpts.begin(), kpts.end(), [&](auto& x) { x *= scale; });
+    constexpr auto score_thr = .5f;
+    vector<int> used(kpts.size());
+    for (size_t i = 0; i < skeleton.size(); ++i) {
+      auto [u, v] = skeleton[i];
+      if (scores[u] > score_thr && scores[v] > score_thr) {
+        used[u] = used[v] = 1;
+        cv::Point p_u(kpts[u * 2], kpts[u * 2 + 1]);
+        cv::Point p_v(kpts[v * 2], kpts[v * 2 + 1]);
+        cv::line(frame, p_u, p_v, palette[link_color[i]], 1, cv::LINE_AA);
+      }
+    }
+    for (size_t i = 0; i < kpts.size(); i += 2) {
+      if (used[i / 2]) {
+        cv::Point p(kpts[i], kpts[i + 1]);
+        cv::circle(frame, p, 1, palette[point_color[i / 2]], 2, cv::LINE_AA);
+      }
+    }
+  }
+  return frame;
+}
diff --git a/demo/csrc/cpp/pose_tracker.cpp b/demo/csrc/cpp/pose_tracker.cpp
index ec95afe03d..077b3d39ea 100644
--- a/demo/csrc/cpp/pose_tracker.cpp
+++ b/demo/csrc/cpp/pose_tracker.cpp
@@ -1,1064 +1,180 @@
+// Copyright (c) OpenMMLab. All rights reserved.
 
+#include "mmdeploy/pose_tracker.hpp"
 
-#include <cmath>
-#include <numeric>
+#include <iostream>
 
-#include "mmdeploy/archive/json_archive.h"
-#include "mmdeploy/archive/value_archive.h"
-#include "mmdeploy/common.hpp"
-#include "mmdeploy/core/mat.h"
-#include "mmdeploy/core/module.h"
-#include "mmdeploy/core/utils/formatter.h"
-#include "mmdeploy/experimental/module_adapter.h"
-#include "mmdeploy/pipeline.hpp"
-#include "opencv2/highgui.hpp"
-#include "opencv2/imgproc.hpp"
-#include "opencv2/videoio.hpp"
+#include "opencv2/highgui/highgui.hpp"
+#include "opencv2/imgcodecs/imgcodecs.hpp"
+#include "opencv2/imgproc/imgproc.hpp"
+#include "opencv2/videoio/videoio.hpp"
 
-const auto config_json = R"(
-{
-  "type": "Pipeline",
-  "input": ["img", "use_det", "state"],
-  "output": "targets",
-  "tasks": [
-    {
-      "type": "Task",
-      "module": "Transform",
-      "name": "preload",
-      "input": "img",
-      "output": "data",
-      "transforms": [ { "type": "LoadImageFromFile" } ]
-    },
-    {
-      "type": "Cond",
-      "input": ["use_det", "data"],
-      "output": "dets",
-      "body": {
-        "name": "detection",
-        "type": "Inference",
-        "params": { "model": "detection" }
-      }
-    },
-    {
-      "type": "Task",
-      "module": "ProcessBboxes",
-      "input": ["dets", "data", "state"],
-      "output": ["rois", "track_ids"]
-    },
-    {
-      "input": "*rois",
-      "output": "*keypoints",
-      "name": "pose",
-      "type": "Inference",
-      "params": { "model": "pose" }
-    },
-    {
-      "type": "Task",
-      "module": "TrackPose",
-      "scheduler": "pool",
-      "input": ["keypoints", "track_ids", "state"],
-      "output": "targets"
-    }
-  ]
-}
-)"_json;
-
-namespace mmdeploy {
-
-#define REGISTER_SIMPLE_MODULE(name, fn) \
-  MMDEPLOY_REGISTER_FACTORY_FUNC(Module, (name, 0), [](const Value&) { return CreateTask(fn); });
-
-#define POSE_TRACKER_DEBUG(...) MMDEPLOY_INFO(__VA_ARGS__)
-
-using std::vector;
-using Bbox = std::array<float, 4>;
-using Bboxes = vector<Bbox>;
-using Point = cv::Point2f;
-using Points = vector<cv::Point2f>;
-using Score = float;
-using Scores = vector<float>;
-
-// scale = 1.5, kpt_thr = 0.3
-std::optional<Bbox> keypoints_to_bbox(const Points& keypoints, const Scores& scores, float img_h,
-                                      float img_w, float scale, float kpt_thr, int min_keypoints) {
-  int valid = 0;
-  auto x1 = static_cast<float>(img_w);
-  auto y1 = static_cast<float>(img_h);
-  auto x2 = 0.f;
-  auto y2 = 0.f;
-  for (size_t i = 0; i < keypoints.size(); ++i) {
-    auto& kpt = keypoints[i];
-    if (scores[i] >= kpt_thr) {
-      x1 = std::min(x1, kpt.x);
-      y1 = std::min(y1, kpt.y);
-      x2 = std::max(x2, kpt.x);
-      y2 = std::max(y2, kpt.y);
-      ++valid;
-    }
-  }
-  if (min_keypoints < 0) {
-    min_keypoints = (static_cast<int>(scores.size()) + 1) / 2;
-  }
-  if (valid < min_keypoints) {
-    return std::nullopt;
-  }
-  auto xc = .5f * (x1 + x2);
-  auto yc = .5f * (y1 + y2);
-  auto w = (x2 - x1) * scale;
-  auto h = (y2 - y1) * scale;
-
-  return std::array<float, 4>{
-      std::max(0.f, std::min(img_w, xc - .5f * w)),
-      std::max(0.f, std::min(img_h, yc - .5f * h)),
-      std::max(0.f, std::min(img_w, xc + .5f * w)),
-      std::max(0.f, std::min(img_h, yc + .5f * h)),
-  };
-}
-
-class Filter {
- public:
-  virtual ~Filter() = default;
-  virtual cv::Mat_<float> Predict(float t) = 0;
-  virtual cv::Mat_<float> Correct(const cv::Mat_<float>& x) = 0;
-};
-
-class OneEuroFilter : public Filter {
- public:
-  explicit OneEuroFilter(const cv::Mat_<float>& x, float beta, float fc_min, float fc_d)
-      : x_(x.clone()), beta_(beta), fc_min_(fc_min), fc_d_(fc_d) {
-    v_ = cv::Mat::zeros(x_.size(), x.type());
-  }
-
-  cv::Mat_<float> Predict(float t) override { return x_ + v_; }
-
-  cv::Mat_<float> Correct(const cv::Mat_<float>& x) override {
-    auto a_v = SmoothingFactor(fc_d_);
-    v_ = ExponentialSmoothing(a_v, x - x_, v_);
-    auto fc = fc_min_ + beta_ * (float)cv::norm(v_);
-    auto a_x = SmoothingFactor(fc);
-    x_ = ExponentialSmoothing(a_x, x, x_);
-    return x_.clone();
-  }
-
- private:
-  static float SmoothingFactor(float cutoff) {
-    static constexpr float kPi = 3.1415926;
-    auto r = 2 * kPi * cutoff;
-    return r / (r + 1);
-  }
-
-  static cv::Mat_<float> ExponentialSmoothing(float a, const cv::Mat_<float>& x,
-                                              const cv::Mat_<float>& x0) {
-    return a * x + (1 - a) * x0;
-  }
-
- private:
-  cv::Mat_<float> x_;
-  cv::Mat_<float> v_;
-  float beta_;
-  float fc_min_;
-  float fc_d_;
-};
-
-template <typename T>
-class PointFilterArray : public Filter {
- public:
-  template <typename... Args>
-  explicit PointFilterArray(const Points& ps, const Args&... args) {
-    for (const auto& p : ps) {
-      fs_.emplace_back(cv::Mat_<float>(p, false), args...);
-    }
-  }
-
-  cv::Mat_<float> Predict(float t) override {
-    cv::Mat_<float> m(fs_.size() * 2, 1);
-    for (int i = 0; i < fs_.size(); ++i) {
-      cv::Range r(i * 2, i * 2 + 2);
-      fs_[i].Predict(1).copyTo(m.rowRange(r));
-    }
-    return m.reshape(0, fs_.size());
-  }
-
-  cv::Mat_<float> Correct(const cv::Mat_<float>& x) override {
-    cv::Mat_<float> m(fs_.size() * 2, 1);
-    auto _x = x.reshape(1, x.rows * x.cols);
-    for (int i = 0; i < fs_.size(); ++i) {
-      cv::Range r(i * 2, i * 2 + 2);
-      fs_[i].Correct(_x.rowRange(r)).copyTo(m.rowRange(r));
-    }
-    return m.reshape(0, fs_.size());
-  }
-
- private:
-  vector<T> fs_;
-};
-
-class TrackerFilter {
- public:
-  using Points = vector<cv::Point2f>;
-
-  explicit TrackerFilter(float c_beta, float c_fc_min, float c_fc_d, float k_beta, float k_fc_min,
-                         float k_fc_d, const Bbox& bbox, const Points& kpts)
-      : n_kpts_(kpts.size()) {
-    c_ = std::make_unique<OneEuroFilter>(cv::Mat_<float>(Center(bbox)), c_beta, c_fc_min, c_fc_d);
-    s_ = std::make_unique<OneEuroFilter>(cv::Mat_<float>(Scale(bbox)), 0, 1, 0);
-    kpts_ = std::make_unique<PointFilterArray<OneEuroFilter>>(kpts, k_beta, k_fc_min, k_fc_d);
-  }
-
-  std::pair<Bbox, Points> Predict() {
-    cv::Point2f c;
-    c_->Predict(1).copyTo(cv::Mat(c, false));
-    cv::Point2f s;
-    s_->Predict(0).copyTo(cv::Mat(s, false));
-    Points p(n_kpts_);
-    kpts_->Predict(1).copyTo(cv::Mat(p, false).reshape(1));
-    return {GetBbox(c, s), std::move(p)};
-  }
-
-  std::pair<Bbox, Points> Correct(const Bbox& bbox, const Points& kpts) {
-    cv::Point2f c;
-    c_->Correct(cv::Mat_<float>(Center(bbox), false)).copyTo(cv::Mat(c, false));
-    cv::Point2f s;
-    s_->Correct(cv::Mat_<float>(Scale(bbox), false)).copyTo(cv::Mat(s, false));
-    Points p(kpts.size());
-    kpts_->Correct(cv::Mat(kpts, false)).copyTo(cv::Mat(p, false).reshape(1));
-    return {GetBbox(c, s), std::move(p)};
-  }
-
- private:
-  static cv::Point2f Center(const Bbox& bbox) {
-    return {.5f * (bbox[0] + bbox[2]), .5f * (bbox[1] + bbox[3])};
-  }
-  static cv::Point2f Scale(const Bbox& bbox) {
-    return {bbox[2] - bbox[0], bbox[3] - bbox[1]};
-    //    return {std::log(bbox[2] - bbox[0]), std::log(bbox[3] - bbox[1])};
-  }
-  static Bbox GetBbox(const cv::Point2f& center, const cv::Point2f& scale) {
-    //    cv::Point2f half_size(.5 * std::exp(scale.x), .5 * std::exp(scale.y));
-    Point half_size(.5f * scale.x, .5f * scale.y);
-    auto lo = center - half_size;
-    auto hi = center + half_size;
-    return {lo.x, lo.y, hi.x, hi.y};
-  }
-  int n_kpts_;
-  std::unique_ptr<Filter> c_;
-  std::unique_ptr<Filter> s_;
-  std::unique_ptr<Filter> kpts_;
-};
-
-struct Track {
-  vector<Points> keypoints;
-  vector<Scores> scores;
-  vector<float> avg_scores;
-  vector<Bbox> bboxes;
-  vector<int> is_missing;
-  int64_t track_id{-1};
-  std::shared_ptr<TrackerFilter> filter;
-  Bbox bbox_pred{};
-  Points kpts_pred;
-  int64_t age{0};
-  int64_t n_missing{0};
-};
-
-struct TrackInfo {
-  vector<Track> tracks;
-  int64_t next_id{0};
-};
-
-static inline float Area(const Bbox& bbox) { return (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]); }
-
-struct TrackerParams {
-  // detector params
-  int det_interval = 5;           // detection interval
-  int det_label = 0;              // label used to filter detections
-  float det_min_bbox_size = 100;  // threshold for sqrt(area(bbox))
-  float det_thr = .5f;            // confidence threshold used to filter detections
-  float det_nms_thr = .7f;        // detection nms threshold
-
-  // pose model params
-  int pose_max_num_bboxes = 1;    // max num of bboxes for pose model per frame
-  int pose_min_keypoints = -1;    // min of visible key-points for valid bbox, -1 -> len(kpts)/2
-  float pose_min_bbox_size = 64;  // threshold for sqrt(area(bbox))
-  vector<float> sigmas;           // sigmas for key-points
-
-  // tracker params
-  float track_nms_oks_thr = .5f;        // OKS threshold for suppressing duplicated key-points
-  float track_kpts_thr = .6f;           // threshold for key-point visibility
-  float track_oks_thr = .3f;            // OKS assignment threshold
-  float track_iou_thr = .3f;            // IOU assignment threshold
-  float track_bbox_scale = 1.25f;       // scale factor for bboxes
-  int track_max_missing = 10;           // max number of missing frames before track removal
-  float track_missing_momentum = .95f;  // extrapolation momentum for missing tracks
-  int track_n_history = 10;             // track history length
-
-  // filter params for bbox center
-  float filter_c_beta = .005;
-  float filter_c_fc_min = .05;
-  float filter_c_fc_d = 1.;
-  // filter params for key-points
-  float filter_k_beta = .0075;
-  float filter_k_fc_min = .1;
-  float filter_k_fc_d = .25;
+struct Args {
+  std::string device;
+  std::string det_model;
+  std::string pose_model;
+  std::string video;
+  std::string output_dir;
 };
 
-class Tracker {
- public:
-  explicit Tracker(const TrackerParams& _params) : params(_params) {}
-  // xyxy format
-  float IntersectionOverUnion(const std::array<float, 4>& a, const std::array<float, 4>& b) {
-    auto x1 = std::max(a[0], b[0]);
-    auto y1 = std::max(a[1], b[1]);
-    auto x2 = std::min(a[2], b[2]);
-    auto y2 = std::min(a[3], b[3]);
-
-    auto inter_area = std::max(0.f, x2 - x1) * std::max(0.f, y2 - y1);
-
-    auto a_area = Area(a);
-    auto b_area = Area(b);
-    auto union_area = a_area + b_area - inter_area;
-
-    if (union_area == 0.f) {
-      return 0;
-    }
-
-    return inter_area / union_area;
-  }
-
-  // TopDownAffine's internal logic for mapping pose detector inputs
-  Bbox MapBbox(const Bbox& box) {
-    Point p0(box[0], box[1]);
-    Point p1(box[2], box[3]);
-    auto c = .5f * (p0 + p1);
-    auto s = p1 - p0;
-    static constexpr std::array image_size{192.f, 256.f};
-    float aspect_ratio = image_size[0] * 1.0 / image_size[1];
-    if (s.x > aspect_ratio * s.y) {
-      s.y = s.x / aspect_ratio;
-    } else if (s.x < aspect_ratio * s.y) {
-      s.x = s.y * aspect_ratio;
-    }
-    s.x *= 1.25f;
-    s.y *= 1.25f;
-    p0 = c - .5f * s;
-    p1 = c + .5f * s;
-    return {p0.x, p0.y, p1.x, p1.y};
-  }
-
-  template <typename T>
-  vector<int> SuppressNonMaximum(const vector<T>& scores, const vector<float>& similarities,
-                                 vector<int> is_valid, float thresh) {
-    assert(is_valid.size() == scores.size());
-    vector<int> indices(scores.size());
-    std::iota(indices.begin(), indices.end(), 0);
-    // stable sort, useful when the scores are equal
-    std::sort(indices.begin(), indices.end(), [&](int i, int j) { return scores[i] > scores[j]; });
-    // suppress similar samples
-    for (int i = 0; i < indices.size(); ++i) {
-      if (auto u = indices[i]; is_valid[u]) {
-        for (int j = i + 1; j < indices.size(); ++j) {
-          if (auto v = indices[j]; is_valid[v]) {
-            if (similarities[u * scores.size() + v] >= thresh) {
-              is_valid[v] = false;
-            }
-          }
-        }
-      }
-    }
-    return is_valid;
-  }
-
-  struct Detections {
-    Bboxes bboxes;
-    Scores scores;
-    vector<int> labels;
-  };
-
-  void GetObjectsByDetection(const Detections& dets, vector<Bbox>& bboxes,
-                             vector<int64_t>& track_ids, vector<int>& types) const {
-    auto& [_bboxes, _scores, _labels] = dets;
-    for (size_t i = 0; i < _bboxes.size(); ++i) {
-      if (_labels[i] == params.det_label && _scores[i] > params.det_thr &&
-          Area(_bboxes[i]) >= params.det_min_bbox_size * params.det_min_bbox_size) {
-        bboxes.push_back(_bboxes[i]);
-        track_ids.push_back(-1);
-        types.push_back(1);
-      }
-    }
-  }
-
-  void GetObjectsByTracking(vector<Bbox>& bboxes, vector<int64_t>& track_ids,
-                            vector<int>& types) const {
-    for (auto& track : track_info.tracks) {
-      std::optional<Bbox> bbox;
-      if (track.n_missing) {
-        bbox = track.bbox_pred;
-      } else {
-        bbox = keypoints_to_bbox(track.kpts_pred, track.scores.back(), static_cast<float>(frame_h),
-                                 static_cast<float>(frame_w), params.track_bbox_scale,
-                                 params.track_kpts_thr, params.pose_min_keypoints);
-      }
-      if (bbox && Area(*bbox) >= params.pose_min_bbox_size * params.pose_min_bbox_size) {
-        bboxes.push_back(*bbox);
-        track_ids.push_back(track.track_id);
-        types.push_back(track.n_missing ? 0 : 2);
-      }
-    }
-  }
-
-  std::tuple<vector<Bbox>, vector<int64_t>> ProcessBboxes(const std::optional<Detections>& dets) {
-    vector<Bbox> bboxes;
-    vector<int64_t> track_ids;
-
-    // 2 - visible tracks
-    // 1 - detection
-    // 0 - missing tracks
-    vector<int> types;
-
-    if (dets) {
-      GetObjectsByDetection(*dets, bboxes, track_ids, types);
-    }
+Args ParseArgs(int argc, char* argv[]);
 
-    GetObjectsByTracking(bboxes, track_ids, types);
-
-    vector<int> is_valid_bboxes(bboxes.size(), 1);
-
-    auto count = [&] {
-      std::array<int, 3> acc{};
-      for (size_t i = 0; i < is_valid_bboxes.size(); ++i) {
-        if (is_valid_bboxes[i]) {
-          ++acc[types[i]];
-        }
-      }
-      return acc;
-    };
-    POSE_TRACKER_DEBUG("frame {}, bboxes {}", frame_id, count());
-
-    vector<std::pair<int, float>> ranks;
-    ranks.reserve(bboxes.size());
-    for (int i = 0; i < bboxes.size(); ++i) {
-      ranks.emplace_back(types[i], Area(bboxes[i]));
-    }
-
-    vector<float> iou(ranks.size() * ranks.size());
-    for (int i = 0; i < bboxes.size(); ++i) {
-      for (int j = 0; j < i; ++j) {
-        iou[i * bboxes.size() + j] = iou[j * bboxes.size() + i] =
-            IntersectionOverUnion(bboxes[i], bboxes[j]);
-      }
-    }
-
-    is_valid_bboxes =
-        SuppressNonMaximum(ranks, iou, std::move(is_valid_bboxes), params.det_nms_thr);
-    POSE_TRACKER_DEBUG("frame {}, bboxes after nms: {}", frame_id, count());
-
-    vector<int> idxs;
-    idxs.reserve(bboxes.size());
-    for (int i = 0; i < bboxes.size(); ++i) {
-      if (is_valid_bboxes[i]) {
-        idxs.push_back(i);
-      }
-    }
-
-    std::stable_sort(idxs.begin(), idxs.end(), [&](int i, int j) { return ranks[i] > ranks[j]; });
-    std::fill(is_valid_bboxes.begin(), is_valid_bboxes.end(), 0);
-    {
-      vector<Bbox> tmp_bboxes;
-      vector<int64_t> tmp_track_ids;
-      for (const auto& i : idxs) {
-        if (tmp_bboxes.size() >= params.pose_max_num_bboxes) {
-          break;
-        }
-        tmp_bboxes.push_back(bboxes[i]);
-        tmp_track_ids.push_back(track_ids[i]);
-        is_valid_bboxes[i] = 1;
-      }
-      bboxes = std::move(tmp_bboxes);
-      track_ids = std::move(tmp_track_ids);
-    }
-
-    POSE_TRACKER_DEBUG("frame {}, bboxes after sort: {}", frame_id, count());
-
-    pose_bboxes.clear();
-    for (const auto& bbox : bboxes) {
-      //    pose_bboxes.push_back(MapBbox(bbox));
-      pose_bboxes.push_back(bbox);
-    }
-
-    return {bboxes, track_ids};
-  }
-
-  float ObjectKeypointSimilarity(const Points& pts_a, const Bbox& box_a, const Points& pts_b,
-                                 const Bbox& box_b) {
-    assert(pts_a.size() == params.sigmas.size());
-    assert(pts_b.size() == params.sigmas.size());
-    auto scale = [](const Bbox& bbox) -> float {
-      auto a = bbox[2] - bbox[0];
-      auto b = bbox[3] - bbox[1];
-      return std::sqrt(a * a + b * b);
-    };
-    auto oks = [](const Point& pa, const Point& pb, float s, float k) {
-      return std::exp(-(pa - pb).dot(pa - pb) / (2.f * s * s * k * k));
-    };
-    auto sum = 0.f;
-    const auto s = .5f * (scale(box_a) + scale(box_b));
-    for (int i = 0; i < params.sigmas.size(); ++i) {
-      sum += oks(pts_a[i], pts_b[i], s, params.sigmas[i]);
-    }
-    sum /= static_cast<float>(params.sigmas.size());
-    return sum;
-  }
+using std::vector;
+using namespace mmdeploy;
 
-  void UpdateTrack(Track& track, Points kpts, Scores score, const Bbox& bbox, int is_missing) {
-    auto avg_score = std::accumulate(score.begin(), score.end(), 0.f) / score.size();
-    if (track.scores.size() == params.track_n_history) {
-      std::rotate(track.keypoints.begin(), track.keypoints.begin() + 1, track.keypoints.end());
-      std::rotate(track.scores.begin(), track.scores.begin() + 1, track.scores.end());
-      std::rotate(track.bboxes.begin(), track.bboxes.begin() + 1, track.bboxes.end());
-      std::rotate(track.avg_scores.begin(), track.avg_scores.begin() + 1, track.avg_scores.end());
-      std::rotate(track.is_missing.begin(), track.is_missing.begin() + 1, track.is_missing.end());
-      track.keypoints.back() = std::move(kpts);
-      track.scores.back() = std::move(score);
-      track.bboxes.back() = bbox;
-      track.avg_scores.back() = avg_score;
-      track.is_missing.back() = is_missing;
-    } else {
-      track.keypoints.push_back(std::move(kpts));
-      track.scores.push_back(std::move(score));
-      track.bboxes.push_back(bbox);
-      track.avg_scores.push_back(avg_score);
-      track.is_missing.push_back(is_missing);
-    }
-    ++track.age;
-    track.n_missing = is_missing ? track.n_missing + 1 : 0;
-  }
+bool Visualize(cv::Mat frame, const PoseTracker::Result& result, int size,
+               const std::string& output_dir, int frame_id, bool with_bbox);
 
-  vector<std::tuple<int, int, float>> GreedyAssignment(const vector<float>& scores,
-                                                       vector<int>& is_valid_rows,
-                                                       vector<int>& is_valid_cols, float thr) {
-    const auto n_rows = is_valid_rows.size();
-    const auto n_cols = is_valid_cols.size();
-    vector<std::tuple<int, int, float>> assignment;
-    assignment.reserve(std::max(n_rows, n_cols));
-    while (true) {
-      auto max_score = 0.f;
-      int max_row = -1;
-      int max_col = -1;
-      for (int i = 0; i < n_rows; ++i) {
-        if (is_valid_rows[i]) {
-          for (int j = 0; j < n_cols; ++j) {
-            if (is_valid_cols[j]) {
-              if (scores[i * n_cols + j] > max_score) {
-                max_score = scores[i * n_cols + j];
-                max_row = i;
-                max_col = j;
-              }
-            }
-          }
-        }
-      }
-      if (max_score < thr) {
-        break;
-      }
-      is_valid_rows[max_row] = 0;
-      is_valid_cols[max_col] = 0;
-      assignment.emplace_back(max_row, max_col, max_score);
-    }
-    return assignment;
-  }
-
-  vector<int> SuppressOverlappingBboxes(
-      const vector<Points>& keypoints, const vector<Scores>& scores,
-      const vector<int>& is_present,  // bbox from a visible track?
-      const vector<Bbox>& bboxes, vector<int> is_valid, const vector<float>& sigmas,
-      float oks_thr) {
-    assert(keypoints.size() == is_valid.size());
-    assert(scores.size() == is_valid.size());
-    assert(bboxes.size() == is_valid.size());
-    const auto size = is_valid.size();
-    vector<float> oks(size * size);
-    for (int i = 0; i < size; ++i) {
-      if (is_valid[i]) {
-        for (int j = 0; j < i; ++j) {
-          if (is_valid[j]) {
-            oks[i * size + j] = oks[j * size + i] =
-                ObjectKeypointSimilarity(keypoints[i], bboxes[i], keypoints[j], bboxes[j]);
-          }
-        }
-      }
-    }
-    vector<std::pair<int, float>> ranks;
-    ranks.reserve(size);
-    for (int i = 0; i < size; ++i) {
-      auto& s = scores[i];
-      auto avg = std::accumulate(s.begin(), s.end(), 0.f) / static_cast<float>(s.size());
-      // prevents bboxes from missing tracks to suppress visible tracks
-      ranks.emplace_back(is_present[i], avg);
-    }
-    return SuppressNonMaximum(ranks, oks, is_valid, oks_thr);
+int main(int argc, char* argv[]) {
+  auto args = ParseArgs(argc, argv);
+  if (args.device.empty()) {
+    return 0;
   }
 
-  void TrackStep(vector<Points>& keypoints, vector<Scores>& scores,
-                 const vector<int64_t>& track_ids) {
-    auto& tracks = track_info.tracks;
-
-    vector<Track> new_tracks;
-    new_tracks.reserve(tracks.size());
-
-    vector<Bbox> bboxes(keypoints.size());
-    vector<int> is_valid_bboxes(keypoints.size(), 1);
-
-    pose_results.clear();
-
-    // key-points to bboxes
-    for (size_t i = 0; i < keypoints.size(); ++i) {
-      if (auto bbox =
-              keypoints_to_bbox(keypoints[i], scores[i], frame_h, frame_w, params.track_bbox_scale,
-                                params.track_kpts_thr, params.pose_min_keypoints)) {
-        bboxes[i] = *bbox;
-        pose_results.push_back(*bbox);
-      } else {
-        is_valid_bboxes[i] = false;
-        //      MMDEPLOY_INFO("frame {}: invalid key-points {}", frame_id, scores[i]);
-      }
-    }
-
-    vector<int> is_present(is_valid_bboxes.size());
-    for (int i = 0; i < track_ids.size(); ++i) {
-      for (const auto& t : tracks) {
-        if (t.track_id == track_ids[i]) {
-          is_present[i] = !t.n_missing;
-          break;
-        }
-      }
-    }
-    is_valid_bboxes =
-        SuppressOverlappingBboxes(keypoints, scores, is_present, bboxes, is_valid_bboxes,
-                                  params.sigmas, params.track_nms_oks_thr);
-    assert(is_valid_bboxes.size() == bboxes.size());
-
-    const auto n_rows = static_cast<int>(bboxes.size());
-    const auto n_cols = static_cast<int>(tracks.size());
+  // create pose tracker pipeline
+  PoseTracker tracker(Model(args.det_model), Model(args.pose_model), Context{Device{args.device}});
 
-    // generate similarity matrix
-    vector<float> iou(n_rows * n_cols);
-    vector<float> oks(n_rows * n_cols);
-    for (size_t i = 0; i < n_rows; ++i) {
-      const auto& bbox = bboxes[i];
-      const auto& kpts = keypoints[i];
-      for (size_t j = 0; j < n_cols; ++j) {
-        const auto& track = tracks[j];
-        if (track_ids[i] != -1 && track_ids[i] != track.track_id) {
-          continue;
-        }
-        const auto index = i * n_cols + j;
-        iou[index] = IntersectionOverUnion(bbox, track.bbox_pred);
-        oks[index] = ObjectKeypointSimilarity(kpts, bbox, track.kpts_pred, track.bbox_pred);
-      }
-    }
+  // set parameters
+  PoseTracker::Params params;
+  params->det_min_bbox_size = 100;
+  params->det_interval = 1;
+  params->pose_max_num_bboxes = 6;
 
-    vector<int> is_valid_tracks(n_cols, 1);
-    // disable missing tracks in the #1 assignment
-    for (int i = 0; i < tracks.size(); ++i) {
-      if (tracks[i].n_missing) {
-        is_valid_tracks[i] = 0;
-      }
-    }
-    const auto oks_assignment =
-        GreedyAssignment(oks, is_valid_bboxes, is_valid_tracks, params.track_oks_thr);
+  // optionally use OKS for keypoints similarity comparison
+  std::array<float, 17> sigmas{0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072,
+                               0.062, 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089};
+  params->keypoint_sigmas = sigmas.data();
+  params->keypoint_sigmas_size = sigmas.size();
 
-    // enable missing tracks in the #2 assignment
-    for (int i = 0; i < tracks.size(); ++i) {
-      if (tracks[i].n_missing) {
-        is_valid_tracks[i] = 1;
-      }
-    }
-    const auto iou_assignment =
-        GreedyAssignment(iou, is_valid_bboxes, is_valid_tracks, params.track_iou_thr);
+  // create a tracker state for each video
+  PoseTracker::State state = tracker.CreateState(params);
 
-    POSE_TRACKER_DEBUG("frame {}, oks assignment {}", frame_id, oks_assignment);
-    POSE_TRACKER_DEBUG("frame {}, iou assignment {}", frame_id, iou_assignment);
-
-    auto assignment = oks_assignment;
-    assignment.insert(assignment.end(), iou_assignment.begin(), iou_assignment.end());
-
-    // update assigned tracks
-    for (auto [i, j, _] : assignment) {
-      auto& track = tracks[j];
-      if (track.n_missing) {
-        // re-initialize filter for recovering tracks
-        track.filter = CreateFilter(bboxes[i], keypoints[i]);
-        UpdateTrack(track, keypoints[i], scores[i], bboxes[i], false);
-        POSE_TRACKER_DEBUG("frame {}, track recovered {}", frame_id, track.track_id);
-      } else {
-        auto [bbox, kpts] = track.filter->Correct(bboxes[i], keypoints[i]);
-        UpdateTrack(track, std::move(kpts), std::move(scores[i]), bbox, false);
-      }
-      new_tracks.push_back(std::move(track));
-    }
-
-    // generating new tracks
-    for (size_t i = 0; i < is_valid_bboxes.size(); ++i) {
-      // only newly detected bboxes are allowed to form new tracks
-      if (is_valid_bboxes[i] && track_ids[i] == -1) {
-        auto& track = new_tracks.emplace_back();
-        track.track_id = track_info.next_id++;
-        track.filter = CreateFilter(bboxes[i], keypoints[i]);
-        UpdateTrack(track, std::move(keypoints[i]), std::move(scores[i]), bboxes[i], false);
-        is_valid_bboxes[i] = 0;
-        POSE_TRACKER_DEBUG("frame {}, new track {}", frame_id, track.track_id);
-      }
-    }
-
-    if (1) {
-      // diagnostic for missing tracks
-      int n_missing = 0;
-      for (int i = 0; i < is_valid_tracks.size(); ++i) {
-        if (is_valid_tracks[i]) {
-          float best_oks = 0.f;
-          float best_iou = 0.f;
-          for (int j = 0; j < is_valid_bboxes.size(); ++j) {
-            if (is_valid_bboxes[j]) {
-              best_oks = std::max(oks[j * n_cols + i], best_oks);
-              best_iou = std::max(iou[j * n_cols + i], best_iou);
-            }
-          }
-          POSE_TRACKER_DEBUG("frame {}: track missing {}, best_oks={}, best_iou={}", frame_id,
-                             tracks[i].track_id, best_oks, best_iou);
-          ++n_missing;
-        }
-      }
-      if (n_missing) {
-        {
-          std::stringstream ss;
-          ss << cv::Mat_<float>(n_rows, n_cols, oks.data());
-          POSE_TRACKER_DEBUG("frame {}, oks: \n{}", frame_id, ss.str());
-        }
-        {
-          std::stringstream ss;
-          ss << cv::Mat_<float>(n_rows, n_cols, iou.data());
-          POSE_TRACKER_DEBUG("frame {}, iou: \n{}", frame_id, ss.str());
-        }
-      }
-    }
-
-    for (int i = 0; i < is_valid_tracks.size(); ++i) {
-      if (is_valid_tracks[i]) {
-        if (auto& track = tracks[i]; track.n_missing < params.track_max_missing) {
-          // use predicted state to update missing tracks
-          auto [bbox, kpts] = track.filter->Correct(track.bbox_pred, track.kpts_pred);
-          vector<float> score(track.kpts_pred.size());
-          POSE_TRACKER_DEBUG("frame {}, track {}, bbox width {}", frame_id, track.track_id,
-                             bbox[2] - bbox[0]);
-          UpdateTrack(track, std::move(kpts), std::move(score), bbox, true);
-          new_tracks.push_back(std::move(track));
-        } else {
-          POSE_TRACKER_DEBUG("frame {}, track lost {}", frame_id, track.track_id);
-        }
-        is_valid_tracks[i] = false;
-      }
-    }
-
-    tracks = std::move(new_tracks);
-    for (auto& t : tracks) {
-      if (t.n_missing == 0) {
-        std::tie(t.bbox_pred, t.kpts_pred) = t.filter->Predict();
-      } else {
-        auto [bbox, kpts] = t.filter->Predict();
-        const auto alpha = params.track_missing_momentum;
-        cv::Mat tmp_bbox = alpha * cv::Mat(bbox, false) + (1 - alpha) * cv::Mat(t.bbox_pred, false);
-        tmp_bbox.copyTo(cv::Mat(t.bbox_pred, false));
-      }
-    }
-
-    if (0) {
-      vector<std::tuple<int64_t, int>> summary;
-      for (const auto& track : tracks) {
-        summary.emplace_back(track.track_id, track.n_missing);
-      }
-      POSE_TRACKER_DEBUG("frame {}, track summary {}", frame_id, summary);
-      for (const auto& track : tracks) {
-        if (!track.n_missing) {
-          POSE_TRACKER_DEBUG("frame {}, track {}, scores {}", frame_id, track.track_id,
-                             track.scores.back());
-        }
-      }
-    }
+  cv::VideoCapture video;
+  if (args.video.size() == 1 && std::isdigit(args.video[0])) {
+    video.open(std::stoi(args.video));  // open by camera index
+  } else {
+    video.open(args.video);  // open video file
   }
-
-  std::shared_ptr<TrackerFilter> CreateFilter(const Bbox& bbox, const Points& kpts) const {
-    return std::make_shared<TrackerFilter>(
-        params.filter_c_beta, params.filter_c_fc_min, params.filter_c_fc_d, params.filter_k_beta,
-        params.filter_k_fc_min, params.filter_k_fc_d, bbox, kpts);
+  if (!video.isOpened()) {
+    std::cerr << "failed to open video: " << args.video << "\n";
   }
 
-  struct Target {
-    Bbox bbox;
-    vector<float> keypoints;
-    Scores scores;
-    MMDEPLOY_ARCHIVE_MEMBERS(bbox, keypoints, scores);
-  };
-
-  vector<Target> TrackPose(vector<Points> keypoints, vector<Scores> scores,
-                           const vector<int64_t>& track_ids) {
-    TrackStep(keypoints, scores, track_ids);
-    vector<Target> targets;
-    for (const auto& track : track_info.tracks) {
-      if (track.n_missing) {
-        continue;
-      }
-      if (auto bbox = keypoints_to_bbox(track.keypoints.back(), track.scores.back(), frame_h,
-                                        frame_w, params.track_bbox_scale, params.track_kpts_thr,
-                                        params.pose_min_keypoints)) {
-        vector<float> kpts;
-        kpts.reserve(track.keypoints.back().size());
-        for (const auto& kpt : track.keypoints.back()) {
-          kpts.emplace_back(kpt.x);
-          kpts.emplace_back(kpt.y);
-        }
-        targets.push_back(Target{*bbox, std::move(kpts), track.scores.back()});
-      }
-    }
-    return targets;
-  }
-
-  float frame_h = 0;
-  float frame_w = 0;
-  TrackInfo track_info;
-
-  TrackerParams params;
-
+  cv::Mat frame;
   int frame_id = 0;
-
-  vector<Bbox> pose_bboxes;
-  vector<Bbox> pose_results;
-};
-
-MMDEPLOY_REGISTER_TYPE_ID(Tracker, 0xcfe87980aa895d3a);
-
-std::tuple<Value, Value> ProcessBboxes(const Value& det_val, const Value& data, Value state) {
-  auto& tracker = state.get_ref<Tracker&>();
-
-  std::optional<Tracker::Detections> dets;
-
-  if (det_val.is_array()) {  // has detections
-    auto& [bboxes, scores, labels] = dets.emplace();
-    for (const auto& det : det_val.array()) {
-      bboxes.push_back(from_value<Bbox>(det["bbox"]));
-      scores.push_back(det["score"].get<float>());
-      labels.push_back(det["label_id"].get<int>());
+  while (true) {
+    video >> frame;
+    if (!frame.data) {
+      break;
     }
-  }
-
-  auto [bboxes, ids] = tracker.ProcessBboxes(dets);
-
-  Value::Array bbox_array;
-  Value track_ids_array;
-  // attach bboxes to image data
-  for (auto& bbox : bboxes) {
-    cv::Rect rect(cv::Rect2f(cv::Point2f(bbox[0], bbox[1]), cv::Point2f(bbox[2], bbox[3])));
-    bbox_array.push_back({
-        {"img", data["img"]},                                 // img
-        {"bbox", {rect.x, rect.y, rect.width, rect.height}},  // bbox
-        {"rotation", 0.f}                                     // rotation
-    });
-  }
-
-  track_ids_array = to_value(ids);
-  return {std::move(bbox_array), std::move(track_ids_array)};
-}
-
-REGISTER_SIMPLE_MODULE(ProcessBboxes, ProcessBboxes);
-
-Value TrackPose(const Value& poses, const Value& track_indices, Value state) {
-  assert(poses.is_array());
-  vector<Points> keypoints;
-  vector<Scores> scores;
-  for (auto& output : poses.array()) {
-    auto& k = keypoints.emplace_back();
-    auto& s = scores.emplace_back();
-    float avg = 0.f;
-    for (auto& kpt : output["key_points"].array()) {
-      k.emplace_back(kpt["bbox"][0].get<float>(), kpt["bbox"][1].get<float>());
-      s.push_back(kpt["score"].get<float>());
-      avg += s.back();
+    // apply the pipeline with the tracker state and video frame
+    auto result = tracker.Apply(state, frame);
+    // visualize the results
+    if (!Visualize(frame, result, 1280, args.output_dir, frame_id++, false)) {
+      break;
     }
   }
-  vector<int64_t> track_ids;
-  from_value(track_indices, track_ids);
-  auto& tracker = state.get_ref<Tracker&>();
-  auto targets = tracker.TrackPose(std::move(keypoints), std::move(scores), track_ids);
-  return to_value(targets);
-}
-
-REGISTER_SIMPLE_MODULE(TrackPose, TrackPose);
-
-class PoseTracker {
- public:
-  using State = Value;
-
- public:
-  PoseTracker(const Model& det_model, const Model& pose_model, Context context)
-      : pipeline_([&] {
-          context.Add("detection", det_model);
-          context.Add("pose", pose_model);
-          auto config = from_json<Value>(config_json);
-          return Pipeline{config, context};
-        }()) {}
-
-  State CreateState(const TrackerParams& params) {
-    auto state = make_pointer(Tracker{params});
-    auto& tracker = state.get_ref<Tracker&>();
-    return state;
-  }
-
-  Value Track(const Mat& img, State& state, int use_detector = -1) {
-    assert(state.is_pointer());
-    framework::Mat mat(img.desc().height, img.desc().width,
-                       static_cast<PixelFormat>(img.desc().format),
-                       static_cast<DataType>(img.desc().type), {img.desc().data, [](void*) {}});
-
-    auto& tracker = state.get_ref<Tracker&>();
-
-    if (use_detector < 0) {
-      if (tracker.frame_id % tracker.params.det_interval == 0) {
-        use_detector = 1;
-        POSE_TRACKER_DEBUG("frame {}, use detector", tracker.frame_id);
-      } else {
-        use_detector = 0;
-      }
-    }
-
-    if (tracker.frame_id == 0) {
-      tracker.frame_h = static_cast<float>(mat.height());
-      tracker.frame_w = static_cast<float>(mat.width());
-    }
-
-    Value::Object data{{"ori_img", mat}};
-    Value input{{data}, {use_detector}, {state}};
-    auto ret = pipeline_.Apply(input)[0][0];
 
-    ++tracker.frame_id;
-
-    return ret;
-  }
+  return 0;
+}
 
- private:
-  Pipeline pipeline_;
+struct Skeleton {
+  vector<std::pair<int, int>> skeleton;
+  vector<cv::Scalar> palette;
+  vector<int> link_color;
+  vector<int> point_color;
 };
 
-}  // namespace mmdeploy
-
-using namespace mmdeploy;
-
-const cv::Scalar& gPalette(int index) {
-  static vector<cv::Scalar> inst{
-      {255, 128, 0},   {255, 153, 51},  {255, 178, 102}, {230, 230, 0},   {255, 153, 255},
-      {153, 204, 255}, {255, 102, 255}, {255, 51, 255},  {102, 178, 255}, {51, 153, 255},
-      {255, 153, 153}, {255, 102, 102}, {255, 51, 51},   {153, 255, 153}, {102, 255, 102},
-      {51, 255, 51},   {0, 255, 0},     {0, 0, 255},     {255, 0, 0},     {255, 255, 255}};
-  return inst[index];
+const Skeleton& gCocoSkeleton() {
+  static const Skeleton inst{
+      {
+          {15, 13}, {13, 11}, {16, 14}, {14, 12}, {11, 12}, {5, 11}, {6, 12},
+          {5, 6},   {5, 7},   {6, 8},   {7, 9},   {8, 10},  {1, 2},  {0, 1},
+          {0, 2},   {1, 3},   {2, 4},   {3, 5},   {4, 6},
+      },
+      {
+          {255, 128, 0},   {255, 153, 51},  {255, 178, 102}, {230, 230, 0},   {255, 153, 255},
+          {153, 204, 255}, {255, 102, 255}, {255, 51, 255},  {102, 178, 255}, {51, 153, 255},
+          {255, 153, 153}, {255, 102, 102}, {255, 51, 51},   {153, 255, 153}, {102, 255, 102},
+          {51, 255, 51},   {0, 255, 0},     {0, 0, 255},     {255, 0, 0},     {255, 255, 255},
+      },
+      {0, 0, 0, 0, 7, 7, 7, 9, 9, 9, 9, 9, 16, 16, 16, 16, 16, 16, 16},
+      {16, 16, 16, 16, 16, 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0},
+  };
+  return inst;
 }
 
-void Visualize(cv::Mat& frame, const Value& result, const Bboxes& pose_bboxes,
-               const Bboxes& pose_results, int size) {
-  static vector<std::pair<int, int>> skeleton{
-      {15, 13}, {13, 11}, {16, 14}, {14, 12}, {11, 12}, {5, 11}, {6, 12}, {5, 6}, {5, 7}, {6, 8},
-      {7, 9},   {8, 10},  {1, 2},   {0, 1},   {0, 2},   {1, 3},  {2, 4},  {3, 5}, {4, 6}};
-  static vector link_color{0, 0, 0, 0, 7, 7, 7, 9, 9, 9, 9, 9, 16, 16, 16, 16, 16, 16, 16};
-  static vector kpt_color{16, 16, 16, 16, 16, 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0};
+bool Visualize(cv::Mat frame, const PoseTracker::Result& result, int size,
+               const std::string& output_dir, int frame_id, bool with_bbox) {
+  auto& [skeleton, palette, link_color, point_color] = gCocoSkeleton();
   auto scale = (float)size / (float)std::max(frame.cols, frame.rows);
   if (scale != 1) {
     cv::resize(frame, frame, {}, scale, scale);
+  } else {
+    frame = frame.clone();
   }
-  auto draw_bbox = [](cv::Mat& image, Bbox bbox, const cv::Scalar& color, float scale = 1) {
+  auto draw_bbox = [&](std::array<float, 4> bbox, const cv::Scalar& color) {
     std::for_each(bbox.begin(), bbox.end(), [&](auto& x) { x *= scale; });
-    cv::Point p1(bbox[0], bbox[1]);
-    cv::Point p2(bbox[2], bbox[3]);
-    cv::rectangle(image, p1, p2, color);
+    cv::rectangle(frame, cv::Point2f(bbox[0], bbox[1]), cv::Point2f(bbox[2], bbox[3]), color);
   };
-  const auto& targets = result.array();
-  for (const auto& target : targets) {
-    auto bbox = from_value<std::array<float, 4>>(target["bbox"]);
-    auto kpts = from_value<vector<float>>(target["keypoints"]);
-    std::for_each(bbox.begin(), bbox.end(), [&](auto& x) { x *= scale; });
+  for (const auto& r : result) {
+    vector<float> kpts(&r.keypoints[0].x, &r.keypoints[0].x + r.keypoint_count * 2);
+    vector<float> scores(r.scores, r.scores + r.keypoint_count);
     std::for_each(kpts.begin(), kpts.end(), [&](auto& x) { x *= scale; });
-    auto scores = from_value<vector<float>>(target["scores"]);
-    if (0) {
-      draw_bbox(frame, bbox, cv::Scalar(0, 255, 0));
-    }
     constexpr auto score_thr = .5f;
     vector<int> used(kpts.size());
-    for (int i = 0; i < skeleton.size(); ++i) {
+    for (size_t i = 0; i < skeleton.size(); ++i) {
       auto [u, v] = skeleton[i];
       if (scores[u] > score_thr && scores[v] > score_thr) {
         used[u] = used[v] = 1;
-        cv::Point p_u(kpts[u * 2], kpts[u * 2 + 1]);
-        cv::Point p_v(kpts[v * 2], kpts[v * 2 + 1]);
-        cv::line(frame, p_u, p_v, gPalette(link_color[i]), 1, cv::LINE_AA);
+        cv::Point2f p_u(kpts[u * 2], kpts[u * 2 + 1]);
+        cv::Point2f p_v(kpts[v * 2], kpts[v * 2 + 1]);
+        cv::line(frame, p_u, p_v, palette[link_color[i]], 1, cv::LINE_AA);
       }
     }
-    for (int i = 0; i < kpts.size(); i += 2) {
+    for (size_t i = 0; i < kpts.size(); i += 2) {
       if (used[i / 2]) {
-        cv::Point p(kpts[i], kpts[i + 1]);
-        cv::circle(frame, p, 1, gPalette(kpt_color[i / 2]), 2, cv::LINE_AA);
+        cv::Point2f p(kpts[i], kpts[i + 1]);
+        cv::circle(frame, p, 1, palette[point_color[i / 2]], 2, cv::LINE_AA);
       }
     }
-  }
-  if (0) {
-    for (auto bbox : pose_bboxes) {
-      draw_bbox(frame, bbox, {0, 255, 255}, scale);
-    }
-    for (auto bbox : pose_results) {
-      draw_bbox(frame, bbox, {0, 255, 0}, scale);
+    if (with_bbox) {
+      draw_bbox((std::array<float, 4>&)r.bbox, cv::Scalar(0, 255, 0));
     }
   }
-  static int frame_id = 0;
-  cv::imwrite(fmt::format("pose_{}.jpg", frame_id++), frame, {cv::IMWRITE_JPEG_QUALITY, 90});
-}
-
-// ffmpeg -f image2 -i pose_%d.jpg -vcodec hevc -crf 30 pose.mp4
-
-int main(int argc, char* argv[]) {
-  const auto device_name = argv[1];
-  const auto det_model_path = argv[2];
-  const auto pose_model_path = argv[3];
-  const auto video_path = argv[4];
-  Device device(device_name);
-  Context context(device);
-  Profiler profiler("pose_tracker.perf");
-  context.Add(profiler);
-  PoseTracker tracker(Model(det_model_path), Model(pose_model_path), context);
-  TrackerParams params;
-  // coco
-  params.sigmas = {0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072,
-                   0.062, 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089};
-  params.pose_max_num_bboxes = 5;
-  params.det_interval = 5;
-
-  auto state = tracker.CreateState(params);
-
-  cv::Mat frame;
-  std::chrono::duration<double, std::milli> dt{};
-
-  int frame_id{};
-
-  cv::VideoCapture video(video_path);
-  while (true) {
-    video >> frame;
-    if (!frame.data) {
-      break;
-    }
-    auto t0 = std::chrono::high_resolution_clock::now();
-    auto result = tracker.Track(frame, state);
-    auto t1 = std::chrono::high_resolution_clock::now();
-    dt += t1 - t0;
-    ++frame_id;
-
-    auto& pose_bboxes = state.get_ref<Tracker&>().pose_bboxes;
-    auto& pose_results = state.get_ref<Tracker&>().pose_results;
-
-    Visualize(frame, result, pose_bboxes, pose_results, 1024);
+  if (output_dir.empty()) {
+    cv::imshow("pose_tracker", frame);
+    return cv::waitKey(1) != 'q';
   }
+  auto join = [](const std::string& a, const std::string& b) {
+#if _MSC_VER
+    return a + "\\" + b;
+#else
+    return a + "/" + b;
+#endif
+  };
+  cv::imwrite(join(output_dir, std::to_string(frame_id) + ".jpg"), frame,
+              {cv::IMWRITE_JPEG_QUALITY, 90});
+  return true;
+}
 
-  MMDEPLOY_INFO("frames: {}, time {} ms", frame_id, dt.count());
+Args ParseArgs(int argc, char* argv[]) {
+  if (argc < 5 || argc > 6) {
+    std::cout << R"(Usage: pose_tracker device_name det_model pose_model video [output]
+  device_name  device name for execution, e.g. "cpu", "cuda"
+  det_model    object detection model path
+  pose_model   pose estimation model path
+  video        video path or camera index
+  output       output directory, will cv::imshow if omitted
+)";
+    return {};
+  }
+  Args args;
+  args.device = argv[1];
+  args.det_model = argv[2];
+  args.pose_model = argv[3];
+  args.video = argv[4];
+  if (argc == 6) {
+    args.output_dir = argv[5];
+  }
+  return args;
 }
diff --git a/demo/python/det_pose.py b/demo/python/det_pose.py
new file mode 100644
index 0000000000..3422eee9fa
--- /dev/null
+++ b/demo/python/det_pose.py
@@ -0,0 +1,85 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+
+import cv2
+import numpy as np
+from mmdeploy_python import Detector, PoseDetector
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description='show how to use SDK Python API')
+    parser.add_argument('device_name', help='name of device, cuda or cpu')
+    parser.add_argument(
+        'det_model_path',
+        help='path of mmdeploy SDK model dumped by model converter')
+    parser.add_argument(
+        'pose_model_path',
+        help='path of mmdeploy SDK model dumped by model converter')
+    parser.add_argument('image_path', help='path of input image')
+    args = parser.parse_args()
+    return args
+
+
+def visualize(frame, keypoints, filename, thr=0.5, resize=1280):
+    skeleton = [(15, 13), (13, 11), (16, 14), (14, 12), (11, 12), (5, 11),
+                (6, 12), (5, 6), (5, 7), (6, 8), (7, 9), (8, 10), (1, 2),
+                (0, 1), (0, 2), (1, 3), (2, 4), (3, 5), (4, 6)]
+    palette = [(255, 128, 0), (255, 153, 51), (255, 178, 102), (230, 230, 0),
+               (255, 153, 255), (153, 204, 255), (255, 102, 255),
+               (255, 51, 255), (102, 178, 255),
+               (51, 153, 255), (255, 153, 153), (255, 102, 102), (255, 51, 51),
+               (153, 255, 153), (102, 255, 102), (51, 255, 51), (0, 255, 0),
+               (0, 0, 255), (255, 0, 0), (255, 255, 255)]
+    link_color = [
+        0, 0, 0, 0, 7, 7, 7, 9, 9, 9, 9, 9, 16, 16, 16, 16, 16, 16, 16
+    ]
+    point_color = [16, 16, 16, 16, 16, 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0]
+
+    scale = resize / max(frame.shape[0], frame.shape[1])
+
+    scores = keypoints[..., 2]
+    keypoints = (keypoints[..., :2] * scale).astype(int)
+
+    img = cv2.resize(frame, (0, 0), fx=scale, fy=scale)
+    for kpts, score in zip(keypoints, scores):
+        show = [0] * len(kpts)
+        for (u, v), color in zip(skeleton, link_color):
+            if score[u] > thr and score[v] > thr:
+                cv2.line(img, kpts[u], tuple(kpts[v]), palette[color], 1,
+                         cv2.LINE_AA)
+                show[u] = show[v] = 1
+        for kpt, show, color in zip(kpts, show, point_color):
+            if show:
+                cv2.circle(img, kpt, 1, palette[color], 2, cv2.LINE_AA)
+    cv2.imwrite(filename, img)
+
+
+def main():
+    args = parse_args()
+
+    # load image
+    img = cv2.imread(args.image_path)
+
+    # create object detector
+    detector = Detector(
+        model_path=args.det_model_path, device_name=args.device_name)
+    # create pose detector
+    pose_detector = PoseDetector(
+        model_path=args.pose_model_path, device_name=args.device_name)
+
+    # apply detector
+    bboxes, labels, _ = detector(img)
+
+    # filter detections
+    keep = np.logical_and(labels == 0, bboxes[..., 4] > 0.6)
+    bboxes = bboxes[keep, :4]
+
+    # apply pose detector
+    poses = pose_detector(img, bboxes)
+
+    visualize(img, poses, 'det_pose_output.jpg', 0.5, 1280)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/demo/python/pose_tracker.py b/demo/python/pose_tracker.py
new file mode 100644
index 0000000000..9b1b8fa3d6
--- /dev/null
+++ b/demo/python/pose_tracker.py
@@ -0,0 +1,98 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+import os
+
+import cv2
+from mmdeploy_python import PoseTracker
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description='show how to use SDK Python API')
+    parser.add_argument('device_name', help='name of device, cuda or cpu')
+    parser.add_argument(
+        'det_model',
+        help='path of mmdeploy SDK model dumped by model converter')
+    parser.add_argument(
+        'pose_model',
+        help='path of mmdeploy SDK model dumped by model converter')
+    parser.add_argument('video', help='video path or camera index')
+    parser.add_argument('--output_dir', help='output directory', default=None)
+    args = parser.parse_args()
+    if args.video.isnumeric():
+        args.video = int(args.video)
+    return args
+
+
+def visualize(frame, results, output_dir, frame_id, thr=0.5, resize=1280):
+    skeleton = [(15, 13), (13, 11), (16, 14), (14, 12), (11, 12), (5, 11),
+                (6, 12), (5, 6), (5, 7), (6, 8), (7, 9), (8, 10), (1, 2),
+                (0, 1), (0, 2), (1, 3), (2, 4), (3, 5), (4, 6)]
+    palette = [(255, 128, 0), (255, 153, 51), (255, 178, 102), (230, 230, 0),
+               (255, 153, 255), (153, 204, 255), (255, 102, 255),
+               (255, 51, 255), (102, 178, 255),
+               (51, 153, 255), (255, 153, 153), (255, 102, 102), (255, 51, 51),
+               (153, 255, 153), (102, 255, 102), (51, 255, 51), (0, 255, 0),
+               (0, 0, 255), (255, 0, 0), (255, 255, 255)]
+    link_color = [
+        0, 0, 0, 0, 7, 7, 7, 9, 9, 9, 9, 9, 16, 16, 16, 16, 16, 16, 16
+    ]
+    point_color = [16, 16, 16, 16, 16, 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0]
+    scale = resize / max(frame.shape[0], frame.shape[1])
+    keypoints, bboxes, _ = results
+    scores = keypoints[..., 2]
+    keypoints = (keypoints[..., :2] * scale).astype(int)
+    bboxes *= scale
+    img = cv2.resize(frame, (0, 0), fx=scale, fy=scale)
+    for kpts, score, bbox in zip(keypoints, scores, bboxes):
+        show = [0] * len(kpts)
+        for (u, v), color in zip(skeleton, link_color):
+            if score[u] > thr and score[v] > thr:
+                cv2.line(img, kpts[u], tuple(kpts[v]), palette[color], 1,
+                         cv2.LINE_AA)
+                show[u] = show[v] = 1
+        for kpt, show, color in zip(kpts, show, point_color):
+            if show:
+                cv2.circle(img, kpt, 1, palette[color], 2, cv2.LINE_AA)
+    if output_dir:
+        cv2.imwrite(f'{output_dir}/{str(frame_id).zfill(6)}.jpg', img)
+    else:
+        cv2.imshow('pose_tracker', img)
+        return cv2.waitKey(1) != 'q'
+    return True
+
+
+def main():
+    args = parse_args()
+
+    video = cv2.VideoCapture(args.video)
+
+    tracker = PoseTracker(
+        det_model=args.det_model,
+        pose_model=args.pose_model,
+        device_name=args.device_name)
+
+    # optionally use OKS for keypoints similarity comparison
+    coco_sigmas = [
+        0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062,
+        0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089
+    ]
+    state = tracker.create_state(
+        det_interval=1, det_min_bbox_size=100, keypoint_sigmas=coco_sigmas)
+
+    if args.output_dir:
+        os.makedirs(args.output_dir, exist_ok=True)
+
+    frame_id = 0
+    while True:
+        success, frame = video.read()
+        if not success:
+            break
+        results = tracker(state, frame, detect=-1)
+        if not visualize(frame, results, args.output_dir, frame_id):
+            break
+        frame_id += 1
+
+
+if __name__ == '__main__':
+    main()