diff --git a/mmpose/apis/inference_3d.py b/mmpose/apis/inference_3d.py index 8725b27caa..d4b9623b86 100644 --- a/mmpose/apis/inference_3d.py +++ b/mmpose/apis/inference_3d.py @@ -317,6 +317,7 @@ def inference_pose_lifter_model(model, K, ), dtype=np.float32) data_info['lifting_target'] = np.zeros((1, K, 3), dtype=np.float32) + data_info['factor'] = np.zeros((T, ), dtype=np.float32) data_info['lifting_target_visible'] = np.ones((1, K, 1), dtype=np.float32) diff --git a/mmpose/codecs/motionbert_label.py b/mmpose/codecs/motionbert_label.py index 1d036c49bb..ce3a9b4f65 100644 --- a/mmpose/codecs/motionbert_label.py +++ b/mmpose/codecs/motionbert_label.py @@ -131,8 +131,11 @@ def encode(self, ..., :2] = keypoint_labels[..., :2] / w * 2 - [1, h / w] # convert target to image coordinate - lifting_target_label, factor_ = camera_to_image_coord( - self.root_index, lifting_target_label, _camera_param) + T = keypoint_labels.shape[0] + factor_ = np.array([4] * T, dtype=np.float32).reshape(T, ) + if 'f' in _camera_param and 'c' in _camera_param: + lifting_target_label, factor_ = camera_to_image_coord( + self.root_index, lifting_target_label, _camera_param) lifting_target_label[..., :, :] = lifting_target_label[ ..., :, :] - lifting_target_label[..., self.root_index:self.root_index + @@ -141,7 +144,7 @@ def encode(self, factor = factor_ if factor.ndim == 1: factor = factor[:, None] - lifting_target_label *= 1000 * factor[..., None] + lifting_target_label *= factor[..., None] if self.concat_vis: keypoints_visible_ = keypoints_visible @@ -206,4 +209,5 @@ def decode( keypoints *= factor[..., None] keypoints[..., :, :] = keypoints[..., :, :] - keypoints[ ..., self.root_index:self.root_index + 1, :] + keypoints /= 1000. return keypoints, scores