Skip to content

Commit

Permalink
Detailed poseExtractor Timings.
Browse files Browse the repository at this point in the history
  • Loading branch information
bushibushi committed Sep 27, 2017
1 parent b54ae11 commit 7808f89
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 13 deletions.
38 changes: 32 additions & 6 deletions src/openpose/pose/poseExtractorCaffe.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,22 @@
#include <openpose/utilities/openCv.hpp>
#include <openpose/pose/poseExtractorCaffe.hpp>

// Sequence of (label, timestamp) checkpoints, kept in the order they were recorded.
using OpTimings = std::vector<std::pair<std::string, std::chrono::high_resolution_clock::time_point>>;

// File-local checkpoint buffer filled by timeNow().
// NOTE(review): timeNow() only ever appends; nothing in this helper removes entries,
// so any reset between measurements has to be done by the code that reads the buffer.
static OpTimings timings;

// Records a checkpoint: samples the high-resolution clock and appends the
// (label, timestamp) pair to the end of `timings`.
// @param label Text stored alongside the timestamp to identify the checkpoint.
static void timeNow(const std::string& label)
{
    // Sample the clock first so the stored timestamp excludes the cost of the insertion.
    const auto stamp = std::chrono::high_resolution_clock::now();
    timings.emplace_back(label, stamp);
}

// Formats the signed interval (t1 - t2) as a millisecond string.
// @param t1 Minuend time point (pass the later instant to get a positive value).
// @param t2 Subtrahend time point.
// @return The interval in milliseconds rendered by std::to_string, followed by " ms".
static std::string timeDiffToString(const std::chrono::high_resolution_clock::time_point& t1,
                                    const std::chrono::high_resolution_clock::time_point& t2)
{
    using SecondsAsDouble = std::chrono::duration<double>;
    // Convert to fractional seconds first, then scale to milliseconds.
    const double elapsedSeconds = std::chrono::duration_cast<SecondsAsDouble>(t1 - t2).count();
    const double elapsedMilliseconds = elapsedSeconds * 1e3;
    return std::to_string(elapsedMilliseconds) + " ms";
}


namespace op
{
PoseExtractorCaffe::PoseExtractorCaffe(const Point<int>& netInputSize, const Point<int>& netOutputSize, const Point<int>& outputSize, const int scaleNumber,
Expand Down Expand Up @@ -79,10 +95,10 @@ namespace op
// Security checks
if (inputNetData.empty())
error("Empty inputNetData.", __LINE__, __FUNCTION__, __FILE__);

timeNow("Start");
// 1. Caffe deep network
spNet->forwardPass(inputNetData.getConstPtr()); // ~79.3836ms

timeNow("Caffe Forward");
// 2. Resize heat maps + merge different scales
spResizeAndMergeCaffe->setScaleRatios(scaleRatios);
#ifndef CPU_ONLY
Expand All @@ -91,7 +107,7 @@ namespace op
#else
error("ResizeAndMergeCaffe CPU version not implemented yet.", __LINE__, __FUNCTION__, __FILE__);
#endif

timeNow("Resize Heat Maps");
// 3. Get peaks by Non-Maximum Suppression
spNmsCaffe->setThreshold((float)get(PoseProperty::NMSThreshold));
#ifndef CPU_ONLY
Expand All @@ -100,22 +116,32 @@ namespace op
#else
error("NmsCaffe CPU version not implemented yet.", __LINE__, __FUNCTION__, __FILE__);
#endif

timeNow("Peaks by nms");
// Get scale net to output
const auto scaleProducerToNetInput = resizeGetScaleFactor(inputDataSize, mNetOutputSize);
const Point<int> netSize{intRound(scaleProducerToNetInput*inputDataSize.x), intRound(scaleProducerToNetInput*inputDataSize.y)};
mScaleNetToOutput = {(float)resizeGetScaleFactor(netSize, mOutputSize)};

timeNow("Scale net to output");
// 4. Connecting body parts
spBodyPartConnectorCaffe->setScaleNetToOutput(mScaleNetToOutput);
spBodyPartConnectorCaffe->setInterMinAboveThreshold((int)get(PoseProperty::ConnectInterMinAboveThreshold));
spBodyPartConnectorCaffe->setInterThreshold((float)get(PoseProperty::ConnectInterThreshold));
spBodyPartConnectorCaffe->setMinSubsetCnt((int)get(PoseProperty::ConnectMinSubsetCnt));
spBodyPartConnectorCaffe->setMinSubsetScore((float)get(PoseProperty::ConnectMinSubsetScore));

// GPU version not implemented yet
spBodyPartConnectorCaffe->Forward_cpu({spHeatMapsBlob.get(), spPeaksBlob.get()}, mPoseKeypoints);
// spBodyPartConnectorCaffe->Forward_gpu({spHeatMapsBlob.get(), spPeaksBlob.get()}, {spPoseBlob.get()}, mPoseKeypoints);
timeNow("Connect Body Parts");

const auto totalTimeSec = timeDiffToString(timings.back().second, timings.front().second);
const auto message = "Pose estimation successfully finished. Total time: " + totalTimeSec + " seconds.";
op::log(message, op::Priority::High);

for(OpTimings::iterator timing = timings.begin()+1; timing != timings.end(); ++timing) {
const auto log_time = (*timing).first + " - " + timeDiffToString((*timing).second, (*(timing-1)).second);
op::log(log_time, op::Priority::High);
}

}
catch (const std::exception& e)
{
Expand Down
41 changes: 34 additions & 7 deletions src/openpose/pose/poseExtractorTensorRT.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,21 @@
#include <openpose/utilities/openCv.hpp>
#include <openpose/pose/poseExtractorTensorRT.hpp>

// Sequence of (label, timestamp) checkpoints, kept in the order they were recorded.
using OpTimings = std::vector<std::pair<std::string, std::chrono::high_resolution_clock::time_point>>;

// File-local checkpoint buffer filled by timeNow().
// NOTE(review): timeNow() only ever appends; nothing in this helper removes entries,
// so any reset between measurements has to be done by the code that reads the buffer.
static OpTimings timings;

// Records a checkpoint: samples the high-resolution clock and appends the
// (label, timestamp) pair to the end of `timings`.
// @param label Text stored alongside the timestamp to identify the checkpoint.
static void timeNow(const std::string& label)
{
    // Sample the clock first so the stored timestamp excludes the cost of the insertion.
    const auto stamp = std::chrono::high_resolution_clock::now();
    timings.emplace_back(label, stamp);
}

// Formats the signed interval (t1 - t2) as a millisecond string.
// @param t1 Minuend time point (pass the later instant to get a positive value).
// @param t2 Subtrahend time point.
// @return The interval in milliseconds rendered by std::to_string, followed by " ms".
static std::string timeDiffToString(const std::chrono::high_resolution_clock::time_point& t1,
                                    const std::chrono::high_resolution_clock::time_point& t2)
{
    using SecondsAsDouble = std::chrono::duration<double>;
    // Convert to fractional seconds first, then scale to milliseconds.
    const double elapsedSeconds = std::chrono::duration_cast<SecondsAsDouble>(t1 - t2).count();
    const double elapsedMilliseconds = elapsedSeconds * 1e3;
    return std::to_string(elapsedMilliseconds) + " ms";
}


namespace op
{
Expand Down Expand Up @@ -82,19 +97,22 @@ namespace op
// Security checks
if (inputNetData.empty())
error("Empty inputNetData.", __LINE__, __FUNCTION__, __FILE__);

timeNow("Start");
// 1. TensorRT deep network
spNet->forwardPass(inputNetData.getConstPtr());

timeNow("TensorRT forward");
// 2. Resize heat maps + merge different scales
spResizeAndMergeTensorRT->setScaleRatios(scaleRatios);
timeNow("SpResizeAndMergeTensorRT");
#ifndef CPU_ONLY
spResizeAndMergeTensorRT->Forward_gpu({spTensorRTNetOutputBlob.get()}, {spHeatMapsBlob.get()}); // ~5ms
spResizeAndMergeTensorRT->Forward_cpu({spTensorRTNetOutputBlob.get()}, {spHeatMapsBlob.get()}); // ~5ms
timeNow("RaM forward_gpu");
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
timeNow("CudaCheck");
#else
error("ResizeAndMergeTensorRT CPU version not implemented yet.", __LINE__, __FUNCTION__, __FILE__);
#endif

timeNow("Resize heat Maps");
// 3. Get peaks by Non-Maximum Suppression
spNmsTensorRT->setThreshold((float)get(PoseProperty::NMSThreshold));
#ifndef CPU_ONLY
Expand All @@ -103,22 +121,31 @@ namespace op
#else
error("NmsTensorRT CPU version not implemented yet.", __LINE__, __FUNCTION__, __FILE__);
#endif

timeNow("Peaks by nms");
// Get scale net to output
const auto scaleProducerToNetInput = resizeGetScaleFactor(inputDataSize, mNetOutputSize);
const Point<int> netSize{intRound(scaleProducerToNetInput*inputDataSize.x), intRound(scaleProducerToNetInput*inputDataSize.y)};
mScaleNetToOutput = {(float)resizeGetScaleFactor(netSize, mOutputSize)};

timeNow("Scale net to output");
// 4. Connecting body parts
spBodyPartConnectorTensorRT->setScaleNetToOutput(mScaleNetToOutput);
spBodyPartConnectorTensorRT->setInterMinAboveThreshold((int)get(PoseProperty::ConnectInterMinAboveThreshold));
spBodyPartConnectorTensorRT->setInterThreshold((float)get(PoseProperty::ConnectInterThreshold));
spBodyPartConnectorTensorRT->setMinSubsetCnt((int)get(PoseProperty::ConnectMinSubsetCnt));
spBodyPartConnectorTensorRT->setMinSubsetScore((float)get(PoseProperty::ConnectMinSubsetScore));

// GPU version not implemented yet
spBodyPartConnectorTensorRT->Forward_cpu({spHeatMapsBlob.get(), spPeaksBlob.get()}, mPoseKeypoints);
// spBodyPartConnectorTensorRT->Forward_gpu({spHeatMapsBlob.get(), spPeaksBlob.get()}, {spPoseBlob.get()}, mPoseKeypoints);
timeNow("Connect Body Parts");

const auto totalTimeSec = timeDiffToString(timings.back().second, timings.front().second);
const auto message = "Pose estimation successfully finished. Total time: " + totalTimeSec + " seconds.";
op::log(message, op::Priority::High);

for(OpTimings::iterator timing = timings.begin()+1; timing != timings.end(); ++timing) {
const auto log_time = (*timing).first + " - " + timeDiffToString((*timing).second, (*(timing-1)).second);
op::log(log_time, op::Priority::High);
}
}
catch (const std::exception& e)
{
Expand Down

0 comments on commit 7808f89

Please sign in to comment.