-
Notifications
You must be signed in to change notification settings - Fork 648
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* add sdk profiler docs * fix typos
- Loading branch information
Showing
4 changed files
with
222 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ Setup & Usage | |
:maxdepth: 1 | ||
|
||
quick_start | ||
profiler | ||
|
||
|
||
API Reference | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
# profiler | ||
|
||
The SDK can record the time consumed by each module in the pipeline. This feature is disabled by default. To use it, two steps are required: | ||
|
||
- Generate profiler data | ||
- Analyze profiler data | ||
|
||
## Generate profiler data | ||
|
||
Taking the C interface and the classification pipeline as an example: when creating the pipeline, use the create API that accepts a context, and add a profiler handle to that context. The complete code is shown below. Running the demo normally generates the profiler data file `profiler_data.txt` in the current directory. | ||
|
||
```c++ | ||
#include <fstream> | ||
#include <opencv2/imgcodecs/imgcodecs.hpp> | ||
#include <string> | ||
|
||
#include "mmdeploy/classifier.h" | ||
|
||
int main(int argc, char* argv[]) { | ||
if (argc != 4) { | ||
fprintf(stderr, "usage:\n image_classification device_name dump_model_directory image_path\n"); | ||
return 1; | ||
} | ||
auto device_name = argv[1]; | ||
auto model_path = argv[2]; | ||
auto image_path = argv[3]; | ||
cv::Mat img = cv::imread(image_path); | ||
if (!img.data) { | ||
fprintf(stderr, "failed to load image: %s\n", image_path); | ||
return 1; | ||
} | ||
|
||
mmdeploy_model_t model{}; | ||
mmdeploy_model_create_by_path(model_path, &model); | ||
|
||
// create profiler and add it to context | ||
// profiler data will save to profiler_data.txt | ||
mmdeploy_profiler_t profiler{}; | ||
mmdeploy_profiler_create("profiler_data.txt", &profiler); | ||
|
||
mmdeploy_context_t context{}; | ||
mmdeploy_context_create_by_device(device_name, 0, &context); | ||
mmdeploy_context_add(context, MMDEPLOY_TYPE_PROFILER, nullptr, profiler); | ||
|
||
mmdeploy_classifier_t classifier{}; | ||
int status{}; | ||
status = mmdeploy_classifier_create_v2(model, context, &classifier); | ||
if (status != MMDEPLOY_SUCCESS) { | ||
fprintf(stderr, "failed to create classifier, code: %d\n", (int)status); | ||
return 1; | ||
} | ||
|
||
mmdeploy_mat_t mat{ | ||
img.data, img.rows, img.cols, 3, MMDEPLOY_PIXEL_FORMAT_BGR, MMDEPLOY_DATA_TYPE_UINT8}; | ||
|
||
// inference loop | ||
for (int i = 0; i < 100; i++) { | ||
mmdeploy_classification_t* res{}; | ||
int* res_count{}; | ||
status = mmdeploy_classifier_apply(classifier, &mat, 1, &res, &res_count); | ||
|
||
mmdeploy_classifier_release_result(res, res_count, 1); | ||
} | ||
|
||
mmdeploy_classifier_destroy(classifier); | ||
|
||
mmdeploy_model_destroy(model); | ||
mmdeploy_profiler_destroy(profiler); | ||
mmdeploy_context_destroy(context); | ||
|
||
return 0; | ||
} | ||
|
||
``` | ||
## Analyze profiler data | ||
The profiler data can be parsed with the following script. | ||
```bash | ||
python tools/sdk_analyze.py profiler_data.txt | ||
``` | ||
|
||
The parsed results are shown below, where "name" is the name of the node, "n_call" is the number of calls, "t_mean" is the average time consumption, and "t_50%" and "t_90%" are percentiles of the time consumption. | ||
|
||
```bash | ||
+---------------------------+--------+-------+--------+--------+-------+-------+ | ||
| name | occupy | usage | n_call | t_mean | t_50% | t_90% | | ||
+===========================+========+=======+========+========+=======+=======+ | ||
| ./Pipeline | - | - | 100 | 4.831 | 1.913 | 1.946 | | ||
+---------------------------+--------+-------+--------+--------+-------+-------+ | ||
| Preprocess/Compose | - | - | 100 | 0.125 | 0.118 | 0.144 | | ||
+---------------------------+--------+-------+--------+--------+-------+-------+ | ||
| LoadImageFromFile | 0.017 | 0.017 | 100 | 0.081 | 0.077 | 0.098 | | ||
+---------------------------+--------+-------+--------+--------+-------+-------+ | ||
| Resize | 0.003 | 0.003 | 100 | 0.012 | 0.012 | 0.013 | | ||
+---------------------------+--------+-------+--------+--------+-------+-------+ | ||
| CenterCrop | 0.002 | 0.002 | 100 | 0.008 | 0.008 | 0.008 | | ||
+---------------------------+--------+-------+--------+--------+-------+-------+ | ||
| Normalize | 0.002 | 0.002 | 100 | 0.009 | 0.009 | 0.009 | | ||
+---------------------------+--------+-------+--------+--------+-------+-------+ | ||
| ImageToTensor | 0.002 | 0.002 | 100 | 0.008 | 0.007 | 0.007 | | ||
+---------------------------+--------+-------+--------+--------+-------+-------+ | ||
| Collect | 0.001 | 0.001 | 100 | 0.005 | 0.005 | 0.005 | | ||
+---------------------------+--------+-------+--------+--------+-------+-------+ | ||
| resnet | 0.968 | 0.968 | 100 | 4.678 | 1.767 | 1.774 | | ||
+---------------------------+--------+-------+--------+--------+-------+-------+ | ||
| postprocess | 0.003 | 0.003 | 100 | 0.015 | 0.015 | 0.017 | | ||
+---------------------------+--------+-------+--------+--------+-------+-------+ | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ SDK 使用说明 | |
:maxdepth: 1 | ||
|
||
quick_start | ||
profiler | ||
|
||
|
||
API Reference | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
# Pipeline 速度分析 (profiler) | ||
|
||
SDK 提供 pipeline 各模块耗时统计功能,默认关闭,若要使用该功能,需要两个步骤: | ||
|
||
- 生成性能数据 | ||
- 分析性能数据 | ||
|
||
## 生成性能数据 | ||
|
||
以 C 接口,分类 pipeline 为例。在创建 pipeline 时需要使用带有 context 信息的接口,并在 context 中加入 profiler 信息。详细代码如下。正常运行 demo 会在当前目录生成 profiler 数据 `profiler_data.txt`。 | ||
|
||
```c++ | ||
#include <fstream> | ||
#include <opencv2/imgcodecs/imgcodecs.hpp> | ||
#include <string> | ||
|
||
#include "mmdeploy/classifier.h" | ||
|
||
int main(int argc, char* argv[]) { | ||
if (argc != 4) { | ||
fprintf(stderr, "usage:\n image_classification device_name dump_model_directory image_path\n"); | ||
return 1; | ||
} | ||
auto device_name = argv[1]; | ||
auto model_path = argv[2]; | ||
auto image_path = argv[3]; | ||
cv::Mat img = cv::imread(image_path); | ||
if (!img.data) { | ||
fprintf(stderr, "failed to load image: %s\n", image_path); | ||
return 1; | ||
} | ||
|
||
mmdeploy_model_t model{}; | ||
mmdeploy_model_create_by_path(model_path, &model); | ||
|
||
// create profiler and add it to context | ||
// profiler data will save to profiler_data.txt | ||
mmdeploy_profiler_t profiler{}; | ||
mmdeploy_profiler_create("profiler_data.txt", &profiler); | ||
|
||
mmdeploy_context_t context{}; | ||
mmdeploy_context_create_by_device(device_name, 0, &context); | ||
mmdeploy_context_add(context, MMDEPLOY_TYPE_PROFILER, nullptr, profiler); | ||
|
||
mmdeploy_classifier_t classifier{}; | ||
int status{}; | ||
status = mmdeploy_classifier_create_v2(model, context, &classifier); | ||
if (status != MMDEPLOY_SUCCESS) { | ||
fprintf(stderr, "failed to create classifier, code: %d\n", (int)status); | ||
return 1; | ||
} | ||
|
||
mmdeploy_mat_t mat{ | ||
img.data, img.rows, img.cols, 3, MMDEPLOY_PIXEL_FORMAT_BGR, MMDEPLOY_DATA_TYPE_UINT8}; | ||
|
||
// inference loop | ||
for (int i = 0; i < 100; i++) { | ||
mmdeploy_classification_t* res{}; | ||
int* res_count{}; | ||
status = mmdeploy_classifier_apply(classifier, &mat, 1, &res, &res_count); | ||
|
||
mmdeploy_classifier_release_result(res, res_count, 1); | ||
} | ||
|
||
mmdeploy_classifier_destroy(classifier); | ||
|
||
mmdeploy_model_destroy(model); | ||
mmdeploy_profiler_destroy(profiler); | ||
mmdeploy_context_destroy(context); | ||
|
||
return 0; | ||
} | ||
|
||
``` | ||
## 分析性能数据 | ||
使用脚本可对性能数据进行解析。 | ||
```bash | ||
python tools/sdk_analyze.py profiler_data.txt | ||
``` | ||
|
||
解析结果如下,其中 name 表示节点的名称,n_call 表示调用的次数,t_mean 表示平均耗时,t_50% 和 t_90% 表示耗时的百分位数。 | ||
|
||
```bash | ||
+---------------------------+--------+-------+--------+--------+-------+-------+ | ||
| name | occupy | usage | n_call | t_mean | t_50% | t_90% | | ||
+===========================+========+=======+========+========+=======+=======+ | ||
| ./Pipeline | - | - | 100 | 4.831 | 1.913 | 1.946 | | ||
+---------------------------+--------+-------+--------+--------+-------+-------+ | ||
| Preprocess/Compose | - | - | 100 | 0.125 | 0.118 | 0.144 | | ||
+---------------------------+--------+-------+--------+--------+-------+-------+ | ||
| LoadImageFromFile | 0.017 | 0.017 | 100 | 0.081 | 0.077 | 0.098 | | ||
+---------------------------+--------+-------+--------+--------+-------+-------+ | ||
| Resize | 0.003 | 0.003 | 100 | 0.012 | 0.012 | 0.013 | | ||
+---------------------------+--------+-------+--------+--------+-------+-------+ | ||
| CenterCrop | 0.002 | 0.002 | 100 | 0.008 | 0.008 | 0.008 | | ||
+---------------------------+--------+-------+--------+--------+-------+-------+ | ||
| Normalize | 0.002 | 0.002 | 100 | 0.009 | 0.009 | 0.009 | | ||
+---------------------------+--------+-------+--------+--------+-------+-------+ | ||
| ImageToTensor | 0.002 | 0.002 | 100 | 0.008 | 0.007 | 0.007 | | ||
+---------------------------+--------+-------+--------+--------+-------+-------+ | ||
| Collect | 0.001 | 0.001 | 100 | 0.005 | 0.005 | 0.005 | | ||
+---------------------------+--------+-------+--------+--------+-------+-------+ | ||
| resnet | 0.968 | 0.968 | 100 | 4.678 | 1.767 | 1.774 | | ||
+---------------------------+--------+-------+--------+--------+-------+-------+ | ||
| postprocess | 0.003 | 0.003 | 100 | 0.015 | 0.015 | 0.017 | | ||
+---------------------------+--------+-------+--------+--------+-------+-------+ | ||
``` |