Add table transformer example (#1043)

Signed-off-by: Mengni Wang <[email protected]>
intel · Sep 11, 2023 · eb8a956 · eb8a956
1 parent 3c3673d
commit eb8a956
Show file tree

Hide file tree

Showing 10 changed files with 537 additions and 0 deletions.
diff --git a/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt b/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt
@@ -204,6 +204,7 @@ DCMAKE
 DDP
 DDR
 DENABLE
+DETR
 DFS
 DFabiansResUNet
 DGAN
@@ -729,6 +730,7 @@ Protobuf
 PrunePolicy
 Pruning's
 PruningConf
+PubTables
 PyObject
 PyPI
 PyPi
@@ -1242,6 +1244,7 @@ brgemm
 brighly
 broadcasted
 bs
+bsmock
 bsnone
 bugfix
 buildin
@@ -1438,6 +1441,7 @@ dest
 destructor
 detections
 detectron
+detr
 dev
 devcloud
 devel
@@ -2244,6 +2248,7 @@ pth
 ptq
 ptr
 publis
+pubtables
 pudae
 pw
 pwd
@@ -2491,6 +2496,7 @@ synset
 sys
 tLoss
 tanh
+tatr
 tb
 tbe
 tbody

diff --git a/examples/.config/model_params_onnxrt.json b/examples/.config/model_params_onnxrt.json
@@ -868,6 +868,13 @@
       "main_script": "main.py",
       "batch_size": 1
     },
+    "table_transformer": {
+      "model_src_dir": "object_detection/table_transformer/quantization/ptq_static",
+      "dataset_location": "/tf_dataset/dataset/PubTables-1M-Structure",
+      "input_model": "/tf_dataset2/models/onnx/table-transformer/model.onnx",
+      "main_script": "table-transformer/src/main.py",
+      "batch_size": 1
+    },
     "hf_codebert": {
         "model_src_dir": "nlp/huggingface_model/code_detection/quantization/ptq_static",
         "dataset_location": "/tf_dataset2/datasets/devign_dataset/valid.jsonl",

diff --git a/examples/README.md b/examples/README.md
@@ -1395,6 +1395,12 @@ Intel® Neural Compressor validated examples with multiple compression technique
     <td>Post-Training Static Quantization</td>
     <td><a href="./onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static">qlinearops</a> / <a href="./onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static">qdq</a></td>
   </tr>
+  <tr>
+    <td>Table Transformer</td>
+    <td>Object Detection</td>
+    <td>Post-Training Static Quantization</td>
+    <td><a href="./onnxrt/object_detection/table_transformer/quantization/ptq_static">qlinearops</a></td>
+  </tr>
   <tr>
     <td>SSD MobileNet V1 (ONNX Model Zoo)</td>
     <td>Object Detection</td>

diff --git a/...les/onnxrt/object_detection/table_transformer/quantization/ptq_static/README.md b/...les/onnxrt/object_detection/table_transformer/quantization/ptq_static/README.md
@@ -0,0 +1,49 @@
+Step-by-Step
+============
+
+This example shows how to export, quantize and evaluate the DETR R18 model for the table structure recognition task based on the PubTables-1M dataset.
+
+# Prerequisite
+
+## 1. Environment
+
+```shell
+pip install neural-compressor
+pip install -r requirements.txt
+bash prepare.sh
+```
+> Note: Validated ONNX Runtime [Version](/docs/source/installation_guide.md#validated-software-environment).
+
+## 2. Prepare Dataset
+
+Download dataset according to this [doc](https://github.com/microsoft/table-transformer/tree/main#training-and-evaluation-data).
+
+## 3. Prepare Model
+
+```shell
+wget https://huggingface.co/bsmock/tatr-pubtables1m-v1.0/resolve/main/pubtables1m_structure_detr_r18.pth
+
+# --output_model: path of the exported model, as *.onnx
+# --dataset_location: dataset_folder should contain the 'words' sub-folder
+bash export.sh --input_model=/path/to/pubtables1m_structure_detr_r18.pth \
+               --output_model=/path/to/export \
+               --dataset_location=/path/to/dataset_folder
+```
+
+# Run
+
+## 1. Quantization
+
+Static quantization with QOperator format:
+
+```bash
+# --input_model / --output_model: model paths as *.onnx
+# --dataset_location: dataset_folder should contain the 'words' sub-folder
+bash run_tuning.sh --input_model=path/to/model \
+                   --output_model=path/to/save \
+                   --dataset_location=/path/to/dataset_folder
+```
+
+## 2. Benchmark
+
+```bash
+# --input_model: model path as *.onnx
+# --dataset_location: dataset_folder should contain the 'words' sub-folder
+# --mode: performance or accuracy
+bash run_benchmark.sh --input_model=path/to/model \
+                      --dataset_location=/path/to/dataset_folder \
+                      --mode=performance
+```
diff --git a/examples/onnxrt/object_detection/table_transformer/quantization/ptq_static/export.sh b/examples/onnxrt/object_detection/table_transformer/quantization/ptq_static/export.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+set -x
+
+function main {
+  init_params "$@"
+  export_model
+
+}
+
+# init params
+function init_params {
+
+  for var in "$@"
+  do
+    case $var in
+      --input_model=*)
+          input_model=$(echo $var |cut -f2 -d=)
+      ;;
+      --output_model=*)
+          output_model=$(echo $var |cut -f2 -d=)
+      ;;
+      --dataset_location=*)
+          dataset_location=$(echo $var |cut -f2 -d=)
+      ;;
+    esac
+  done
+
+}
+
+# Run table-transformer/src/main.py in 'export' mode on CPU.
+# Globals read: input_model, output_model, dataset_location
+function export_model {
+    # Stop here if the directory is missing instead of running main.py
+    # from whatever the current directory happens to be.
+    cd table-transformer/src || exit 1
+    # Quote every expansion so paths containing spaces or glob characters
+    # reach main.py as single arguments.
+    python main.py \
+            --model_load_path "${input_model}" \
+            --output_model "${output_model}" \
+            --data_root_dir "${dataset_location}/PubTables1M-Structure-PASCAL-VOC" \
+            --table_words_dir "${dataset_location}/PubTables1M-Table-Words-JSON" \
+            --mode 'export' \
+            --data_type structure \
+            --device cpu \
+            --config_file structure_config.json
+}
+
+main "$@"