From d0ab881d070756d50d8b75fa03e80d78f9b0a0cc Mon Sep 17 00:00:00 2001
From: wraveane <75912517+wraveane@users.noreply.github.com>
Date: Fri, 4 Feb 2022 15:10:04 -0500
Subject: [PATCH] Contrib ops for TRT plugins: EfficientNMS and Pyramid ROI
 Align (#9486)

* Contrib ops for TRT plugins: EfficientNMS and Pyramid ROI Align

* Contrib ops for TRT plugins: Multilevel Crop and Resize
---
 .../core/graph/contrib_ops/contrib_defs.cc | 163 ++++++++++++++++++
 1 file changed, 163 insertions(+)

diff --git a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc
index 950dd1760eded..52e076cdf66b0 100644
--- a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc
+++ b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc
@@ -2820,6 +2820,169 @@ Example 4:
         }
       });
 
+  static const char* EfficientNMS_TRT_ver1_doc =
+      R"DOC(Efficient NMS TensorRT Plugin.)DOC";
+
+  ONNX_CONTRIB_OPERATOR_SCHEMA(EfficientNMS_TRT)
+      .SetDomain(kOnnxDomain)
+      .SinceVersion(1)
+      .SetDoc(EfficientNMS_TRT_ver1_doc)
+      .Input(0, "boxes", "The boxes input tensor.", "T")
+      .Input(1, "scores", "The scores input tensor.", "T")
+      .Input(2, "anchors", "The anchors input tensor.", "T", OpSchema::Optional)
+      .Output(0, "num_detections", "The num_detections output tensor.", "tensor(int32)")
+      .Output(1, "detection_boxes", "The detection_boxes output tensor.", "T")
+      .Output(2, "detection_scores", "The detection_scores output tensor.", "T")
+      .Output(3, "detection_classes", "The detection_classes output tensor.", "tensor(int32)")
+      .TypeConstraint("T", {"tensor(float)", "tensor(float16)"}, "Constrain input and output types to float tensors.")
+      .Attr("background_class", "Background class ID.", AttributeProto::INT)
+      .Attr("box_coding", "Encoding type for the boxes or anchors inputs.", AttributeProto::INT)
+      .Attr("iou_threshold", "Box IOU threshold value.", AttributeProto::FLOAT)
+      .Attr("max_output_boxes", "Max detections to output.", AttributeProto::INT)
+      .Attr("plugin_version", "Version number of the TRT plugin.", AttributeProto::STRING)
+      .Attr("score_activation", "Activation function to apply to the scores input.", AttributeProto::INT)
+      .Attr("score_threshold", "Score threshold value.", AttributeProto::FLOAT)
+      .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
+        // Type inference
+        using namespace ONNX_NAMESPACE;
+        ONNX_NAMESPACE::updateOutputElemType(ctx, 0, ONNX_NAMESPACE::TensorProto::INT32);
+        propagateElemTypeFromInputToOutput(ctx, 0, 1);
+        propagateElemTypeFromInputToOutput(ctx, 0, 2);
+        ONNX_NAMESPACE::updateOutputElemType(ctx, 3, ONNX_NAMESPACE::TensorProto::INT32);
+
+        // Shape Inference
+        if (!hasInputShape(ctx, 0)) {
+          return;
+        }
+        int64_t max_output_boxes = 1;
+        auto max_output_boxes_proto = ctx.getAttribute("max_output_boxes");
+        if (max_output_boxes_proto) {
+          max_output_boxes = max_output_boxes_proto->i();
+        }
+        if (max_output_boxes < 1) {
+          fail_shape_inference("Attribute 'max_output_boxes' must be >= 1.")
+        }
+
+        Dim batch_size;
+        unifyInputDim(ctx, 0, 0, batch_size);
+
+        ONNX_NAMESPACE::TensorShapeProto num_detections_shape;
+        *num_detections_shape.add_dim() = batch_size;
+        num_detections_shape.add_dim()->set_dim_value(1);
+        updateOutputShape(ctx, 0, num_detections_shape);
+
+        ONNX_NAMESPACE::TensorShapeProto detection_boxes_shape;
+        *detection_boxes_shape.add_dim() = batch_size;
+        detection_boxes_shape.add_dim()->set_dim_value(max_output_boxes);
+        detection_boxes_shape.add_dim()->set_dim_value(4);
+        updateOutputShape(ctx, 1, detection_boxes_shape);
+
+        ONNX_NAMESPACE::TensorShapeProto detection_scores_shape;
+        *detection_scores_shape.add_dim() = batch_size;
+        detection_scores_shape.add_dim()->set_dim_value(max_output_boxes);
+        updateOutputShape(ctx, 2, detection_scores_shape);
+
+        ONNX_NAMESPACE::TensorShapeProto detection_classes_shape;
+        *detection_classes_shape.add_dim() = batch_size;
+        detection_classes_shape.add_dim()->set_dim_value(max_output_boxes);
+        updateOutputShape(ctx, 3, detection_classes_shape);
+      });
+
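For reference, a node exercising the EfficientNMS_TRT schema above can be emitted with the ONNX protobuf C++ API. This is a minimal sketch, not part of the patch: the tensor names and attribute values are illustrative assumptions, and the shape comments follow the TensorRT EfficientNMS plugin's conventions; the schema itself only fixes the element types and the inferred output shapes.

// Sketch only; names and values are assumptions, not schema requirements.
#include <cstdint>
#include <string>
#include "onnx/onnx_pb.h"

onnx::NodeProto MakeEfficientNmsNode() {
  onnx::NodeProto node;
  node.set_op_type("EfficientNMS_TRT");
  node.set_domain("");  // kOnnxDomain, as registered above
  node.add_input("boxes");   // float [batch_size, number_boxes, 4]
  node.add_input("scores");  // float [batch_size, number_boxes, num_classes]
  // The optional `anchors` input is omitted here.
  node.add_output("num_detections");     // int32 [batch_size, 1]
  node.add_output("detection_boxes");    // float [batch_size, max_output_boxes, 4]
  node.add_output("detection_scores");   // float [batch_size, max_output_boxes]
  node.add_output("detection_classes");  // int32 [batch_size, max_output_boxes]

  auto add_int = [&node](const std::string& name, int64_t value) {
    onnx::AttributeProto* attr = node.add_attribute();
    attr->set_name(name);
    attr->set_type(onnx::AttributeProto::INT);
    attr->set_i(value);
  };
  auto add_float = [&node](const std::string& name, float value) {
    onnx::AttributeProto* attr = node.add_attribute();
    attr->set_name(name);
    attr->set_type(onnx::AttributeProto::FLOAT);
    attr->set_f(value);
  };
  add_int("background_class", -1);   // assumed: -1 disables background skipping
  add_int("box_coding", 0);          // assumed: corner-coordinate boxes
  add_int("max_output_boxes", 100);  // values < 1 fail shape inference
  add_int("score_activation", 0);    // assumed: scores used as-is
  add_float("iou_threshold", 0.5f);
  add_float("score_threshold", 0.05f);

  onnx::AttributeProto* version = node.add_attribute();
  version->set_name("plugin_version");
  version->set_type(onnx::AttributeProto::STRING);
  version->set_s("1");
  return node;
}

Given such a node, the inference function above takes batch_size from dim 0 of boxes and yields num_detections as [batch_size, 1] int32 and the three detection outputs as [batch_size, max_output_boxes] tensors (with a trailing 4 for detection_boxes).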
+  static const char* MultilevelCropAndResize_TRT_ver1_doc =
+      R"DOC(Multilevel Crop and Resize TensorRT Plugin.)DOC";
+
+  ONNX_CONTRIB_OPERATOR_SCHEMA(MultilevelCropAndResize_TRT)
+      .SetDomain(kOnnxDomain)
+      .SinceVersion(1)
+      .SetDoc(MultilevelCropAndResize_TRT_ver1_doc)
+      .Input(0, "boxes", "The boxes input tensor.", "T")
+      .Input(1, "feature_map_0", "The first feature map input tensor.", "T")
+      .Input(2, "feature_map_1", "The second feature map input tensor.", "T")
+      .Input(3, "feature_map_2", "The third feature map input tensor.", "T")
+      .Input(4, "feature_map_3", "The fourth feature map input tensor.", "T")
+      .Output(0, "patches", "The cropped patches output tensor.", "T")
+      .TypeConstraint("T", {"tensor(float)"}, "Constrain input and output types to float tensors.")
+      .Attr("image_size", "Image size.", AttributeProto::INTS)
+      .Attr("pooled_size", "Pooled size.", AttributeProto::INT)
+      .Attr("plugin_version", "Version number of the TRT plugin.", AttributeProto::STRING)
+      .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
+        // Type inference
+        propagateElemTypeFromInputToOutput(ctx, 0, 0);
+
+        // Shape Inference
+        if (!hasInputShape(ctx, 0)) {
+          return;
+        }
+        int64_t pooled_size = 1;
+        auto pooled_size_proto = ctx.getAttribute("pooled_size");
+        if (pooled_size_proto) {
+          pooled_size = pooled_size_proto->i();
+        }
+        if (pooled_size < 1) {
+          fail_shape_inference("Attribute 'pooled_size' must be >= 1.")
+        }
+
+        Dim batch_size, number_boxes, channels;
+        unifyInputDim(ctx, 0, 0, batch_size);
+        unifyInputDim(ctx, 0, 1, number_boxes);
+        unifyInputDim(ctx, 1, 1, channels);
+
+        ONNX_NAMESPACE::TensorShapeProto output_shape;
+        *output_shape.add_dim() = batch_size;
+        *output_shape.add_dim() = number_boxes;
+        *output_shape.add_dim() = channels;
+        output_shape.add_dim()->set_dim_value(pooled_size);
+        output_shape.add_dim()->set_dim_value(pooled_size);
+        updateOutputShape(ctx, 0, output_shape);
+      });
+
+  static const char* PyramidROIAlign_TRT_ver1_doc =
+      R"DOC(Pyramid ROI Align TensorRT Plugin.)DOC";
+
+  ONNX_CONTRIB_OPERATOR_SCHEMA(PyramidROIAlign_TRT)
+      .SetDomain(kOnnxDomain)
+      .SinceVersion(1)
+      .SetDoc(PyramidROIAlign_TRT_ver1_doc)
+      .Input(0, "boxes", "The boxes input tensor.", "T")
+      .Input(1, "feature_map_0", "The first feature map input tensor.", "T")
+      .Input(2, "feature_map_1", "The second feature map input tensor.", "T")
+      .Input(3, "feature_map_2", "The third feature map input tensor.", "T")
+      .Input(4, "feature_map_3", "The fourth feature map input tensor.", "T")
+      .Output(0, "patches", "The cropped patches output tensor.", "T")
+      .TypeConstraint("T", {"tensor(float)"}, "Constrain input and output types to float tensors.")
+      .Attr("pooled_size", "Pooled size.", AttributeProto::INT)
+      .Attr("plugin_version", "Version number of the TRT plugin.", AttributeProto::STRING)
+      .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
+        // Type inference
+        propagateElemTypeFromInputToOutput(ctx, 0, 0);
+
+        // Shape Inference
+        if (!hasInputShape(ctx, 0)) {
+          return;
+        }
+        int64_t pooled_size = 1;
+        auto pooled_size_proto = ctx.getAttribute("pooled_size");
+        if (pooled_size_proto) {
+          pooled_size = pooled_size_proto->i();
+        }
+        if (pooled_size < 1) {
+          fail_shape_inference("Attribute 'pooled_size' must be >= 1.")
+        }
+
+        Dim batch_size, number_boxes, channels;
+        unifyInputDim(ctx, 0, 0, batch_size);
+        unifyInputDim(ctx, 0, 1, number_boxes);
+        unifyInputDim(ctx, 1, 1, channels);
+
+        ONNX_NAMESPACE::TensorShapeProto output_shape;
+        *output_shape.add_dim() = batch_size;
+        *output_shape.add_dim() = number_boxes;
+        *output_shape.add_dim() = channels;
+        output_shape.add_dim()->set_dim_value(pooled_size);
+        output_shape.add_dim()->set_dim_value(pooled_size);
+        updateOutputShape(ctx, 0, output_shape);
+      });
+
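MultilevelCropAndResize_TRT and PyramidROIAlign_TRT register identical inference logic: batch_size and number_boxes come from dims 0 and 1 of boxes, channels from dim 1 of feature_map_0 (implying an NCHW feature-map layout), and both pooled dimensions from the pooled_size attribute. A minimal sketch of emitting a PyramidROIAlign_TRT node follows, under the same assumptions as the previous example; the image_size values shown for the MultilevelCropAndResize_TRT variant are purely illustrative.

// Sketch only; names and values are assumptions, not schema requirements.
#include <cstdint>
#include <string>
#include "onnx/onnx_pb.h"

onnx::NodeProto MakePyramidRoiAlignNode(int64_t pooled_size = 7) {
  onnx::NodeProto node;
  node.set_op_type("PyramidROIAlign_TRT");
  node.set_domain("");  // kOnnxDomain, as registered above
  node.add_input("boxes");  // float [batch_size, number_boxes, 4]
  // Four pyramid levels; shape inference reads `channels` from feature_map_0.
  for (const std::string level : {"feature_map_0", "feature_map_1",
                                  "feature_map_2", "feature_map_3"}) {
    node.add_input(level);  // float [batch_size, channels, H_level, W_level]
  }
  node.add_output("patches");

  onnx::AttributeProto* pooled = node.add_attribute();
  pooled->set_name("pooled_size");
  pooled->set_type(onnx::AttributeProto::INT);
  pooled->set_i(pooled_size);  // values < 1 fail shape inference

  onnx::AttributeProto* version = node.add_attribute();
  version->set_name("plugin_version");
  version->set_type(onnx::AttributeProto::STRING);
  version->set_s("1");

  // MultilevelCropAndResize_TRT is wired the same way but also takes an
  // `image_size` INTS attribute (values here are illustrative only):
  // onnx::AttributeProto* image_size = node.add_attribute();
  // image_size->set_name("image_size");
  // image_size->set_type(onnx::AttributeProto::INTS);
  // image_size->add_ints(1024);
  // image_size->add_ints(1024);
  // image_size->add_ints(3);
  return node;
}

For either op, the patches output is then inferred as [batch_size, number_boxes, channels, pooled_size, pooled_size], matching the output_shape built above.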
   static const char* Gelu_ver1_doc =
       R"DOC(Gaussian Error Linear Unit.
 A high-performing neural network activation function.The GELU nonlinearity is