From e98993d8e016ecd21d5bc841601e2eff3576f48e Mon Sep 17 00:00:00 2001 From: HHH-ComputeLab Date: Wed, 6 Apr 2022 23:57:15 -0700 Subject: [PATCH 1/7] Add disentangled attention TRT plugin as contrib op --- .../core/graph/contrib_ops/contrib_defs.cc | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc index a433951987f5f..6b48b06a4c555 100644 --- a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc +++ b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc @@ -2339,6 +2339,44 @@ void RegisterContribSchemas() { updateOutputShape(ctx, 0, output_shape); }); + static const char* DisentangledAttention_TRT_ver1_doc = + R"DOC(Disentangled Attention TensorRT Plugin.)DOC"; + + ONNX_CONTRIB_OPERATOR_SCHEMA(DisentangledAttention_TRT) + .SetDomain(kOnnxDomain) + .SinceVersion(1) + .SetDoc(DisentangledAttention_TRT_ver1_doc) + .Input(0, "c2c_attention", "content-to-content attention tensor, QcKc^T.", "T") + .Input(1, "c2p_attention", "content-to-position attention tensor, QcKr^T.", "T") + .Input(2, "p2c_attention", "position-to-content attention tensor, KcQr^T.", "T") + .Output(0, "disentangled_attention", "The disentangled attention output tensor.", "T") + .TypeConstraint("T", {"tensor(float)", "tensor(float16)"}, "Constrain input and output types to float tensors.") + .Attr("span", "Maximum relative distance, k.", AttributeProto::INT) + .Attr("factor", "Scaling factor applied to attention values, 1/sqrt(3d). d is hidden size per head = H/N. H is hidden size, N is number of heads.", AttributeProto::FLOAT) + .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) { + // Type inference + using namespace ONNX_NAMESPACE; + propagateElemTypeFromInputToOutput(ctx, 0, 0); + + // Shape Inference + if (!hasInputShape(ctx, 0)) { + return; + } + + auto& input0_shape = getInputShape(ctx, 0); + auto& input0_dims = input0_shape.dim(); + if (input0_dims.size() != 3) { + fail_shape_inference("Input 0 shall be 3 dimensions"); + } + + // output dims is same as input[0] dims, i.e., regular c2c attention dims + ONNX_NAMESPACE::TensorShapeProto disentangled_attention_shape; + for (auto& dim : input0_dims) { + *disentangled_attention_shape.add_dim() = dim; + } + updateOutputShape(ctx, 0, disentangled_attention_shape); + }); + #ifndef _OPSCHEMA_LIB_ // Register the NCHWc schemas if supported by the platform. if (MlasNchwcGetBlockSize() > 1) { @@ -2353,3 +2391,4 @@ void RegisterContribSchemas() { } // namespace contrib } // namespace onnxruntime + \ No newline at end of file From b5aa92ab946b55cc1cdb7b74c576b3843fae18be Mon Sep 17 00:00:00 2001 From: HHH-Desktop Date: Thu, 14 Apr 2022 11:18:57 -0700 Subject: [PATCH 2/7] update plugin name & remove null character --- onnxruntime/core/graph/contrib_ops/contrib_defs.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc index 6b48b06a4c555..f491868d29444 100644 --- a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc +++ b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc @@ -2342,7 +2342,7 @@ void RegisterContribSchemas() { static const char* DisentangledAttention_TRT_ver1_doc = R"DOC(Disentangled Attention TensorRT Plugin.)DOC"; - ONNX_CONTRIB_OPERATOR_SCHEMA(DisentangledAttention_TRT) + ONNX_CONTRIB_OPERATOR_SCHEMA(DisentangledAttentionPlugin) .SetDomain(kOnnxDomain) .SinceVersion(1) .SetDoc(DisentangledAttention_TRT_ver1_doc) @@ -2390,5 +2390,4 @@ void RegisterContribSchemas() { } } // namespace contrib -} // namespace onnxruntime - \ No newline at end of file +} // namespace onnxruntime \ No newline at end of file From f28014abb09a8961bcf8b1d2d54713df7eab79a5 Mon Sep 17 00:00:00 2001 From: HHH-Desktop Date: Tue, 19 Apr 2022 21:05:42 -0700 Subject: [PATCH 3/7] update onnx-tensorrt submodule with my beta version --- cmake/external/onnx-tensorrt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/external/onnx-tensorrt b/cmake/external/onnx-tensorrt index 4f54a1950e117..0510e54ed2d99 160000 --- a/cmake/external/onnx-tensorrt +++ b/cmake/external/onnx-tensorrt @@ -1 +1 @@ -Subproject commit 4f54a1950e1174dca490900eb7b07cc374f53d41 +Subproject commit 0510e54ed2d99a4fcfc619a5a97b7b682344a07e From b1d042897ef892e019732cf1e5174b979c2717bc Mon Sep 17 00:00:00 2001 From: HHH-Desktop Date: Fri, 22 Apr 2022 13:38:48 -0700 Subject: [PATCH 4/7] use suggested plugin name & simpler shape propagation --- onnxruntime/core/graph/contrib_ops/contrib_defs.cc | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc index f491868d29444..b0d64c01c5d5b 100644 --- a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc +++ b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc @@ -2342,7 +2342,7 @@ void RegisterContribSchemas() { static const char* DisentangledAttention_TRT_ver1_doc = R"DOC(Disentangled Attention TensorRT Plugin.)DOC"; - ONNX_CONTRIB_OPERATOR_SCHEMA(DisentangledAttentionPlugin) + ONNX_CONTRIB_OPERATOR_SCHEMA(DisentangledAttention_TRT) .SetDomain(kOnnxDomain) .SinceVersion(1) .SetDoc(DisentangledAttention_TRT_ver1_doc) @@ -2370,11 +2370,13 @@ void RegisterContribSchemas() { } // output dims is same as input[0] dims, i.e., regular c2c attention dims - ONNX_NAMESPACE::TensorShapeProto disentangled_attention_shape; - for (auto& dim : input0_dims) { - *disentangled_attention_shape.add_dim() = dim; - } - updateOutputShape(ctx, 0, disentangled_attention_shape); + // ONNX_NAMESPACE::TensorShapeProto disentangled_attention_shape; + // for (auto& dim : input0_dims) { + // *disentangled_attention_shape.add_dim() = dim; + // } + // updateOutputShape(ctx, 0, disentangled_attention_shape); + propagateShapeFromInputToOutput(ctx, 0, 0); + }); #ifndef _OPSCHEMA_LIB_ From 1bbc01444656a400fe689f843bedfb7ff0eeec6a Mon Sep 17 00:00:00 2001 From: HHH-Desktop Date: Fri, 22 Apr 2022 15:20:22 -0700 Subject: [PATCH 5/7] update onnx-tensorrt gitsubmodule to temporary fork --- .gitmodules | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index 770156842e28c..11c0185c1d3de 100644 --- a/.gitmodules +++ b/.gitmodules @@ -71,4 +71,6 @@ url = https://github.com/pytorch/cpuinfo.git [submodule "cmake/external/onnx-tensorrt"] path = cmake/external/onnx-tensorrt - url = https://github.com/onnx/onnx-tensorrt.git + url = https://github.com/symphonylyh/onnx-tensorrt.git + branch = deberta_trt_plugin + \ No newline at end of file From a18c96ac8ff74dd70d632c4f6462b116634b4cd6 Mon Sep 17 00:00:00 2001 From: HHH-Desktop Date: Fri, 22 Apr 2022 16:02:23 -0700 Subject: [PATCH 6/7] update onnx-tensorrt to temporary commit --- cmake/external/onnx-tensorrt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/external/onnx-tensorrt b/cmake/external/onnx-tensorrt index 0510e54ed2d99..ffda4fdc55846 160000 --- a/cmake/external/onnx-tensorrt +++ b/cmake/external/onnx-tensorrt @@ -1 +1 @@ -Subproject commit 0510e54ed2d99a4fcfc619a5a97b7b682344a07e +Subproject commit ffda4fdc558468c3a55067f265c5eb5e517e4f95 From 86cffed591857dd2cc12ee774ac2c60d093b9e26 Mon Sep 17 00:00:00 2001 From: HHH-Desktop Date: Thu, 5 May 2022 11:52:19 -0700 Subject: [PATCH 7/7] redirect submodule back to latest 8.2-GA release of onnx-tensorrt repo --- .gitmodules | 5 ++--- cmake/external/onnx-tensorrt | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.gitmodules b/.gitmodules index 11c0185c1d3de..95a9344ebf919 100644 --- a/.gitmodules +++ b/.gitmodules @@ -71,6 +71,5 @@ url = https://github.com/pytorch/cpuinfo.git [submodule "cmake/external/onnx-tensorrt"] path = cmake/external/onnx-tensorrt - url = https://github.com/symphonylyh/onnx-tensorrt.git - branch = deberta_trt_plugin - \ No newline at end of file + url = https://github.com/onnx/onnx-tensorrt.git + branch = 8.2-GA \ No newline at end of file diff --git a/cmake/external/onnx-tensorrt b/cmake/external/onnx-tensorrt index ffda4fdc55846..f42daeee49f25 160000 --- a/cmake/external/onnx-tensorrt +++ b/cmake/external/onnx-tensorrt @@ -1 +1 @@ -Subproject commit ffda4fdc558468c3a55067f265c5eb5e517e4f95 +Subproject commit f42daeee49f2517a954c5601f0f76bef9ed94b62