From a405658370cc05989209c6608ff9f591799c84ae Mon Sep 17 00:00:00 2001
From: Yi-Hong Lyu
Date: Thu, 3 Feb 2022 10:29:30 +0800
Subject: [PATCH] Fuse Clip->Q to Q (#10434)

* Fuse Clip->Q to Q
* Remove unused variable argmax_node
* Remove braces around scalar initializer
* Move GetClipConstantMinMax under ORT_MINIMAL_BUILD
* Consider epsilon so we can fuse more cases
---
 .../core/optimizer/conv_activation_fusion.cc  |  65 +----------
 .../core/optimizer/graph_transformer_utils.cc |   2 +
 .../qdq_transformer/clip_quantizelinear.cc    | 107 ++++++++++++++++++
 .../qdq_transformer/clip_quantizelinear.h     |  29 +++++
 onnxruntime/core/optimizer/utils.cc           |  62 +++++++++-
 onnxruntime/core/optimizer/utils.h            |   5 +
 .../test/optimizer/qdq_transformer_test.cc    |  75 ++++++++++++
 7 files changed, 280 insertions(+), 65 deletions(-)
 create mode 100644 onnxruntime/core/optimizer/qdq_transformer/clip_quantizelinear.cc
 create mode 100644 onnxruntime/core/optimizer/qdq_transformer/clip_quantizelinear.h
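The idea behind the fusion: QuantizeLinear with scale s and zero point zp can only produce quantized values for the float interval [s*(qmin - zp), s*(qmax - zp)], and it saturates anything outside that interval anyway. A preceding Clip whose [min, max] window contains that whole interval therefore changes nothing and can be removed. A minimal standalone sketch of the check, assuming an int8 QuantizeLinear; clip_is_redundant is illustrative and not a function in this PR (the committed rule additionally allows a float-epsilon slack on both bounds, see clip_quantizelinear.cc below):

#include <cstdint>

// Sketch of the redundancy test (int8 assumed): lower/upper is the float
// range the quantized output can represent.
bool clip_is_redundant(float scale, int8_t zero_point, float min, float max) {
  const float lower = scale * (-128 - zero_point);
  const float upper = scale * (127 - zero_point);
  // Q saturates to [lower, upper] regardless, so Clip is a no-op when its
  // window covers that whole interval.
  return min <= lower && max >= upper;
}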
diff --git a/onnxruntime/core/optimizer/conv_activation_fusion.cc b/onnxruntime/core/optimizer/conv_activation_fusion.cc
index da5a0f0c1e293..94211ab1b4d8e 100644
--- a/onnxruntime/core/optimizer/conv_activation_fusion.cc
+++ b/onnxruntime/core/optimizer/conv_activation_fusion.cc
@@ -5,73 +5,12 @@
 #include "core/graph/graph_utils.h"
 #include "core/optimizer/conv_activation_fusion.h"
 #include "core/optimizer/initializer.h"
+#include "core/optimizer/utils.h"
 
 using namespace ONNX_NAMESPACE;
 using namespace ::onnxruntime::common;
 namespace onnxruntime {
 
-namespace {
-// get min/max values from Clip if they are constant. Returns false if mutable and cannot be used
-static bool GetClipConstantMinMax(const Graph& graph, const Node& node, float& min, float& max) {
-  min = std::numeric_limits<float>::lowest();
-  max = std::numeric_limits<float>::max();
-
-  // Clip opset 6 has min and max as attributes. they're inputs from opset 11 on.
-  bool min_max_are_attributes = graph_utils::IsSupportedOptypeVersionAndDomain(node, "Clip", {6});
-  bool min_max_are_constant_values = true;
-
-  if (min_max_are_attributes) {
-    min = graph_utils::GetNodeAttribute(node, "min")->f();
-    max = graph_utils::GetNodeAttribute(node, "max")->f();
-  } else {
-    // update min/max if provided via a constant initializer
-    // return true if value is default or coming from a constant initializer and update 'value'
-    // return false if value is mutable
-    auto update_if_constant_value = [&graph](const Node& node, size_t input_idx, float& value) {
-      const auto& input_defs = node.InputDefs();
-      const NodeArg* input = (input_defs.size() > input_idx) ? input_defs[input_idx] : nullptr;
-
-      if (input == nullptr || !input->Exists()) {
-        // optional input not specified so using default value
-        return true;
-      }
-
-      bool is_constant = true;
-      const ONNX_NAMESPACE::TensorProto* initializer = graph_utils::GetConstantInitializer(graph, input->Name());
-      if (initializer) {
-        Initializer i(*initializer, graph.ModelPath());
-        switch (initializer->data_type()) {
-          case ONNX_NAMESPACE::TensorProto_DataType_FLOAT:
-            value = *i.data<float>();
-            break;
-          // double isn't currently supported
-          //case ONNX_NAMESPACE::TensorProto_DataType_DOUBLE:
-          //  value = static_cast<float>(*i.data<double>());
-          //  break;
-          case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16:
-            value = math::halfToFloat(i.data<MLFloat16>()->val);
-            break;
-          default:
-            ORT_THROW("Unexpected data type for Clip input of ", initializer->data_type());
-        }
-      } else {
-        is_constant = false;
-      }
-
-      return is_constant;
-    };
-
-    // 'min' is input 1, 'max' is input 2. both are optional.
-    // if the input is constant, 'min' or 'max' is updated by the call to get_if_constant_value
-    min_max_are_constant_values = update_if_constant_value(node, 1, min) &&
-                                  update_if_constant_value(node, 2, max);
-  }
-
-  return min_max_are_constant_values;
-}
-
-}  // namespace
-
 Status ConvActivationFusion::ApplyImpl(Graph& graph, bool& modified, int graph_level, const logging::Logger& logger) const {
   GraphViewer graph_viewer(graph);
   const auto& order = graph_viewer.GetNodesInTopologicalOrder();
@@ -173,7 +112,7 @@ Status ConvActivationFusion::ApplyImpl(Graph& graph, bool& modified, int graph_l
         activation_params.push_back(graph_utils::GetNodeAttribute(next_node, "alpha")->f());
       } else if (graph_utils::IsSupportedOptypeVersionAndDomain(next_node, "Clip", {6, 11, 12, 13})) {
         float min, max;
-        if (GetClipConstantMinMax(graph, next_node, min, max)) {
+        if (optimizer_utils::GetClipConstantMinMax(graph, next_node, min, max)) {
           activation_params.push_back(min);
           activation_params.push_back(max);
         } else {
diff --git a/onnxruntime/core/optimizer/graph_transformer_utils.cc b/onnxruntime/core/optimizer/graph_transformer_utils.cc
index ad1da8a266642..0f1d5324098a3 100644
--- a/onnxruntime/core/optimizer/graph_transformer_utils.cc
+++ b/onnxruntime/core/optimizer/graph_transformer_utils.cc
@@ -44,6 +44,7 @@
 #include "core/optimizer/nhwc_transformer.h"
 #include "core/optimizer/noop_elimination.h"
 #include "core/optimizer/not_where_fusion.h"
+#include "core/optimizer/qdq_transformer/clip_quantizelinear.h"
 #include "core/optimizer/qdq_transformer/qdq_propagation.h"
 #include "core/optimizer/qdq_transformer/qdq_s8_to_u8.h"
 #include "core/optimizer/qdq_transformer/relu_quantizelinear.h"
@@ -99,6 +100,7 @@ std::vector<std::unique_ptr<RewriteRule>> GenerateRewriteRules(
       rules.push_back(std::make_unique<ConvAddFusion>());
       rules.push_back(std::make_unique<ConvMulFusion>());
      rules.push_back(std::make_unique<ConvBNFusion>());
+      rules.push_back(std::make_unique<ClipQuantFusion>());
       rules.push_back(std::make_unique<ReluQuantFusion>());
       break;
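GenerateRewriteRules only builds the Level1 rule list; the rules are then driven by ORT's rule-based transformer, which walks the graph and, for every node whose op type matches TargetOpTypes(), calls SatisfyCondition() and then Apply(). A hedged sketch of the consuming side, assuming ORT's RuleBasedGraphTransformer::Register API and simplifying the actual session wiring:

// Hedged sketch: how the generated rule list is typically consumed.
auto rules = GenerateRewriteRules(TransformerLevel::Level1);
auto level1 = std::make_unique<RuleBasedGraphTransformer>("Level1RuleTransformer");
for (auto& rule : rules) {
  // The transformer dispatches each registered rule by TargetOpTypes().
  ORT_RETURN_IF_ERROR(level1->Register(std::move(rule)));
}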
diff --git a/onnxruntime/core/optimizer/qdq_transformer/clip_quantizelinear.cc b/onnxruntime/core/optimizer/qdq_transformer/clip_quantizelinear.cc
new file mode 100644
index 0000000000000..55d0a31e64e44
--- /dev/null
+++ b/onnxruntime/core/optimizer/qdq_transformer/clip_quantizelinear.cc
@@ -0,0 +1,107 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "core/optimizer/initializer.h"
+#include "core/optimizer/qdq_transformer/clip_quantizelinear.h"
+#include "core/optimizer/utils.h"
+#include "core/graph/graph_utils.h"
+
+using namespace ONNX_NAMESPACE;
+using namespace onnxruntime::common;
+namespace onnxruntime {
+
+static bool GetQConstantLowerUpper(const Graph& graph, const Node& node, float& lower, float& upper) {
+  const auto& input_defs = node.InputDefs();
+
+  constexpr size_t input_cnt_required = 3;
+  if (input_defs.size() != input_cnt_required) {
+    return false;
+  }
+
+  constexpr size_t s_idx = 1;
+  const NodeArg* s_input = input_defs[s_idx];
+
+  const ONNX_NAMESPACE::TensorProto* s_tensor_proto = graph_utils::GetConstantInitializer(graph, s_input->Name());
+  if (!s_tensor_proto) {
+    return false;
+  }
+
+  Initializer s_initializer(*s_tensor_proto, graph.ModelPath());
+  if (s_initializer.dims().size() != 0 ||
+      s_initializer.data_type() != ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
+    return false;
+  }
+  const float scale = s_initializer.data<float>()[0];
+
+  constexpr size_t zp_idx = 2;
+  const NodeArg* zp_input = input_defs[zp_idx];
+
+  const ONNX_NAMESPACE::TensorProto* zp_tensor_proto = graph_utils::GetConstantInitializer(graph, zp_input->Name());
+  if (!zp_tensor_proto) {
+    return false;
+  }
+
+  Initializer zp_initializer(*zp_tensor_proto, graph.ModelPath());
+  if (zp_initializer.dims().size() != 0) {
+    return false;
+  }
+
+  switch (zp_initializer.data_type()) {
+    case ONNX_NAMESPACE::TensorProto_DataType_INT8: {
+      const int8_t zero_point = zp_initializer.data<int8_t>()[0];
+      lower = scale * (-128 - zero_point);
+      upper = scale * (127 - zero_point);
+      break;
+    }
+    case ONNX_NAMESPACE::TensorProto_DataType_UINT8: {
+      const uint8_t zero_point = zp_initializer.data<uint8_t>()[0];
+      lower = scale * (0 - zero_point);
+      upper = scale * (255 - zero_point);
+      break;
+    }
+    default:
+      ORT_THROW("Unexpected data type for QuantizeLinear input y_zero_point of ", zp_initializer.data_type());
+  }
+  return true;
+}
+
+bool ClipQuantFusion::SatisfyCondition(const Graph& graph, const Node& node, const logging::Logger& /*logger*/) const {
+  if (!graph_utils::IsSupportedOptypeVersionAndDomain(node, "Clip", {1, 6, 11, 12, 13}) ||
+      !optimizer_utils::CheckOutputEdges(graph, node, 1)) {
+    return false;
+  }
+
+  // if Clip is followed by QuantizeLinear, it can potentially be fused into QuantizeLinear
+  const auto& next_node = *node.OutputNodesBegin();
+  if (!graph_utils::IsSupportedOptypeVersionAndDomain(next_node, "QuantizeLinear", {10, 13})) {
+    return false;
+  }
+
+  return true;
+}
+
+Status ClipQuantFusion::Apply(Graph& graph, Node& node, RewriteRuleEffect& rule_effect, const logging::Logger&) const {
+  float min, max;
+  if (!optimizer_utils::GetClipConstantMinMax(graph, node, min, max)) {
+    return Status::OK();
+  }
+
+  const Node& q_node = *graph.GetNode(node.OutputNodesBegin()->Index());
+
+  float lower, upper;
+  if (!GetQConstantLowerUpper(graph, q_node, lower, upper)) {
+    return Status::OK();
+  }
+
+  constexpr float epsilon = std::numeric_limits<float>::epsilon();
+  if (epsilon < min - lower || epsilon < upper - max) {
+    return Status::OK();
+  }
+
+  if (graph_utils::RemoveNode(graph, node)) {
+    rule_effect = RewriteRuleEffect::kRemovedCurrentNode;
+  }
+
+  return Status::OK();
+}
+}  // namespace onnxruntime
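The tests at the end of the patch exercise the classic ReLU6 case: for int8 output, scale = 6/255 with zero point -128 makes the representable interval exactly [0, 6], so a preceding Clip(0, 6) is provably redundant. A standalone arithmetic check of that case (not part of the PR, just the formula from GetQConstantLowerUpper applied to concrete numbers):

#include <cstdio>

int main() {
  // int8 QuantizeLinear with scale = 6/255 and zero_point = -128:
  const float scale = 6.0f / 255.0f;  // ~0.0235294f, cf. the test values
  const int zero_point = -128;
  const float lower = scale * (-128 - zero_point);  // 0.0f
  const float upper = scale * (127 - zero_point);   // 6.0f
  // Clip(min=0, max=6) cannot exclude any representable value here, so
  // ClipQuantFusion deletes the Clip (clip_count == 0 in the test below).
  std::printf("representable range: [%g, %g]\n", lower, upper);
  return 0;
}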
diff --git a/onnxruntime/core/optimizer/qdq_transformer/clip_quantizelinear.h b/onnxruntime/core/optimizer/qdq_transformer/clip_quantizelinear.h
new file mode 100644
index 0000000000000..870ee607ab7b2
--- /dev/null
+++ b/onnxruntime/core/optimizer/qdq_transformer/clip_quantizelinear.h
@@ -0,0 +1,29 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include "core/optimizer/rewrite_rule.h"
+
+namespace onnxruntime {
+
+/**
+@Class ClipQuantFusion
+
+Rewrite rule that fuses Clip into the QuantizeLinear that follows it
+*/
+class ClipQuantFusion : public RewriteRule {
+ public:
+  ClipQuantFusion() noexcept : RewriteRule("ClipQuantRewrite") {}
+
+  std::vector<std::string> TargetOpTypes() const noexcept override {
+    return {"Clip"};
+  }
+
+ private:
+  bool SatisfyCondition(const Graph& graph, const Node& node, const logging::Logger& logger) const override;
+
+  Status Apply(Graph& graph, Node& node, RewriteRuleEffect& rule_effect, const logging::Logger& logger) const override;
+};
+
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/optimizer/utils.cc b/onnxruntime/core/optimizer/utils.cc
index 3faf803af4572..e9260e553b703 100644
--- a/onnxruntime/core/optimizer/utils.cc
+++ b/onnxruntime/core/optimizer/utils.cc
@@ -281,9 +281,67 @@ bool IsOperationDeterministic(const std::string& domain, const std::string& op)
   if (domain.compare(kOnnxDomain) == 0) {
     auto iter = std::find(kOnnxDomainNonDeterministicOps.begin(), kOnnxDomainNonDeterministicOps.end(), op);
     return iter == kOnnxDomainNonDeterministicOps.end();
-  } 
+  }
   // Unknown domain. Assume the op is not deterministic.
-  return false; 
+  return false;
+}
+
+bool GetClipConstantMinMax(const Graph& graph, const Node& node, float& min, float& max) {
+  min = std::numeric_limits<float>::lowest();
+  max = std::numeric_limits<float>::max();
+
+  // Clip opsets 1 and 6 have min and max as attributes. They're inputs from opset 11 on.
+  bool min_max_are_attributes = graph_utils::IsSupportedOptypeVersionAndDomain(node, "Clip", {1, 6});
+  bool min_max_are_constant_values = true;
+
+  if (min_max_are_attributes) {
+    min = graph_utils::GetNodeAttribute(node, "min")->f();
+    max = graph_utils::GetNodeAttribute(node, "max")->f();
+  } else {
+    // update min/max if provided via a constant initializer
+    // return true if value is default or coming from a constant initializer and update 'value'
+    // return false if value is mutable
+    auto update_if_constant_value = [&graph](const Node& node, size_t input_idx, float& value) {
+      const auto& input_defs = node.InputDefs();
+      const NodeArg* input = (input_defs.size() > input_idx) ? input_defs[input_idx] : nullptr;
+
+      if (input == nullptr || !input->Exists()) {
+        // optional input not specified so using default value
+        return true;
+      }
+
+      bool is_constant = true;
+      const ONNX_NAMESPACE::TensorProto* initializer = graph_utils::GetConstantInitializer(graph, input->Name());
+      if (initializer) {
+        Initializer i(*initializer, graph.ModelPath());
+        switch (initializer->data_type()) {
+          case ONNX_NAMESPACE::TensorProto_DataType_FLOAT:
+            value = *i.data<float>();
+            break;
+          // double isn't currently supported
+          //case ONNX_NAMESPACE::TensorProto_DataType_DOUBLE:
+          //  value = static_cast<float>(*i.data<double>());
+          //  break;
+          case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16:
+            value = math::halfToFloat(i.data<MLFloat16>()->val);
+            break;
+          default:
+            ORT_THROW("Unexpected data type for Clip input of ", initializer->data_type());
+        }
+      } else {
+        is_constant = false;
+      }
+
+      return is_constant;
+    };
+
+    // 'min' is input 1, 'max' is input 2. both are optional.
+    // if the input is constant, 'min' or 'max' is updated by the call to update_if_constant_value
+    min_max_are_constant_values = update_if_constant_value(node, 1, min) &&
+                                  update_if_constant_value(node, 2, max);
+  }
+
+  return min_max_are_constant_values;
 }
 
 #endif  // #if !defined(ORT_MINIMAL_BUILD)
diff --git a/onnxruntime/core/optimizer/utils.h b/onnxruntime/core/optimizer/utils.h
index 9b87ee680d9c6..d7c1fa71ead59 100644
--- a/onnxruntime/core/optimizer/utils.h
+++ b/onnxruntime/core/optimizer/utils.h
@@ -102,6 +102,11 @@ bool CheckOutputEdges(const Graph& graph, const Node& node, size_t expected_outp
 
 bool IsOperationDeterministic(const std::string& domain, const std::string& op);
 
+/** Get min/max values from Clip if they are constant.
+@returns false if mutable and cannot be used.
+*/
+bool GetClipConstantMinMax(const Graph& graph, const Node& node, float& min, float& max);
+
 #endif  // !#if !defined(ORT_MINIMAL_BUILD)
 
 #if !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)
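Both fusion passes now consume the relocated helper the same way: if it returns false, at least one bound is produced at runtime and the rewrite must be skipped. A condensed caller-side sketch (clip_node is a placeholder for the matched node):

// Hedged usage sketch of the shared helper; clip_node is hypothetical.
float min, max;
if (!optimizer_utils::GetClipConstantMinMax(graph, clip_node, min, max)) {
  return Status::OK();  // a bound is a mutable tensor; leave the graph alone
}
// Here min/max are known at optimization time: either the opset-1/6
// attributes, the values of constant float/float16 initializers (opset 11+),
// or the defaults (lowest()/max()) when an optional input is absent.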
diff --git a/onnxruntime/test/optimizer/qdq_transformer_test.cc b/onnxruntime/test/optimizer/qdq_transformer_test.cc
index 97a051bdfea1e..d74160217f46a 100644
--- a/onnxruntime/test/optimizer/qdq_transformer_test.cc
+++ b/onnxruntime/test/optimizer/qdq_transformer_test.cc
@@ -1616,6 +1616,81 @@ TEST(QDQTransformerTests, DQForward_MutilpleSteps) {
   test_case({1, 13, 13, 23}, {30, 23, 3, 3}, {0, 3, 1, 2});
 }
 
+TEST(QDQTransformerTests, Clip) {
+  constexpr float epsilon = std::numeric_limits<float>::epsilon();
+
+  auto test_case = [&](float scale, auto zero_point, int clip_count, int opset_version = 12) {
+    auto build_test_case = [&](ModelTestBuilder& builder) {
+      auto* input_arg = builder.MakeInput<uint8_t>({1, 32, 112, 112},
+                                                   std::numeric_limits<uint8_t>::min(),
+                                                   std::numeric_limits<uint8_t>::max());
+      auto* output_arg = builder.MakeOutput();
+
+      // add DQ
+      auto* dq_output = builder.MakeIntermediate();
+      builder.AddDequantizeLinearNode<uint8_t>(input_arg, .0035f, 7, dq_output);
+
+      // add Clip
+      auto* clip_output = builder.MakeIntermediate();
+      constexpr float min = .0f;
+      constexpr float max = 6.0f;
+      if (opset_version >= 11) {
+        auto* min_initializer = builder.MakeScalarInitializer<float>(min);
+        auto* max_initializer = builder.MakeScalarInitializer<float>(max);
+        builder.AddNode("Clip", {dq_output, min_initializer, max_initializer}, {clip_output});
+      } else {
+        Node& clip_node = builder.AddNode("Clip", {dq_output}, {clip_output});
+        clip_node.AddAttribute("min", min);
+        clip_node.AddAttribute("max", max);
+      }
+
+      // add Q + DQ
+      auto* q_output = builder.MakeIntermediate();
+      builder.AddQuantizeLinearNode(clip_output, scale, zero_point, q_output);
+      builder.AddDequantizeLinearNode(q_output, scale, zero_point, output_arg);
+    };
+
+    auto check_clip_graph = [&](InferenceSessionWrapper& session) {
+      auto op_to_count = CountOpsInGraph(session.GetGraph());
+      EXPECT_EQ(op_to_count["QuantizeLinear"], 1);
+      EXPECT_EQ(op_to_count["Clip"], clip_count);
+      EXPECT_EQ(op_to_count["DequantizeLinear"], 2);
+    };
+
+    TransformerTester(build_test_case, check_clip_graph,
+                      TransformerLevel::Default,
+                      TransformerLevel::Level1,
+                      opset_version,
+                      epsilon,
+                      epsilon);
+  };
+
+  test_case(.0235294122248888f, static_cast<int8_t>(-128), 0);  // [0, 6]
+  test_case(.02f, static_cast<int8_t>(-128), 0);                // [0, 5.1]
+  test_case(.03f, static_cast<int8_t>(-128), 1);                // [0, 7.65]
+  test_case(.02f, static_cast<int8_t>(127), 1);                 // [-5.1, 0]
+  test_case(.02f, static_cast<int8_t>(0), 1);                   // [-2.56, 2.54]
+  test_case(.04f, static_cast<int8_t>(-97), 1);                 // [-1.24, 8.96]
+  test_case(.02352941176f, static_cast<uint8_t>(0), 0);         // [0, 6]
+  test_case(.02f, static_cast<uint8_t>(0), 0);                  // [0, 5.1]
+  test_case(.03f, static_cast<uint8_t>(0), 1);                  // [0, 7.65]
+  test_case(.02f, static_cast<uint8_t>(255), 1);                // [-5.1, 0]
+  test_case(.02f, static_cast<uint8_t>(128), 1);                // [-2.56, 2.54]
+  test_case(.04f, static_cast<uint8_t>(31), 1);                 // [-1.24, 8.96]
+
+  // opset_version = 10
+  test_case(.02f, static_cast<int8_t>(-128), 0, 10);   // [0, 5.1]
+  test_case(.03f, static_cast<int8_t>(-128), 1, 10);   // [0, 7.65]
+  test_case(.02f, static_cast<uint8_t>(0), 0, 10);     // [0, 5.1]
+  test_case(.03f, static_cast<uint8_t>(0), 1, 10);     // [0, 7.65]
+
+  // differences between lower/upper and min/max are within epsilon
+  test_case(epsilon, static_cast<int8_t>(-127), 0);              // [-epsilon, x] (x <= 6 + epsilon)
+  test_case((6 + epsilon) / 255, static_cast<int8_t>(-128), 0);  // [0, 6 + epsilon]
+  test_case(epsilon, static_cast<uint8_t>(1), 0);                // [-epsilon, x] (x <= 6 + epsilon)
+  test_case((6 + epsilon) / 255, static_cast<uint8_t>(0), 0);    // [0, 6 + epsilon]
+}
+
 TEST(QDQTransformerTests, Concat) {
   auto test_case = [&](const std::vector<std::vector<int64_t>>& input_shapes,
                        int64_t axis,