
use gemm to replace matmul + add #234

Merged (23 commits) on Jan 22, 2019

Commits:
48e7468
matmul add fusion
HectorSVC Dec 20, 2018
df38220
add shape check on Gemm input C
HectorSVC Dec 20, 2018
f57dd20
walk around the issue with RemoveNode
HectorSVC Dec 21, 2018
167c03d
update the version support
HectorSVC Dec 21, 2018
748bdf4
Merge branch 'master' of https://github.com/Microsoft/onnxruntime int…
HectorSVC Dec 22, 2018
d47eb0f
Merge branch 'master' into hecli/gemm
HectorSVC Jan 3, 2019
d576ce9
If MatMul has shape [K] * [K, N], update it to [1, K] * [K, N], so th…
HectorSVC Jan 3, 2019
e92ff53
Merge branch 'master' into hecli/gemm
HectorSVC Jan 3, 2019
eef69d7
Fuse Gemm+Activation into FusedGemm
HectorSVC Jan 4, 2019
c073a06
Merge branch 'master' into hecli/gemm
HectorSVC Jan 4, 2019
750b2c7
test
HectorSVC Jan 5, 2019
10379f3
revert the change which fuse the matmul with shape [K]*[K, N] to Gemm…
HectorSVC Jan 7, 2019
033cb46
revert the change which change the shape for Matmul from [K]*[K, N] t…
HectorSVC Jan 8, 2019
361785f
1. Fix build issue for CUDA
HectorSVC Jan 17, 2019
b0779f9
Merge branch 'master' into hecli/gemm_fusion
HectorSVC Jan 17, 2019
8d1162e
revert the hack in C API
HectorSVC Jan 17, 2019
3a20760
Merge branch 'hecli/gemm' of https://github.com/Microsoft/onnxruntime…
HectorSVC Jan 17, 2019
8bc9c95
Fix build issue
HectorSVC Jan 17, 2019
15161ee
Fuse the activation node even if it connects to the output
HectorSVC Jan 18, 2019
0c4d6e9
Merge branch 'master' into hecli/gemm
HectorSVC Jan 18, 2019
ee56b50
resolve the merge conflicts
HectorSVC Jan 19, 2019
7f616e4
Merge branch 'master' into hecli/gemm
HectorSVC Jan 22, 2019
4a34c7f
Add test model for Gemm+Activation fusion
HectorSVC Jan 22, 2019
94 changes: 94 additions & 0 deletions onnxruntime/core/graph/matmul_add_fusion.cc
@@ -0,0 +1,94 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#include "core/graph/initializer.h"
#include "core/graph/matmul_add_fusion.h"
#include "core/graph/graph_utils.h"

using namespace onnx;
using namespace ::onnxruntime::common;
namespace onnxruntime {

Status MatMulAddFusion::Apply(Graph& graph, bool& modified) const {
GraphViewer graph_viewer(graph);
const auto& node_topology_list = graph_viewer.GetNodesInTopologicalOrder();
std::vector<onnxruntime::NodeIndex> removed_nodes;

for (auto node_index : node_topology_list) {
auto node = graph.GetNode(node_index);
if (nullptr == node ||
!utils::IsSupportedOptypeVersionAndDomain(*node, "MatMul", 9) ||
node->GetOutputEdgesCount() != 1) {
continue;
}

auto next_node_itr = node->OutputNodesBegin();
if (next_node_itr == node->OutputNodesEnd()) {
[Review comment — pranavsharma (Contributor), Dec 21, 2018]
curious: do we have to check for this condition if we've already checked for node->GetOutputEdgesCount() != 1? #Resolved

[Reply — HectorSVC (Author)]
good point, will remove this
continue;
}

const Node& next_node = (*next_node_itr);
if (!utils::IsSupportedOptypeVersionAndDomain(next_node, "Add", 7)) {
continue;
}

Node* matmul_node = node;
Node& add_node = const_cast<Node&>(next_node);
std::vector<NodeArg> input_args, output_args;
auto matmul_input_defs = matmul_node->MutableInputDefs();
auto add_input_defs = add_node.MutableInputDefs();

// Gemm only supports float, so check that the MatMul and Add inputs are float tensors
auto matmul_type = matmul_input_defs[0]->Type();
auto add_type = add_input_defs[0]->Type();
if ((*matmul_type) != "tensor(float)" || (*add_type) != "tensor(float)") {
continue;
}

// Gemm only supports matrices, so check the shapes of the MatMul and Add inputs
[Review comment — HectorSVC (Author), Dec 21, 2018, on the shape check above]
if MatMul is [K] * [K, N], we should be able to update the shape to [1, K] * [K, N] and make it work for Gemm. Will update this. #Resolved

[Reply — HectorSVC (Author)]
Had to revert this change, as consumers of the data are not aware of the reshape.
auto matmul_a_shape = matmul_input_defs[0]->Shape();
auto matmul_b_shape = matmul_input_defs[1]->Shape();
if (nullptr == matmul_a_shape || nullptr == matmul_b_shape ||
2 != matmul_a_shape->dim_size() || 2 != matmul_b_shape->dim_size()) {
continue;
}

auto matmul_output_name = matmul_node->OutputDefs()[0]->Name();
auto gemm_input_defs = matmul_input_defs;
if (matmul_output_name == add_input_defs[0]->Name()) {
// MatMul output is Add input A, so use Add input B as Gemm input C
// Gemm only supports unidirectional broadcast on C
if (add_input_defs[1]->Shape()->dim_size() > 2) {
continue;
}
gemm_input_defs.push_back(add_input_defs[1]);
} else {
// MatMul output is Add input B, so use Add input A as Gemm input C
// Gemm only supports unidirectional broadcast on C
if (add_input_defs[0]->Shape()->dim_size() > 2) {
continue;
}
gemm_input_defs.push_back(add_input_defs[0]);
}

graph.AddNode(graph.GenerateNodeName("gemm"),
"Gemm",
"fused Matmul and Add " + add_node.OpType(),
gemm_input_defs,
add_node.MutableOutputDefs());

removed_nodes.push_back(add_node.Index());
removed_nodes.push_back(matmul_node->Index());
}

for (auto i : removed_nodes) {
graph.RemoveNode(i);
}

if (!removed_nodes.empty()) {
modified = true;
ORT_RETURN_IF_ERROR(graph.Resolve());
}
return Status::OK();
}
} // namespace onnxruntime
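The dim_size() > 2 checks above guard Gemm's constraint that input C must be unidirectionally broadcastable to the [M, N] output (shapes like [N], [1, N], or [M, N] work; higher-rank C does not). A hedged numpy sketch of which C shapes qualify — note this sketch checks full unidirectional broadcastability, while the transformer above uses the simpler rank test:

```python
import numpy as np

M, N = 2, 4  # Gemm output shape [M, N]

def broadcastable_to_mn(c_shape, m=M, n=N):
    """Unidirectional broadcast: C must expand to [M, N] without
    forcing [M, N] itself to expand."""
    try:
        return np.broadcast_shapes(c_shape, (m, n)) == (m, n)
    except ValueError:
        return False

assert broadcastable_to_mn((N,))        # per-column bias — fusable
assert broadcastable_to_mn((1, N))      # fusable
assert broadcastable_to_mn((M, N))      # fusable
assert not broadcastable_to_mn((M, N, 2))  # rank > 2 — fusion is skipped
```

When the Add input fails this constraint, the transformer simply leaves the MatMul + Add pair in place rather than producing an invalid Gemm.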
16 changes: 16 additions & 0 deletions onnxruntime/core/graph/matmul_add_fusion.h
@@ -0,0 +1,16 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#pragma once

#include "core/graph/graph_transformer.h"

namespace onnxruntime {

class MatMulAddFusion : public onnxruntime::GraphTransformer {
[Review comment — Contributor]
Can we try to implement this as a rewrite rule? It is a pity to traverse the whole graph for these local transformations. You can check the identity elimination as well as the PR here. It is a bit outdated, but I will most probably rebase it today.
public:
MatMulAddFusion() noexcept : onnxruntime::GraphTransformer("MatMulAddFusion", "Fusing MatMul and Add into Gemm") {}
Status Apply(onnxruntime::Graph& graph, bool& modified) const override;
};

} // namespace onnxruntime
38 changes: 38 additions & 0 deletions onnxruntime/test/ir/graph_transform_test.cc
@@ -11,6 +11,7 @@
#include "core/graph/conv_mul_fusion.h"
#include "core/graph/conv_add_fusion.h"
#include "core/graph/conv_activation_fusion.h"
#include "core/graph/matmul_add_fusion.h"
#include "core/platform/env.h"

#include "test/capturing_sink.h"
@@ -194,5 +195,42 @@ TEST(GraphTransformationTests, FuseConvAddMul3D) {
ASSERT_TRUE(session_object.Initialize().IsOK());
}

TEST(GraphTransformationTests, MatMulAddFusion_two_input) {
string model_uri = MODEL_FOLDER + "matmul_add_fusion/2Input/model.onnx";

SessionOptions so;
so.session_logid = "GraphTransformationTests.LoadModelToTransform";
InferenceSession session_object{so, &DefaultLoggingManager()};
ASSERT_TRUE(session_object.Load(model_uri).IsOK());

std::shared_ptr<Model> p_model;
ASSERT_TRUE(Model::Load(model_uri, p_model).IsOK());

std::unique_ptr<MatMulAddFusion> matmul_add_fusion_transformer = std::make_unique<MatMulAddFusion>();

session_object.RegisterGraphTransformer(std::move(matmul_add_fusion_transformer));

ASSERT_TRUE(session_object.Initialize().IsOK());
}

TEST(GraphTransformationTests, MatMulAddFusion_three_input) {
string model_uri = MODEL_FOLDER + "matmul_add_fusion/3Input/model.onnx";

SessionOptions so;
so.session_logid = "GraphTransformationTests.LoadModelToTransform";
InferenceSession session_object{so, &DefaultLoggingManager()};
ASSERT_TRUE(session_object.Load(model_uri).IsOK());

std::shared_ptr<Model> p_model;
ASSERT_TRUE(Model::Load(model_uri, p_model).IsOK());

std::unique_ptr<MatMulAddFusion> matmul_add_fusion_transformer = std::make_unique<MatMulAddFusion>();

session_object.RegisterGraphTransformer(std::move(matmul_add_fusion_transformer));

ASSERT_TRUE(session_object.Initialize().IsOK());
}


} // namespace test
} // namespace onnxruntime
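The later commits also fuse an activation that follows the Gemm into a FusedGemm node (commit "Fuse Gemm+Activation into FusedGemm", with a test model added in the final commit). As a reference for what such a fused node computes — a sketch assuming a Relu activation and alpha = beta = 1; the actual FusedGemm contract is defined by the onnxruntime kernel, not this snippet:

```python
import numpy as np

def fused_gemm_relu(a, b, c, alpha=1.0, beta=1.0):
    """Reference computation: Gemm followed by Relu, collapsed into one step."""
    return np.maximum(alpha * (a @ b) + beta * c, 0.0)

A = np.array([[1.0, -2.0]], dtype=np.float32)   # [1, 2]
B = np.array([[3.0], [4.0]], dtype=np.float32)  # [2, 1]
C = np.array([1.0], dtype=np.float32)           # [1]

# A @ B = [[-5.0]]; + C = [[-4.0]]; Relu clamps to [[0.0]]
out = fused_gemm_relu(A, B, C)
```

Fusing the activation saves a graph node and an intermediate tensor, which is the same motivation as the MatMul + Add fusion itself.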
Binary file not shown. (9 binary test-model files added)