PaddlePaddle · peterzhang2029 · Feb 3, 2018 · Feb 3, 2018 · Feb 7, 2018 · dzhwinter
diff --git a/paddle/operators/matmul_op.cc b/paddle/operators/matmul_op.cc
@@ -37,118 +37,156 @@ class MatMulOp : public framework::OperatorWithKernel {
     bool transpose_x = context->Attrs().Get<bool>("transpose_X");
     bool transpose_y = context->Attrs().Get<bool>("transpose_Y");
 
+    int x_num_col_dims = context->Attrs().Get<int>("x_num_col_dims");
+    int y_num_col_dims = context->Attrs().Get<int>("y_num_col_dims");
+
     PADDLE_ENFORCE_GE(dim_x.size(), 1,
                       "Input tensor X must be at least 1-dimensional.");
     PADDLE_ENFORCE_GE(dim_y.size(), 1,
                       "Input tensor Y must be at least 1-dimensional.");
 
-    std::vector<int64_t> out_dim;
-    int64_t batch_count = 1;
-    if (dim_x.size() > 3) {
-      PADDLE_ENFORCE_EQ(
-          dim_y.size(), dim_x.size(),
-          "The dimensions of X and Y must be the same, and both of "
-          "them should be %d-dimensional.",
-          dim_x.size());
-
-      // The first rank-2 dimensions are accumulated on the batch_count, and the
-      // last two dimensions are used for matrix multiplication.
-      for (int j = 0; j < dim_x.size() - 2; ++j) {
-        PADDLE_ENFORCE_EQ(dim_y[j], dim_x[j],
-                          "The %d-th dimension of X and Y must be the same.",
-                          j);
-        out_dim.push_back(dim_x[j]);
-        batch_count *= dim_x[j];
+    std::vector<int64_t> dim_out;
+    if (x_num_col_dims == 0 && y_num_col_dims == 0) {  // no flattening
+      std::vector<int64_t> out_dim;
+      int64_t batch_count = 1;
+      if (dim_x.size() > 3) {
+        PADDLE_ENFORCE_EQ(
+            dim_y.size(), dim_x.size(),
+            "The dimensions of X and Y must be the same, and both of "
+            "them should be %d-dimensional.",
+            dim_x.size());
+
+        // The first rank-2 dimensions are accumulated on the batch_count,
+        // and the last two dimensions are used for matrix multiplication.
+        for (int j = 0; j < dim_x.size() - 2; ++j) {
+          PADDLE_ENFORCE_EQ(dim_y[j], dim_x[j],
+                            "The %d-th dimension of X and Y must be the same.",
+                            j);
+          out_dim.push_back(dim_x[j]);
+          batch_count *= dim_x[j];
+        }
       }
-    }
 
-    int M = 0, N = 0, KX = 0, KY = 0, batchCountX = 0, batchCountY = 0;
-    bool remove_initial_dim = false, remove_final_dim = false;
+      int M = 0, N = 0, KX = 0, KY = 0, batchCountX = 0, batchCountY = 0;
+      bool remove_initial_dim = false, remove_final_dim = false;
 
-    switch (dim_x.size()) {
-      case 1:
-        if (transpose_x) {
-          M = dim_x[0];
-          KX = 1;
-        } else {
-          M = 1;
-          KX = dim_x[0];
-          remove_initial_dim = true;
-        }
-        break;
-      case 2:
-        M = transpose_x ? dim_x[1] : dim_x[0];
-        KX = transpose_x ? dim_x[0] : dim_x[1];
-        break;
-      case 3:
-        batchCountX = dim_x[0];
-        M = transpose_x ? dim_x[2] : dim_x[1];
-        KX = transpose_x ? dim_x[1] : dim_x[2];
-        break;
-      default:
-        batchCountX = batch_count;
-        size_t mat_s = dim_x.size() - 2;
-        M = transpose_x ? dim_x[mat_s + 1] : dim_x[mat_s];
-        KX = transpose_x ? dim_x[mat_s] : dim_x[mat_s + 1];
-        break;
-    }
+      switch (dim_x.size()) {
+        case 1:
+          if (transpose_x) {
+            M = dim_x[0];
+            KX = 1;
+          } else {
+            M = 1;
+            KX = dim_x[0];
+            remove_initial_dim = true;
+          }
+          break;
+        case 2:
+          M = transpose_x ? dim_x[1] : dim_x[0];
+          KX = transpose_x ? dim_x[0] : dim_x[1];
+          break;
+        case 3:
+          batchCountX = dim_x[0];
+          M = transpose_x ? dim_x[2] : dim_x[1];
+          KX = transpose_x ? dim_x[1] : dim_x[2];
+          break;
+        default:
+          batchCountX = batch_count;
+          size_t mat_s = dim_x.size() - 2;
+          M = transpose_x ? dim_x[mat_s + 1] : dim_x[mat_s];
+          KX = transpose_x ? dim_x[mat_s] : dim_x[mat_s + 1];
+          break;
+      }
 
-    switch (dim_y.size()) {
-      case 1:
-        if (transpose_y) {
-          N = dim_y[0];
-          KY = 1;
+      switch (dim_y.size()) {
+        case 1:
+          if (transpose_y) {
+            N = dim_y[0];
+            KY = 1;
+          } else {
+            N = 1;
+            KY = dim_y[0];
+            remove_final_dim = true;
+          }
+          break;
+        case 2:
+          KY = transpose_y ? dim_y[1] : dim_y[0];
+          N = transpose_y ? dim_y[0] : dim_y[1];
+          break;
+        case 3:
+          batchCountY = dim_y[0];
+          KY = transpose_y ? dim_y[2] : dim_y[1];
+          N = transpose_y ? dim_y[1] : dim_y[2];
+          break;
+        default:
+          batchCountY = batch_count;
+          size_t mat_s = dim_y.size() - 2;
+          KY = transpose_y ? dim_y[mat_s + 1] : dim_y[mat_s];
+          N = transpose_y ? dim_y[mat_s] : dim_y[mat_s + 1];
+      }
+
+      PADDLE_ENFORCE_EQ(
+          KX, KY,
+          "First matrix's width must be equal with second matrix's height.");
+      if (batchCountX && batchCountY) {
+        PADDLE_ENFORCE_EQ(
+            batchCountX, batchCountY,
+            "When Input(X) and Input(Y) are both three dimensional, they "
+            "must have the same batch dimension.");
+      }
+      int batchCount = std::max(batchCountX, batchCountY);
+
+      if (batchCount) {
+        if (dim_x.size() > 3) {
+          dim_out.insert(dim_out.begin(), out_dim.begin(), out_dim.end());
         } else {
-          N = 1;
-          KY = dim_y[0];
-          remove_final_dim = true;
+          dim_out.push_back(batchCount);
         }
-        break;
-      case 2:
-        KY = transpose_y ? dim_y[1] : dim_y[0];
-        N = transpose_y ? dim_y[0] : dim_y[1];
-        break;
-      case 3:
-        batchCountY = dim_y[0];
-        KY = transpose_y ? dim_y[2] : dim_y[1];
-        N = transpose_y ? dim_y[1] : dim_y[2];
-        break;
-      default:
-        batchCountY = batch_count;
-        size_t mat_s = dim_y.size() - 2;
-        KY = transpose_y ? dim_y[mat_s + 1] : dim_y[mat_s];
-        N = transpose_y ? dim_y[mat_s] : dim_y[mat_s + 1];
-    }
+      }
+      if (!remove_initial_dim) {
+        dim_out.push_back(M);
+      }
+      if (!remove_final_dim) {
+        dim_out.push_back(N);
+      }
+      if (dim_out.size() == 0) {
+        // We don't support 0-dimensional Tensors (scalars), so instead
+        // treat the output as a Tensor of shape (1, ) in this case.
+        dim_out.push_back(1);
+      }
+    } else {
+      if (x_num_col_dims == 0) {
+        x_num_col_dims = 1;
+      }
+      if (y_num_col_dims == 0) {
+        y_num_col_dims = 1;
+      }
+      // Each operand must have more dimensions than its own num_col_dims
+      // before it is flattened to 2-D below; X and Y are checked separately.
+      PADDLE_ENFORCE_GT(dim_x.size(), x_num_col_dims,
+                        "The input tensor X's rank of MatMulOp should be "
+                        "larger than x_num_col_dims.");
+      PADDLE_ENFORCE_GT(dim_y.size(), y_num_col_dims,
+                        "The input tensor Y's rank of MatMulOp should be "
+                        "larger than y_num_col_dims.");
+
+      auto x_mat_dims = framework::flatten_to_2d(dim_x, x_num_col_dims);
+      auto y_mat_dims = framework::flatten_to_2d(dim_y, y_num_col_dims);
 
-    PADDLE_ENFORCE_EQ(
-        KX, KY,
-        "First matrix's width must be equal with second matrix's height.");
-    if (batchCountX && batchCountY) {
       PADDLE_ENFORCE_EQ(
-          batchCountX, batchCountY,
-          "When Input(X) and Input(Y) are both three dimensional, they "
-          "must have the same batch dimension.");
-    }
-    int batchCount = std::max(batchCountX, batchCountY);
+          x_mat_dims[1], y_mat_dims[0],
+          "First matrix's width must be equal with second matrix's height.");
 
-    std::vector<int64_t> dim_out;
-    if (batchCount) {
-      if (dim_x.size() > 3) {
-        dim_out.insert(dim_out.begin(), out_dim.begin(), out_dim.end());
-      } else {
-        dim_out.push_back(batchCount);
+      dim_out.reserve(
+          static_cast<size_t>(x_num_col_dims + dim_y.size() - y_num_col_dims));
+
+      for (int i = 0; i < x_num_col_dims; ++i) {
+        dim_out.push_back(dim_x[i]);
+      }
+
+      for (int i = y_num_col_dims; i < dim_y.size(); ++i) {
+        dim_out.push_back(dim_y[i]);
       }
-    }
-    if (!remove_initial_dim) {
-      dim_out.push_back(M);
-    }
-    if (!remove_final_dim) {
-      dim_out.push_back(N);
-    }
-    if (dim_out.size() == 0) {
-      // We don't support 0-dimensional Tensors (scalars), so instead
-      // treat the output as a Tensor of shape (1, ) in this case.
-      dim_out.push_back(1);
     }
     context->SetOutputDim("Out", framework::make_ddim(dim_out));
     context->ShareLoD("X", /*->*/ "Out");
@@ -162,6 +200,37 @@ class MatMulOpMaker : public framework::OpProtoAndCheckerMaker {
     AddInput("X", "The first input of MatMul op");
     AddInput("Y", "The second input of MatMul op");
     AddOutput("Out", "The output of MatMul op");
+    AddAttr<int>(
+        "x_num_col_dims",
+        R"DOC((int, default 0), The matmul_op can take tensors with more than two
+              dimensions as its inputs. If the input $X$ is a tensor with more
+              than two dimensions, $X$ will be flattened into a two-dimensional
+              matrix first. The flattening rule is: the first `x_num_col_dims`
+              will be flattened to form the first dimension of the final matrix
+              (the height of the matrix), and the rest `rank(X) - x_num_col_dims`
+              dimensions are flattened to form the second dimension of the final
+              matrix (the width of the matrix). As a result, height of the
+              flattened matrix is equal to the product of $X$'s first
+              `x_num_col_dims` dimensions' sizes, and width of the flattened
+              matrix is equal to the product of $X$'s last `rank(X) - x_num_col_dims`
+              dimensions' sizes. For example, suppose $X$ is a 5-dimensional
+              tensor with the shape [2, 3, 4, 5, 6], and `x_num_col_dims` = 3.
+              Thus, the flattened matrix will have a shape [2 x 3 x 4, 5 x 6] =
+              [24, 30]. The default value 0 indicates the input is a 2-D Matrix.
+        )DOC")
+        .SetDefault(0)
+        .EqualGreaterThan(0);
+    AddAttr<int>(
+        "y_num_col_dims",
+        R"DOC((int, default 0), The matmul_op can take tensors with more than
+              two dimensions as its inputs. If the input $Y$ is a tensor with
+              more than two dimensions, $Y$ will be flattened into a
+              two-dimensional matrix first. The attribute `y_num_col_dims`
+              determines how $Y$ is flattened.
+              See comments of `x_num_col_dims` for more details.
+        )DOC")
+        .SetDefault(0)
+        .EqualGreaterThan(0);
     AddAttr<bool>("transpose_X",
                   R"DOC(If true, use the transpose of `X`.
         )DOC")