Commit
move padding shape x 1 x 1 in ernie to qkv and fc
shangzhizhou committed Mar 19, 2021
1 parent ab77eb2 commit 70cd7e3
Showing 5 changed files with 105 additions and 26 deletions.
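In brief: the ERNIE plugins used to pad their outputs to rank 5 with a trailing "x 1 x 1" so that downstream TensorRT FullyConnected layers would accept them; this commit makes the plugins emit plain 3-D tensors and moves the padding into the FC and QKV (multihead matmul) converters, the spots where FullyConnected actually needs it. A minimal sketch of the padding pattern, assuming a caller-provided nvinfer1 network and a rank-3 input (the function and variable names here are hypothetical, not from the commit):

#include <NvInfer.h>

// Reshape [B, S, H] to [B, S, H, 1, 1] so the tensor can feed
// nvinfer1::IFullyConnectedLayer, which flattens the last three dims.
nvinfer1::ITensor* PadForFullyConnected(nvinfer1::INetworkDefinition* network,
                                        nvinfer1::ITensor* input) {
  nvinfer1::Dims in_dims = input->getDimensions();
  nvinfer1::Dims padded;
  padded.nbDims = in_dims.nbDims + 2;
  for (int i = 0; i < in_dims.nbDims; ++i) {
    padded.d[i] = 0;  // 0 = keep the corresponding input dimension
  }
  padded.d[padded.nbDims - 2] = 1;  // the trailing "x 1"
  padded.d[padded.nbDims - 1] = 1;  // ... and "x 1"
  nvinfer1::IShuffleLayer* shuffle = network->addShuffle(*input);
  shuffle->setReshapeDimensions(padded);
  return shuffle->getOutput(0);
}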
75 changes: 68 additions & 7 deletions paddle/fluid/inference/tensorrt/convert/fc_op.cc
@@ -142,9 +142,76 @@ class FcOpConverter : public OpConverter {
TensorRTEngine::Weight bias{nvinfer1::DataType::kFLOAT,
static_cast<void*>(bias_data),
static_cast<size_t>(bias_num)};
PADDLE_ENFORCE_EQ(
x_num_col_dims == 1 || x_num_col_dims == 2, true,
platform::errors::InvalidArgument(
"Wrong x_num_col_dims param of op mul. Paddle-TRT FC converter "
"expects x_num_col_dims is either 1 or 2, but got %d",
x_num_col_dims));

if (engine_->with_dynamic_shape()) {
regist_fc(X, n_output, weight, bias);
// not NCHW layout but NLP layout; pad with a trailing 'x 1 x 1'
auto x_dim = X->getDimensions();
if (x_dim.nbDims == 3 || x_dim.nbDims == 2) {
auto output_name = op_desc.Output("Out").front();
// add shuffle before fc
nvinfer1::Dims before_reshape_dim;
before_reshape_dim.nbDims = x_dim.nbDims + 2;
for (int i = 0; i < x_dim.nbDims; i++) {
before_reshape_dim.d[i] = 0;
}
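// note: a 0 passed to setReshapeDimensions keeps the corresponding input
// dimension, so batch and sequence length stay dynamic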
before_reshape_dim.d[x_dim.nbDims] = 1;
before_reshape_dim.d[x_dim.nbDims + 1] = 1;
auto* before_reshape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
before_reshape_layer->setReshapeDimensions(before_reshape_dim);
before_reshape_layer->setName(
("fc_before_shuffle(Output: " + output_name + ")").c_str());

// add fc layer
auto* fc_layer = TRT_ENGINE_ADD_LAYER(
engine_, FullyConnected, *before_reshape_layer->getOutput(0),
n_output, weight.get(), bias.get());
fc_layer->setName(("fc_layer(Output: " + output_name + ")").c_str());

// add shuffle after fc
nvinfer1::Dims after_reshape_dim;
if (x_dim.nbDims == 3) {
if (x_num_col_dims == 2) {
after_reshape_dim.nbDims = 3;
after_reshape_dim.d[0] = 0;
after_reshape_dim.d[1] = 0;
after_reshape_dim.d[2] = 0;
} else {
after_reshape_dim.nbDims = 2;
after_reshape_dim.d[0] = 0;
auto dim = fc_layer->getOutput(0)->getDimensions();
after_reshape_dim.d[1] = dim.d[1] * dim.d[2];
}
} else {
// x_dim.nbDims == 2
after_reshape_dim.nbDims = 2;
after_reshape_dim.d[0] = 0;
after_reshape_dim.d[1] = 0;
}
auto* after_reshape_layer =
TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *fc_layer->getOutput(0));
after_reshape_layer->setReshapeDimensions(after_reshape_dim);
after_reshape_layer->setName(
("fc_after_shuffle(Output: " + output_name + ")").c_str());

if (activation_type == "relu") {
nvinfer1::IActivationLayer* relu_layer = TRT_ENGINE_ADD_LAYER(
engine_, Activation, *(after_reshape_layer->getOutput(0)),
nvinfer1::ActivationType::kRELU);
RreplenishLayerAndOutput(relu_layer, "fc_after_relu", {output_name},
test_mode);
} else {
RreplenishLayerAndOutput(after_reshape_layer, "fc_after_shuffle",
{output_name}, test_mode);
}
} else {
regist_fc(X, n_output, weight, bias);
}
return;
}
// in order to handle situations in NLP models (input dims < 3,
@@ -154,12 +221,6 @@ class FcOpConverter : public OpConverter {
auto input_d = X->getDimensions().d;
int reshape_dim3[3] = {0};
int reshape_dim4[4] = {0};
PADDLE_ENFORCE_EQ(
x_num_col_dims == 1 || x_num_col_dims == 2, true,
platform::errors::InvalidArgument(
"Wrong x_num_col_dims param of op mul. Paddle-TRT FC converter "
"expects x_num_col_dims is either 1 or 2, but got %d",
x_num_col_dims));
PADDLE_ENFORCE_LE(x_num_col_dims, input_dims,
platform::errors::InvalidArgument(
"Params and input dims mismatch. Paddle-TRT FC "
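For intuition, the shape flow through the new dynamic-shape FC path above, as a comment sketch (B/S/H are illustrative placeholders, not from the commit):

// 3-D input [B, S, H]:
//   before-shuffle: [B, S, H] -> [B, S, H, 1, 1]
//   FullyConnected: [B, S, H, 1, 1] -> [B, S, n_output, 1, 1]
//   after-shuffle with x_num_col_dims == 2: -> [B, S, n_output]
//   after-shuffle with x_num_col_dims == 1: -> [B, S * n_output]
// 2-D input [B, H]:
//   [B, H] -> [B, H, 1, 1] -> FC -> [B, n_output, 1, 1] -> [B, n_output]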
43 changes: 35 additions & 8 deletions paddle/fluid/inference/tensorrt/convert/multihead_matmul_op.cc
@@ -8,8 +8,8 @@ You may obtain a copy of the License at
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See
the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
@@ -28,8 +28,13 @@ class MultiheadMatMulOpConverter : public OpConverter {
"network structure";
framework::OpDesc op_desc(op, nullptr);
// Declare inputs
// Should be a 5-D tensor.
// Should be a 3-D tensor.
auto* input = engine_->GetITensor(op_desc.Input("Input").front());
PADDLE_ENFORCE_EQ(input->getDimensions().nbDims, 3,
platform::errors::InvalidArgument(
"The Input dim of the MultiheadMatMul should be 3, "
"but it's (%d) now.",
input->getDimensions().nbDims));

// fc weights and fc bias
auto weight_name = op_desc.Input("W").front();
@@ -69,6 +74,7 @@ class MultiheadMatMulOpConverter : public OpConverter {
int head_number = BOOST_GET_CONST(int, op_desc.GetAttr("head_number"));

nvinfer1::ILayer* layer = nullptr;
auto output_name = op_desc.Output("Out")[0];

if (engine_->with_dynamic_shape()) {
if (engine_->use_oss()) {
@@ -184,15 +190,37 @@
static_cast<void*>(bias_data),
static_cast<size_t>(bias_t->numel())};

auto* fc_layer = TRT_ENGINE_ADD_LAYER(engine_, FullyConnected, *input,
n, weight.get(), bias.get());
auto* fc_out = fc_layer->getOutput(0);
// add shuffle before fc
nvinfer1::Dims before_reshape_dim;
before_reshape_dim.nbDims = 5;
before_reshape_dim.d[0] = 0;
before_reshape_dim.d[1] = 0;
before_reshape_dim.d[2] = 0;
before_reshape_dim.d[3] = 1;
before_reshape_dim.d[4] = 1;
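// as in fc_op.cc, each 0 keeps the corresponding input dimension
// (batch, seq_len, hidden); the trailing 1 x 1 only satisfies
// FullyConnected, which flattens the last three dimensions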
auto* before_reshape_layer =
TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
before_reshape_layer->setReshapeDimensions(before_reshape_dim);
before_reshape_layer->setName(
("multihead_mamul_before_shuffle(Output: " + output_name + ")")
.c_str());

// add fc layer
auto* fc_layer = TRT_ENGINE_ADD_LAYER(
engine_, FullyConnected, *before_reshape_layer->getOutput(0), n,
weight.get(), bias.get());
fc_layer->setName(
("multihead_mamul_fc(Output: " + output_name + ")").c_str());

// no need to add shuffle after fc, just change it in
// QkvToContextPluginDynamic
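// (QkvToContextPluginDynamic::getOutputDimensions now reports a 3-D
// output directly; see the plugin diff below.)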

// add qkv to context
int head_size = hidden_out / head_number;
float scale = BOOST_GET_CONST(float, op_desc.GetAttr("alpha"));

std::vector<nvinfer1::ITensor*> plugin_inputs;
plugin_inputs.push_back(fc_out);
plugin_inputs.push_back(fc_layer->getOutput(0));
plugin_inputs.push_back(input_bias_qk);
bool with_fp16 =
engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
@@ -208,7 +236,6 @@
"You can use the config.SetTRTDynamicShapeInfo(...) interface to set "
"the shape information to run the dynamic shape mode."));
}
auto output_name = op_desc.Output("Out")[0];
RreplenishLayerAndOutput(layer, "multihead_matmul", {output_name},
test_mode);
#else
@@ -200,12 +200,10 @@ nvinfer1::DimsExprs EmbEltwiseLayernormPluginDynamic::getOutputDimensions(
"but it's (%d)",
output_index));
nvinfer1::DimsExprs ret;
ret.nbDims = 5;
ret.nbDims = 3;
ret.d[0] = inputs[0].d[0];
ret.d[1] = inputs[0].d[1];
ret.d[2] = expr_builder.constant(hidden_size_);
ret.d[3] = expr_builder.constant(1);
ret.d[4] = expr_builder.constant(1);
return ret;
}

@@ -169,12 +169,10 @@ nvinfer1::DimsExprs QkvToContextPluginDynamic::getOutputDimensions(
"it has (%d) inputs",
nb_inputs));
nvinfer1::DimsExprs ret;
ret.nbDims = 5;
ret.nbDims = 3;
ret.d[0] = inputs[0].d[0];
ret.d[1] = inputs[0].d[1];
ret.d[2] = expr_builder.constant(head_size_ * head_number_);
ret.d[3] = expr_builder.constant(1);
ret.d[4] = expr_builder.constant(1);
return ret;
}

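Taken together, the two plugin changes above flip the plugin outputs from padded rank-5 to plain rank-3. As a comment summary (shape letters illustrative, not from the commit):

// Plugin output shapes, before -> after this commit:
//   EmbEltwiseLayernorm: [B, S, hidden, 1, 1] -> [B, S, hidden]
//   QkvToContext: [B, S, head_size * head_number, 1, 1]
//                 -> [B, S, head_size * head_number]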
@@ -54,11 +54,6 @@ void SkipLayerNormPluginDynamic::terminate() {
nvinfer1::DimsExprs SkipLayerNormPluginDynamic::getOutputDimensions(
int output_index, const nvinfer1::DimsExprs *inputs, int nb_inputs,
nvinfer1::IExprBuilder &expr_builder) {
PADDLE_ENFORCE_EQ(
inputs[0].nbDims, 5,
platform::errors::InvalidArgument(
"The Input dim of the SkipLayernorm should be 5, but it's (%d) now.",
inputs[0].nbDims));
return inputs[0];
}
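The SkipLayerNorm change is purely subtractive; a one-line gloss (not from the commit):

// with inputs now 3-D, SkipLayerNorm just forwards inputs[0] unchanged,
// so the old rank-5 input assertion had to go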

