Commit
move padding shape x 1 x 1 in ernie to qkv and fc
shangzhizhou committed Mar 19, 2021
1 parent ab77eb2 commit 70cd7e3
Showing 5 changed files with 105 additions and 26 deletions.
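In brief: the ERNIE plugins used to pad their outputs to rank 5 with a trailing "x 1 x 1" so that downstream TensorRT FullyConnected layers would accept them; this commit makes the plugins emit plain 3-D tensors and moves the padding into the FC and QKV (multihead matmul) converters, the spots where FullyConnected actually needs it. A minimal sketch of the padding pattern, assuming a caller-provided nvinfer1 network and a rank-3 input (the function and variable names here are hypothetical, not from the commit):

#include <NvInfer.h>

// Reshape [B, S, H] to [B, S, H, 1, 1] so the tensor can feed
// nvinfer1::IFullyConnectedLayer, which flattens the last three dims.
nvinfer1::ITensor* PadForFullyConnected(nvinfer1::INetworkDefinition* network,
                                        nvinfer1::ITensor* input) {
  nvinfer1::Dims in_dims = input->getDimensions();
  nvinfer1::Dims padded;
  padded.nbDims = in_dims.nbDims + 2;
  for (int i = 0; i < in_dims.nbDims; ++i) {
    padded.d[i] = 0;  // 0 = keep the corresponding input dimension
  }
  padded.d[padded.nbDims - 2] = 1;  // the trailing "x 1"
  padded.d[padded.nbDims - 1] = 1;  // ... and "x 1"
  nvinfer1::IShuffleLayer* shuffle = network->addShuffle(*input);
  shuffle->setReshapeDimensions(padded);
  return shuffle->getOutput(0);
}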
75 changes: 68 additions & 7 deletions paddle/fluid/inference/tensorrt/convert/fc_op.cc
@@ -142,9 +142,76 @@ class FcOpConverter : public OpConverter {
TensorRTEngine::Weight bias{nvinfer1::DataType::kFLOAT,
static_cast<void*>(bias_data),
static_cast<size_t>(bias_num)};
PADDLE_ENFORCE_EQ(
x_num_col_dims == 1 || x_num_col_dims == 2, true,
platform::errors::InvalidArgument(
"Wrong x_num_col_dims param of op mul. Paddle-TRT FC converter "
"expects x_num_col_dims is either 1 or 2, but got %d",
x_num_col_dims));

if (engine_->with_dynamic_shape()) {
regist_fc(X, n_output, weight, bias);
// not NCHW layout but NLP layout; pad with a trailing 'x 1 x 1'
auto x_dim = X->getDimensions();
if (x_dim.nbDims == 3 || x_dim.nbDims == 2) {
auto output_name = op_desc.Output("Out").front();
// add shuffle before fc
nvinfer1::Dims before_reshape_dim;
before_reshape_dim.nbDims = x_dim.nbDims + 2;
for (int i = 0; i < x_dim.nbDims; i++) {
before_reshape_dim.d[i] = 0;
}
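// note: a 0 passed to setReshapeDimensions keeps the corresponding input
// dimension, so batch and sequence length stay dynamic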
before_reshape_dim.d[x_dim.nbDims] = 1;
before_reshape_dim.d[x_dim.nbDims + 1] = 1;
auto* before_reshape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
before_reshape_layer->setReshapeDimensions(before_reshape_dim);
before_reshape_layer->setName(
("fc_before_shuffle(Output: " + output_name + ")").c_str());

// add fc layer
auto* fc_layer = TRT_ENGINE_ADD_LAYER(
engine_, FullyConnected, *before_reshape_layer->getOutput(0),
n_output, weight.get(), bias.get());
fc_layer->setName(("fc_layer(Output: " + output_name + ")").c_str());

// add shuffle after fc
nvinfer1::Dims after_reshape_dim;
if (x_dim.nbDims == 3) {
if (x_num_col_dims == 2) {
after_reshape_dim.nbDims = 3;
after_reshape_dim.d[0] = 0;
after_reshape_dim.d[1] = 0;
after_reshape_dim.d[2] = 0;
} else {
after_reshape_dim.nbDims = 2;
after_reshape_dim.d[0] = 0;
auto dim = fc_layer->getOutput(0)->getDimensions();
after_reshape_dim.d[1] = dim.d[1] * dim.d[2];
}
} else {
// x_dim.nbDims == 2
after_reshape_dim.nbDims = 2;
after_reshape_dim.d[0] = 0;
after_reshape_dim.d[1] = 0;
}
auto* after_reshape_layer =
TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *fc_layer->getOutput(0));
after_reshape_layer->setReshapeDimensions(after_reshape_dim);
after_reshape_layer->setName(
("fc_after_shuffle(Output: " + output_name + ")").c_str());

if (activation_type == "relu") {
nvinfer1::IActivationLayer* relu_layer = TRT_ENGINE_ADD_LAYER(
engine_, Activation, *(after_reshape_layer->getOutput(0)),
nvinfer1::ActivationType::kRELU);
RreplenishLayerAndOutput(relu_layer, "fc_after_relu", {output_name},
test_mode);
} else {
RreplenishLayerAndOutput(after_reshape_layer, "fc_after_shuffle",
{output_name}, test_mode);
}
} else {
regist_fc(X, n_output, weight, bias);
}
return;
}
// in order to handle situations in NLP models (input dims < 3,
@@ -154,12 +221,6 @@ class FcOpConverter : public OpConverter {
auto input_d = X->getDimensions().d;
int reshape_dim3[3] = {0};
int reshape_dim4[4] = {0};
PADDLE_ENFORCE_EQ(
x_num_col_dims == 1 || x_num_col_dims == 2, true,
platform::errors::InvalidArgument(
"Wrong x_num_col_dims param of op mul. Paddle-TRT FC converter "
"expects x_num_col_dims is either 1 or 2, but got %d",
x_num_col_dims));
PADDLE_ENFORCE_LE(x_num_col_dims, input_dims,
platform::errors::InvalidArgument(
"Params and input dims mismatch. Paddle-TRT FC "
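For intuition, the shape flow through the new dynamic-shape FC path above, as a comment sketch (B/S/H are illustrative placeholders, not from the commit):

// 3-D input [B, S, H]:
//   before-shuffle: [B, S, H] -> [B, S, H, 1, 1]
//   FullyConnected: [B, S, H, 1, 1] -> [B, S, n_output, 1, 1]
//   after-shuffle with x_num_col_dims == 2: -> [B, S, n_output]
//   after-shuffle with x_num_col_dims == 1: -> [B, S * n_output]
// 2-D input [B, H]:
//   [B, H] -> [B, H, 1, 1] -> FC -> [B, n_output, 1, 1] -> [B, n_output]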
43 changes: 35 additions & 8 deletions paddle/fluid/inference/tensorrt/convert/multihead_matmul_op.cc
@@ -8,8 +8,8 @@ You may obtain a copy of the License at
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See
the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
@@ -28,8 +28,13 @@ class MultiheadMatMulOpConverter : public OpConverter {
"network structure";
framework::OpDesc op_desc(op, nullptr);
// Declare inputs
// Should be a 5-D tensor.
// Should be a 3-D tensor.
auto* input = engine_->GetITensor(op_desc.Input("Input").front());
PADDLE_ENFORCE_EQ(input->getDimensions().nbDims, 3,
platform::errors::InvalidArgument(
"The Input dim of the MultiheadMatMul should be 3, "
"but it's (%d) now.",
input->getDimensions().nbDims));

// fc weights and fc bias
auto weight_name = op_desc.Input("W").front();
@@ -69,6 +74,7 @@ class MultiheadMatMulOpConverter : public OpConverter {
int head_number = BOOST_GET_CONST(int, op_desc.GetAttr("head_number"));

nvinfer1::ILayer* layer = nullptr;
auto output_name = op_desc.Output("Out")[0];

if (engine_->with_dynamic_shape()) {
if (engine_->use_oss()) {
@@ -184,15 +190,37 @@
static_cast<void*>(bias_data),
static_cast<size_t>(bias_t->numel())};

auto* fc_layer = TRT_ENGINE_ADD_LAYER(engine_, FullyConnected, *input,
n, weight.get(), bias.get());
auto* fc_out = fc_layer->getOutput(0);
// add shuffle before fc
nvinfer1::Dims before_reshape_dim;
before_reshape_dim.nbDims = 5;
before_reshape_dim.d[0] = 0;
before_reshape_dim.d[1] = 0;
before_reshape_dim.d[2] = 0;
before_reshape_dim.d[3] = 1;
before_reshape_dim.d[4] = 1;
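// as in fc_op.cc, each 0 keeps the corresponding input dimension
// (batch, seq_len, hidden); the trailing 1 x 1 only satisfies
// FullyConnected, which flattens the last three dimensions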
auto* before_reshape_layer =
TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
before_reshape_layer->setReshapeDimensions(before_reshape_dim);
before_reshape_layer->setName(
("multihead_mamul_before_shuffle(Output: " + output_name + ")")
.c_str());

// add fc layer
auto* fc_layer = TRT_ENGINE_ADD_LAYER(
engine_, FullyConnected, *before_reshape_layer->getOutput(0), n,
weight.get(), bias.get());
fc_layer->setName(
("multihead_mamul_fc(Output: " + output_name + ")").c_str());

// no need to add shuffle after fc, just change it in
// QkvToContextPluginDynamic
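// (QkvToContextPluginDynamic::getOutputDimensions now reports a 3-D
// output directly; see the plugin diff below.)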

// add qkv to context
int head_size = hidden_out / head_number;
float scale = BOOST_GET_CONST(float, op_desc.GetAttr("alpha"));

std::vector<nvinfer1::ITensor*> plugin_inputs;
plugin_inputs.push_back(fc_out);
plugin_inputs.push_back(fc_layer->getOutput(0));
plugin_inputs.push_back(input_bias_qk);
bool with_fp16 =
engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
@@ -208,7 +236,6 @@
"You can use the config.SetTRTDynamicShapeInfo(...) interface to set "
"the shape information to run the dynamic shape mode."));
}
auto output_name = op_desc.Output("Out")[0];
RreplenishLayerAndOutput(layer, "multihead_matmul", {output_name},
test_mode);
#else
@@ -200,12 +200,10 @@ nvinfer1::DimsExprs EmbEltwiseLayernormPluginDynamic::getOutputDimensions(
"but it's (%d)",
output_index));
nvinfer1::DimsExprs ret;
ret.nbDims = 5;
ret.nbDims = 3;
ret.d[0] = inputs[0].d[0];
ret.d[1] = inputs[0].d[1];
ret.d[2] = expr_builder.constant(hidden_size_);
ret.d[3] = expr_builder.constant(1);
ret.d[4] = expr_builder.constant(1);
return ret;
}

@@ -169,12 +169,10 @@ nvinfer1::DimsExprs QkvToContextPluginDynamic::getOutputDimensions(
"it has (%d) inputs",
nb_inputs));
nvinfer1::DimsExprs ret;
ret.nbDims = 5;
ret.nbDims = 3;
ret.d[0] = inputs[0].d[0];
ret.d[1] = inputs[0].d[1];
ret.d[2] = expr_builder.constant(head_size_ * head_number_);
ret.d[3] = expr_builder.constant(1);
ret.d[4] = expr_builder.constant(1);
return ret;
}

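Taken together, the two plugin changes above flip the plugin outputs from padded rank-5 to plain rank-3. As a comment summary (shape letters illustrative, not from the commit):

// Plugin output shapes, before -> after this commit:
//   EmbEltwiseLayernorm: [B, S, hidden, 1, 1] -> [B, S, hidden]
//   QkvToContext: [B, S, head_size * head_number, 1, 1]
//                 -> [B, S, head_size * head_number]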
@@ -54,11 +54,6 @@ void SkipLayerNormPluginDynamic::terminate() {
nvinfer1::DimsExprs SkipLayerNormPluginDynamic::getOutputDimensions(
int output_index, const nvinfer1::DimsExprs *inputs, int nb_inputs,
nvinfer1::IExprBuilder &expr_builder) {
PADDLE_ENFORCE_EQ(
inputs[0].nbDims, 5,
platform::errors::InvalidArgument(
"The Input dim of the SkipLayernorm should be 5, but it's (%d) now.",
inputs[0].nbDims));
return inputs[0];
}
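The SkipLayerNorm change is purely subtractive; a one-line gloss (not from the commit):

// with inputs now 3-D, SkipLayerNorm just forwards inputs[0] unchanged,
// so the old rank-5 input assertion had to go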

