[x86] add x86 int8 conv framework-info (#6834)
* add x86 int8 conv framework-info. test=develop
chenjiaoAngel authored Sep 6, 2021
1 parent f28e992 commit 80b25fa
Showing 7 changed files with 259 additions and 28 deletions.
8 changes: 6 additions & 2 deletions lite/api/cxx_api.cc
@@ -358,8 +358,12 @@ void Predictor::Build(const std::shared_ptr<cpp::ProgramDesc> &program_desc,
}

if (IsQuantizedMode(program_desc_)) {
inner_places.insert(inner_places.begin(),
Place{TARGET(kARM), PRECISION(kInt8)});
for (auto &valid_place : valid_places) {
if (valid_place.target == TARGET(kARM)) {
inner_places.insert(inner_places.begin(),
Place{TARGET(kARM), PRECISION(kInt8)});
}
}
}

Program program(program_desc_, scope_, inner_places);
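With this change, the `kARM`/`kInt8` place is injected for quantized models only when an ARM target is already among the valid places, so x86-only place lists no longer pick up ARM int8 kernels. A minimal sketch of where that list comes from, assuming the usual Paddle-Lite full-API entry points (`CxxConfig`, `set_valid_places`, `CreatePaddlePredictor`); the model path is hypothetical:

```cpp
#include "paddle_api.h"  // Paddle-Lite public API header; install path may vary

using namespace paddle::lite_api;  // NOLINT

void BuildPredictor() {
  CxxConfig config;
  config.set_model_dir("./quantized_model");  // hypothetical model directory
  // An x86-only place list: Predictor::Build above will no longer insert an
  // ARM int8 place for this configuration, even for a quantized model.
  config.set_valid_places({Place{TARGET(kX86), PRECISION(kFloat)}});
  auto predictor = CreatePaddlePredictor<CxxConfig>(config);
}
```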
6 changes: 6 additions & 0 deletions lite/kernels/arm/conv_depthwise.cc
100755 → 100644
@@ -278,6 +278,12 @@ void DepthwiseConv<PRECISION(kInt8), PRECISION(kInt8)>::PrepareForRun() {
param.activation_param.Relu_clipped_coef =
param.activation_param.Relu_clipped_coef / param.output_scale;
}
//! update leakyRelu parameter
if (param.activation_param.active_type ==
lite_api::ActivationType::kLeakyRelu) {
param.activation_param.Leaky_relu_alpha =
param.activation_param.Leaky_relu_alpha / param.output_scale;
}

if (kw == 3) {
ReInitWhenNeeded();
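The new block rescales the leaky-relu alpha by `output_scale`, mirroring the relu6 handling just above it. For relu6 the motivation is direct: an int8 output value `q` represents `f = q * output_scale`, so a clip threshold stated in the float domain must be divided by `output_scale` before it can be compared against quantized values. A worked example with assumed numbers (illustrative only, not from the patch):

```cpp
#include <cstdio>

int main() {
  const float output_scale = 0.05f;  // assumed per-tensor output scale
  const float relu6_coef = 6.0f;     // clip threshold in the float domain
  // q = f / output_scale, so clipping f at 6.0 means clipping q at 120.0.
  const float coef_q = relu6_coef / output_scale;
  std::printf("int8-domain relu6 threshold: %.1f\n", coef_q);  // 120.0
  return 0;
}
```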
6 changes: 6 additions & 0 deletions lite/kernels/arm/conv_gemmlike.cc
@@ -84,6 +84,12 @@ void GemmLikeConv<PRECISION(kInt8), PRECISION(kInt8)>::PrepareForRun() {
param.activation_param.Relu_clipped_coef =
param.activation_param.Relu_clipped_coef / param.output_scale;
}
//! update leakyRelu parameter
if (param.activation_param.active_type ==
lite_api::ActivationType::kLeakyRelu) {
param.activation_param.Leaky_relu_alpha =
param.activation_param.Leaky_relu_alpha / param.output_scale;
}
}

PROFILE_INFO(kFloat, kFloat);
176 changes: 150 additions & 26 deletions lite/kernels/x86/conv_compute.cc
@@ -40,32 +40,32 @@ namespace x86 {
int n = hout * wout; \
int k = chin * kw * kh / group;

#define PREPARE_PARAM \
auto& param = this->Param<param_t>(); \
const int input_channel = param.x->dims()[1]; \
const int output_channel = param.filter->dims()[0]; \
const int groups = param.groups; \
const int kernel_h = param.filter->dims()[2]; \
const int kernel_w = param.filter->dims()[3]; \
const int stride_h = param.strides[0]; \
const int stride_w = param.strides[1]; \
auto paddings = *param.paddings; \
auto dilations = *param.dilations; \
bool dw_kernel = (input_channel == groups && output_channel == groups); \
bool ks_equal = (stride_h == stride_w) && (kernel_h == kernel_w); \
bool no_dilation = (dilations[0] == 1) && (dilations[1] == 1); \
bool kps_equal = (paddings[0] == paddings[2]) && ks_equal; \
bool pads_equal = \
((paddings[0] == paddings[1]) && (paddings[2] == paddings[3])); \
bool flag_dw_3x3 = \
(kernel_h == 3) && (kernel_w == 3) && (stride_h == 1 || stride_h == 2); \
bool flag_dw_5x5 = \
(kernel_h == 5) && (kernel_w == 5) && (stride_h == 1 || stride_h == 2);

template <>
void Conv2dCompute<PRECISION(kFloat), PRECISION(kFloat)>::PrepareForRun() {
auto& param = this->Param<param_t>();

const int input_channel = param.x->dims()[1];
const int output_channel = param.filter->dims()[0];
const int groups = param.groups;

const int kernel_h = param.filter->dims()[2];
const int kernel_w = param.filter->dims()[3];

const int stride_h = param.strides[0];
const int stride_w = param.strides[1];
auto paddings = *param.paddings;
auto dilations = *param.dilations;
bool dw_kernel = (input_channel == groups && output_channel == groups);
bool ks_equal = (stride_h == stride_w) && (kernel_h == kernel_w);
bool no_dilation = (dilations[0] == 1) && (dilations[1] == 1);
bool kps_equal = (paddings[0] == paddings[2]) && ks_equal;
bool pads_equal =
((paddings[0] == paddings[1]) && (paddings[2] == paddings[3]));
bool flag_dw_3x3 =
(kernel_h == 3) && (kernel_w == 3) && (stride_h == 1 || stride_h == 2);
bool flag_dw_5x5 =
(kernel_h == 5) && (kernel_w == 5) && (stride_h == 1 || stride_h == 2);
// todo add conv_5x5_depthwise implement
PREPARE_PARAM
//! todo add conv_5x5_depthwise implement
flag_dw_5x5 = false;
bool flag_dw = flag_dw_3x3 || flag_dw_5x5;
if (kernel_w == 1 && stride_w == 1 && paddings[0] == 0 && kps_equal &&
@@ -75,7 +75,7 @@ void Conv2dCompute<PRECISION(kFloat), PRECISION(kFloat)>::PrepareForRun() {
flag_1x1gemm_ = false;
}

/// select conv impl
//! select conv impl
if (dw_kernel && kps_equal && no_dilation && flag_dw && (groups & 3) == 0) {
impl_ = new DepthwiseConv<PRECISION(kFloat), PRECISION(kFloat)>;
}
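`flag_1x1gemm_` marks the case where im2col can be skipped entirely: a 1x1 convolution with stride 1 and no padding is already a GEMM with `n = hout * wout` and `k = chin / group` (the `INIT_PARAM` dimensions at the top of the file). A naive standalone sketch of that equivalence for a hypothetical single-group case (not the library's kernel):

```cpp
// out[oc][p] = sum over ic of w[oc][ic] * in[ic][p], p over the h*w positions.
void Conv1x1AsGemm(const float* in, const float* w, float* out,
                   int chin, int chout, int hw) {
  for (int oc = 0; oc < chout; ++oc) {
    for (int p = 0; p < hw; ++p) {
      float acc = 0.f;
      for (int ic = 0; ic < chin; ++ic) {
        acc += w[oc * chin + ic] * in[ic * hw + p];
      }
      out[oc * hw + p] = acc;
    }
  }
}
```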
@@ -165,12 +165,136 @@ void Conv2dCompute<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
n);
}
}
// bias and activate
//! bias and activate
lite::x86::math::fill_bias_act(
dout_batch, bias_ptr, chout, wout * hout, flag_bias, &act_param);
}
if (!flag_1x1gemm_) TargetFree(TARGET(kX86), col_data);
}

template <>
void Conv2dCompute<PRECISION(kInt8), PRECISION(kFloat)>::PrepareForRun() {
PREPARE_PARAM
//! todo add conv_5x5_depthwise implement
flag_dw_5x5 = false;
bool flag_dw = flag_dw_3x3 || flag_dw_5x5;
if (kernel_w == 1 && stride_w == 1 && paddings[0] == 0 && kps_equal &&
pads_equal) {
flag_1x1gemm_ = true;
} else {
flag_1x1gemm_ = false;
}

//! select conv impl
if (dw_kernel && kps_equal && no_dilation && flag_dw) {
impl_ = new DepthwiseConv<PRECISION(kInt8), PRECISION(kFloat)>;
} else {
//! update scale
w_scale_ = param.weight_scale;
if (w_scale_.size() != 1 && w_scale_.size() != param.filter->dims()[0]) {
LOG(FATAL) << "weight scale size must be 1 or equal to the filter output channel size";
return;
}
if (w_scale_.size() == 1) {
for (int i = 0; i < param.filter->dims()[0] - 1; ++i) {
w_scale_.push_back(w_scale_[0]);
}
}
float input_scale = param.input_scale;
for (auto& ws : w_scale_) {
ws *= input_scale;
}
}

if (impl_) {
impl_->SetContext(std::move(this->ctx_));
impl_->SetParam(param);
impl_->PrepareForRun();
is_first_epoch_ = false;
}
}

template <>
void Conv2dCompute<PRECISION(kInt8), PRECISION(kFloat)>::Run() {
if (impl_) {
return impl_->Run();
}
//! todo add int8 gemm
}
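Folding `input_scale` into each entry of `w_scale_` means the int8-input/float-output path can dequantize an int32 accumulator with a single multiply per element. A sketch of the assumed epilogue (the int8 GEMM itself is still a TODO above):

```cpp
#include <cstdint>

// Assumed semantics, not the library's kernel: fused_scale is
// input_scale * w_scale_[oc] as prepared in PrepareForRun above.
inline float Dequantize(int32_t acc, float fused_scale) {
  return static_cast<float>(acc) * fused_scale;
}
```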

template <>
void Conv2dCompute<PRECISION(kInt8), PRECISION(kInt8)>::PrepareForRun() {
PREPARE_PARAM
// todo add conv_5x5_depthwise implement
flag_dw_5x5 = false;
bool flag_dw = flag_dw_3x3 || flag_dw_5x5;
if (kernel_w == 1 && stride_w == 1 && paddings[0] == 0 && kps_equal &&
pads_equal) {
flag_1x1gemm_ = true;
} else {
flag_1x1gemm_ = false;
}

//! select conv impl
if (dw_kernel && kps_equal && no_dilation && flag_dw) {
impl_ = new DepthwiseConv<PRECISION(kInt8), PRECISION(kInt8)>;
} else {
//! update scale
w_scale_ = param.weight_scale;
if (w_scale_.size() != 1 && w_scale_.size() != param.filter->dims()[0]) {
LOG(FATAL) << "weight scale size must be 1 or equal to the filter output channel size";
return;
}
if (w_scale_.size() == 1) {
for (int i = 0; i < param.filter->dims()[0] - 1; ++i) {
w_scale_.push_back(w_scale_[0]);
}
}
float input_scale = param.input_scale;
float output_scale = param.output_scale;
for (auto& ws : w_scale_) {
ws = ws * input_scale / output_scale;
}
//! update bias
if (param.bias) {
bias_.Resize(param.bias->dims());
auto ptr = bias_.mutable_data<float>();
auto ptr_in = param.bias->data<float>();
for (int i = 0; i < bias_.numel(); ++i) {
ptr[i] = ptr_in[i] / param.output_scale;
}
flag_trans_bias_ = true;
}
//! update relu6 parameter
if (param.activation_param.active_type ==
lite_api::ActivationType::kRelu6) {
param.activation_param.Relu_clipped_coef =
param.activation_param.Relu_clipped_coef / param.output_scale;
}
//! update leakyRelu parameter
if (param.activation_param.active_type ==
lite_api::ActivationType::kLeakyRelu) {
param.activation_param.Leaky_relu_alpha =
param.activation_param.Leaky_relu_alpha / param.output_scale;
}
}

if (impl_) {
impl_->SetContext(std::move(this->ctx_));
impl_->SetParam(param);
impl_->PrepareForRun();
is_first_epoch_ = false;
}
}
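For int8 output the fused scale additionally divides by `output_scale`, and the bias and activation parameters were pre-divided by it above, so the per-element epilogue stays one multiply-add plus a clamp. A sketch under those assumptions, using a symmetric int8 range (not the library's kernel):

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>

// fused_scale = input_scale * w_scale_[oc] / output_scale;
// bias_q = bias[oc] / output_scale, as prepared in PrepareForRun above.
inline int8_t Requantize(int32_t acc, float fused_scale, float bias_q) {
  float v = static_cast<float>(acc) * fused_scale + bias_q;
  v = std::max(-127.0f, std::min(127.0f, std::round(v)));  // symmetric clamp
  return static_cast<int8_t>(v);
}
```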

template <>
void Conv2dCompute<PRECISION(kInt8), PRECISION(kInt8)>::Run() {
if (impl_) {
return impl_->Run();
}
//! todo add int8 gemm
}
#undef PREPARE_PARAM
#undef INIT_PARAM
} // namespace x86
} // namespace kernels
4 changes: 4 additions & 0 deletions lite/kernels/x86/conv_compute.h
@@ -76,6 +76,10 @@ class Conv2dCompute : public KernelLite<TARGET(kX86), Ptype> {
using param_t = operators::ConvParam;
KernelLite<TARGET(kX86), Ptype>* impl_{nullptr};
bool flag_1x1gemm_{false};
bool flag_trans_bias_{false};
std::vector<float> w_scale_;
Tensor weights_;
Tensor bias_;
};

} // namespace x86
83 changes: 83 additions & 0 deletions lite/kernels/x86/conv_depthwise.cc
@@ -22,6 +22,9 @@ namespace lite {
namespace kernels {
namespace x86 {

template <>
void DepthwiseConv<PRECISION(kFloat), PRECISION(kFloat)>::PrepareForRun() {}

template <>
void DepthwiseConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
auto& param = this->Param<param_t>();
@@ -144,6 +147,86 @@ void DepthwiseConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {

PROFILE_INFO(kFloat, kFloat)

template <>
void DepthwiseConv<PRECISION(kInt8), PRECISION(kFloat)>::PrepareForRun() {
auto& param = this->Param<param_t>();

//! update scale
w_scale_ = param.weight_scale;
if (w_scale_.size() != 1 && w_scale_.size() != param.filter->dims()[0]) {
LOG(FATAL) << "weight scale size must be 1 or equal to the filter output channel size";
return;
}
if (w_scale_.size() == 1) {
for (int i = 0; i < param.filter->dims()[0] - 1; ++i) {
w_scale_.push_back(w_scale_[0]);
}
}
float input_scale = param.input_scale;
for (auto& ws : w_scale_) {
ws *= input_scale;
}
}
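As in `conv_compute.cc`, a lone per-tensor weight scale is broadcast so that downstream code can always index one scale per output channel. An equivalent standalone form (hypothetical helper, same behavior as the loop above):

```cpp
#include <vector>

// Expand a per-tensor scale to per-channel form; per-channel input is
// returned unchanged.
std::vector<float> ExpandPerChannel(std::vector<float> scale, int output_channels) {
  if (scale.size() == 1) scale.assign(output_channels, scale[0]);
  return scale;
}
```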

#define CONV_DW_INT8_PARAM \
o_data, i_data, w_data, b_data, bs, ic, iw, ih, oh, ow, flag_act, alpha, \
w_scale_, ctx
template <>
void DepthwiseConv<PRECISION(kInt8), PRECISION(kFloat)>::Run() {
//! todo add implementation
}

PROFILE_INFO(kInt8, kFloat)

template <>
void DepthwiseConv<PRECISION(kInt8), PRECISION(kInt8)>::PrepareForRun() {
auto& param = this->Param<param_t>();

//! update scale
w_scale_ = param.weight_scale;
if (w_scale_.size() != 1 && w_scale_.size() != param.filter->dims()[0]) {
LOG(FATAL) << "weight scale size must be 1 or equal to the filter output channel size";
return;
}
if (w_scale_.size() == 1) {
for (int i = 0; i < param.filter->dims()[0] - 1; ++i) {
w_scale_.push_back(w_scale_[0]);
}
}
float input_scale = param.input_scale;
for (auto& ws : w_scale_) {
ws *= input_scale;
}
//! update bias
if (param.bias) {
bias_.Resize(param.bias->dims());
auto ptr = bias_.mutable_data<float>();
auto ptr_in = param.bias->data<float>();
for (int i = 0; i < bias_.numel(); ++i) {
ptr[i] = ptr_in[i] / param.output_scale;
}
flag_trans_bias_ = true;
}
//! update relu6 parameter
if (param.activation_param.active_type == lite_api::ActivationType::kRelu6) {
param.activation_param.Relu_clipped_coef =
param.activation_param.Relu_clipped_coef / param.output_scale;
}
//! update leakyRelu parameter
if (param.activation_param.active_type ==
lite_api::ActivationType::kLeakyRelu) {
param.activation_param.Leaky_relu_alpha =
param.activation_param.Leaky_relu_alpha / param.output_scale;
}
}

template <>
void DepthwiseConv<PRECISION(kInt8), PRECISION(kInt8)>::Run() {
//! todo add implementation
}

PROFILE_INFO(kInt8, kInt8)
#undef CONV_DW_INT8_PARAM
} // namespace x86
} // namespace kernels
} // namespace lite
4 changes: 4 additions & 0 deletions lite/kernels/x86/conv_depthwise.h
@@ -29,6 +29,7 @@ class DepthwiseConv : public KernelLite<TARGET(kX86), Ptype> {
public:
DepthwiseConv() = default;
~DepthwiseConv() {}
void PrepareForRun() override;
virtual void Run();

#ifdef LITE_WITH_PROFILE
@@ -58,6 +59,9 @@ class DepthwiseConv : public KernelLite<TARGET(kX86), Ptype> {
Tensor input_padding_;
Tensor filter_pack_;
Tensor output_pack_;
bool flag_trans_bias_{false};
std::vector<float> w_scale_;
Tensor bias_;
};

} // namespace x86
Expand Down
