From 80b25faa959e58c1e339d1f8ca2cc75ed1d33f25 Mon Sep 17 00:00:00 2001
From: HappyAngel
Date: Mon, 6 Sep 2021 14:45:49 +0800
Subject: [PATCH] [x86] add x86 int8 conv framework-info (#6834)

* add x86 int8 conv framework-info. test=develop

Summary of what this patch carries:
  - lite/api/cxx_api.cc: in quantized mode the ARM int8 place is inserted at
    the front of inner_places once for each ARM entry found in valid_places,
    instead of unconditionally.
  - lite/kernels/arm/conv_depthwise.cc, conv_gemmlike.cc: Leaky_relu_alpha is
    divided by output_scale, right after the existing relu6 handling.
  - lite/kernels/x86/conv_compute.cc: the parameter unpacking of
    PrepareForRun() moves into the PREPARE_PARAM macro; two more
    Conv2dCompute PrepareForRun()/Run() pairs are added. They either select
    DepthwiseConv or prepare per-output-channel weight scales (the second
    pair also rescales bias, relu6 and leakyRelu). Run() only forwards to
    impl_; the int8 gemm path is still a todo.
  - lite/kernels/x86/conv_depthwise.cc/.h: PrepareForRun() is declared and
    defined for DepthwiseConv; the two new Run() bodies are stubs.

Review fix applied to this patch: the DepthwiseConv PrepareForRun() that
precedes PROFILE_INFO(kInt8, kInt8) divides bias, Relu_clipped_coef and
Leaky_relu_alpha by output_scale, so its weight scales now use
ws * input_scale / output_scale as well, matching the corresponding
Conv2dCompute PrepareForRun() in conv_compute.cc.
---
 lite/api/cxx_api.cc                |   8 +-
 lite/kernels/arm/conv_depthwise.cc |   6 +
 lite/kernels/arm/conv_gemmlike.cc  |   6 +
 lite/kernels/x86/conv_compute.cc   | 176 ++++++++++++++++++++++++-----
 lite/kernels/x86/conv_compute.h    |   4 +
 lite/kernels/x86/conv_depthwise.cc |  83 ++++++++++++++
 lite/kernels/x86/conv_depthwise.h  |   4 +
 7 files changed, 259 insertions(+), 28 deletions(-)
 mode change 100755 => 100644 lite/kernels/arm/conv_depthwise.cc

diff --git a/lite/api/cxx_api.cc b/lite/api/cxx_api.cc
index f0a3ff6481b..34356be9b0d 100644
--- a/lite/api/cxx_api.cc
+++ b/lite/api/cxx_api.cc
@@ -358,8 +358,12 @@ void Predictor::Build(const std::shared_ptr &program_desc,
   }
 
   if (IsQuantizedMode(program_desc_)) {
-    inner_places.insert(inner_places.begin(),
-                        Place{TARGET(kARM), PRECISION(kInt8)});
+    for (auto &valid_place : valid_places) {
+      if (valid_place.target == TARGET(kARM)) {
+        inner_places.insert(inner_places.begin(),
+                            Place{TARGET(kARM), PRECISION(kInt8)});
+      }
+    }
   }
 
   Program program(program_desc_, scope_, inner_places);
diff --git a/lite/kernels/arm/conv_depthwise.cc b/lite/kernels/arm/conv_depthwise.cc
old mode 100755
new mode 100644
index 65aae5af1f2..51824951532
--- a/lite/kernels/arm/conv_depthwise.cc
+++ b/lite/kernels/arm/conv_depthwise.cc
@@ -278,6 +278,12 @@ void DepthwiseConv::PrepareForRun() {
     param.activation_param.Relu_clipped_coef =
         param.activation_param.Relu_clipped_coef / param.output_scale;
   }
+  //! update leakyRelu parameter
+  if (param.activation_param.active_type ==
+      lite_api::ActivationType::kLeakyRelu) {
+    param.activation_param.Leaky_relu_alpha =
+        param.activation_param.Leaky_relu_alpha / param.output_scale;
+  }
 
   if (kw == 3) {
     ReInitWhenNeeded();
diff --git a/lite/kernels/arm/conv_gemmlike.cc b/lite/kernels/arm/conv_gemmlike.cc
index 107fe16ab0d..ebd7a1da4c1 100644
--- a/lite/kernels/arm/conv_gemmlike.cc
+++ b/lite/kernels/arm/conv_gemmlike.cc
@@ -84,6 +84,12 @@ void GemmLikeConv::PrepareForRun() {
     param.activation_param.Relu_clipped_coef =
         param.activation_param.Relu_clipped_coef / param.output_scale;
   }
+  //! update leakyRelu parameter
+  if (param.activation_param.active_type ==
+      lite_api::ActivationType::kLeakyRelu) {
+    param.activation_param.Leaky_relu_alpha =
+        param.activation_param.Leaky_relu_alpha / param.output_scale;
+  }
 }
 
 PROFILE_INFO(kFloat, kFloat);
diff --git a/lite/kernels/x86/conv_compute.cc b/lite/kernels/x86/conv_compute.cc
index e0c8e1987ae..4cedfde4522 100644
--- a/lite/kernels/x86/conv_compute.cc
+++ b/lite/kernels/x86/conv_compute.cc
@@ -40,32 +40,32 @@ namespace x86 {
   int n = hout * wout;                     \
   int k = chin * kw * kh / group;
 
+#define PREPARE_PARAM                                                         \
+  auto& param = this->Param();                                                \
+  const int input_channel = param.x->dims()[1];                               \
+  const int output_channel = param.filter->dims()[0];                         \
+  const int groups = param.groups;                                            \
+  const int kernel_h = param.filter->dims()[2];                               \
+  const int kernel_w = param.filter->dims()[3];                               \
+  const int stride_h = param.strides[0];                                      \
+  const int stride_w = param.strides[1];                                      \
+  auto paddings = *param.paddings;                                            \
+  auto dilations = *param.dilations;                                          \
+  bool dw_kernel = (input_channel == groups && output_channel == groups);     \
+  bool ks_equal = (stride_h == stride_w) && (kernel_h == kernel_w);           \
+  bool no_dilation = (dilations[0] == 1) && (dilations[1] == 1);              \
+  bool kps_equal = (paddings[0] == paddings[2]) && ks_equal;                  \
+  bool pads_equal =                                                           \
+      ((paddings[0] == paddings[1]) && (paddings[2] == paddings[3]));         \
+  bool flag_dw_3x3 =                                                          \
+      (kernel_h == 3) && (kernel_w == 3) && (stride_h == 1 || stride_h == 2); \
+  bool flag_dw_5x5 =                                                          \
+      (kernel_h == 5) && (kernel_w == 5) && (stride_h == 1 || stride_h == 2);
+
 template <>
 void Conv2dCompute::PrepareForRun() {
-  auto& param = this->Param();
-
-  const int input_channel = param.x->dims()[1];
-  const int output_channel = param.filter->dims()[0];
-  const int groups = param.groups;
-
-  const int kernel_h = param.filter->dims()[2];
-  const int kernel_w = param.filter->dims()[3];
-
-  const int stride_h = param.strides[0];
-  const int stride_w = param.strides[1];
-  auto paddings = *param.paddings;
-  auto dilations = *param.dilations;
-  bool dw_kernel = (input_channel == groups && output_channel == groups);
-  bool ks_equal = (stride_h == stride_w) && (kernel_h == kernel_w);
-  bool no_dilation = (dilations[0] == 1) && (dilations[1] == 1);
-  bool kps_equal = (paddings[0] == paddings[2]) && ks_equal;
-  bool pads_equal =
-      ((paddings[0] == paddings[1]) && (paddings[2] == paddings[3]));
-  bool flag_dw_3x3 =
-      (kernel_h == 3) && (kernel_w == 3) && (stride_h == 1 || stride_h == 2);
-  bool flag_dw_5x5 =
-      (kernel_h == 5) && (kernel_w == 5) && (stride_h == 1 || stride_h == 2);
-  // todo add conv_5x5_depthwise implement
+  PREPARE_PARAM
+  //! todo add conv_5x5_depthwise implement
   flag_dw_5x5 = false;
   bool flag_dw = flag_dw_3x3 || flag_dw_5x5;
   if (kernel_w == 1 && stride_w == 1 && paddings[0] == 0 && kps_equal &&
@@ -75,7 +75,7 @@ void Conv2dCompute::PrepareForRun() {
     flag_1x1gemm_ = false;
   }
 
-  /// select conv impl
+  //! select conv impl
   if (dw_kernel && kps_equal && no_dilation && flag_dw && (groups & 3) == 0) {
     impl_ = new DepthwiseConv;
   }
@@ -165,12 +165,136 @@ void Conv2dCompute::Run() {
                 n);
       }
     }
-    // bias and activate
+    //! bias and activate
     lite::x86::math::fill_bias_act(
         dout_batch, bias_ptr, chout, wout * hout, flag_bias, &act_param);
   }
   if (!flag_1x1gemm_) TargetFree(TARGET(kX86), col_data);
 }
+
+template <>
+void Conv2dCompute::PrepareForRun() {
+  PREPARE_PARAM
+  //! todo add conv_5x5_depthwise implement
+  flag_dw_5x5 = false;
+  bool flag_dw = flag_dw_3x3 || flag_dw_5x5;
+  if (kernel_w == 1 && stride_w == 1 && paddings[0] == 0 && kps_equal &&
+      pads_equal) {
+    flag_1x1gemm_ = true;
+  } else {
+    flag_1x1gemm_ = false;
+  }
+
+  //! select conv impl
+  if (dw_kernel && kps_equal && no_dilation && flag_dw) {
+    impl_ = new DepthwiseConv;
+  } else {
+    //! update scale
+    w_scale_ = param.weight_scale;
+    if (w_scale_.size() != 1 && w_scale_.size() != param.filter->dims()[0]) {
+      LOG(FATAL) << "weights scale size must equal to filter size";
+      return;
+    }
+    if (w_scale_.size() == 1) {
+      for (int i = 0; i < param.filter->dims()[0] - 1; ++i) {
+        w_scale_.push_back(w_scale_[0]);
+      }
+    }
+    float input_scale = param.input_scale;
+    for (auto& ws : w_scale_) {
+      ws *= input_scale;
+    }
+  }
+
+  if (impl_) {
+    impl_->SetContext(std::move(this->ctx_));
+    impl_->SetParam(param);
+    impl_->PrepareForRun();
+    is_first_epoch_ = false;
+  }
+}
+
+template <>
+void Conv2dCompute::Run() {
+  if (impl_) {
+    return impl_->Run();
+  }
+  //! todo add int8 gemm
+}
+
+template <>
+void Conv2dCompute::PrepareForRun() {
+  PREPARE_PARAM
+  // todo add conv_5x5_depthwise implement
+  flag_dw_5x5 = false;
+  bool flag_dw = flag_dw_3x3 || flag_dw_5x5;
+  if (kernel_w == 1 && stride_w == 1 && paddings[0] == 0 && kps_equal &&
+      pads_equal) {
+    flag_1x1gemm_ = true;
+  } else {
+    flag_1x1gemm_ = false;
+  }
+
+  //! select conv impl
+  if (dw_kernel && kps_equal && no_dilation && flag_dw) {
+    impl_ = new DepthwiseConv;
+  } else {
+    //! update scale
+    w_scale_ = param.weight_scale;
+    if (w_scale_.size() != 1 && w_scale_.size() != param.filter->dims()[0]) {
+      LOG(FATAL) << "weights scale size must equal to filter size";
+      return;
+    }
+    if (w_scale_.size() == 1) {
+      for (int i = 0; i < param.filter->dims()[0] - 1; ++i) {
+        w_scale_.push_back(w_scale_[0]);
+      }
+    }
+    float input_scale = param.input_scale;
+    float output_scale = param.output_scale;
+    for (auto& ws : w_scale_) {
+      ws = ws * input_scale / output_scale;
+    }
+    //! update bias
+    if (param.bias) {
+      bias_.Resize(param.bias->dims());
+      auto ptr = bias_.mutable_data();
+      auto ptr_in = param.bias->data();
+      for (int i = 0; i < bias_.numel(); ++i) {
+        ptr[i] = ptr_in[i] / param.output_scale;
+      }
+      flag_trans_bias_ = true;
+    }
+    //! update relu6 parameter
+    if (param.activation_param.active_type ==
+        lite_api::ActivationType::kRelu6) {
+      param.activation_param.Relu_clipped_coef =
+          param.activation_param.Relu_clipped_coef / param.output_scale;
+    }
+    //! update leakyRelu parameter
+    if (param.activation_param.active_type ==
+        lite_api::ActivationType::kLeakyRelu) {
+      param.activation_param.Leaky_relu_alpha =
+          param.activation_param.Leaky_relu_alpha / param.output_scale;
+    }
+  }
+
+  if (impl_) {
+    impl_->SetContext(std::move(this->ctx_));
+    impl_->SetParam(param);
+    impl_->PrepareForRun();
+    is_first_epoch_ = false;
+  }
+}
+
+template <>
+void Conv2dCompute::Run() {
+  if (impl_) {
+    return impl_->Run();
+  }
+  //! todo add int8 gemm
+}
+#undef PREPARE_PARAM
 #undef INIT_PARAM
 }  // namespace x86
 }  // namespace kernels
diff --git a/lite/kernels/x86/conv_compute.h b/lite/kernels/x86/conv_compute.h
index 5f124668e9c..26f9a6da2d6 100644
--- a/lite/kernels/x86/conv_compute.h
+++ b/lite/kernels/x86/conv_compute.h
@@ -76,6 +76,10 @@ class Conv2dCompute : public KernelLite {
   using param_t = operators::ConvParam;
   KernelLite* impl_{nullptr};
   bool flag_1x1gemm_{false};
+  bool flag_trans_bias_{true};
+  std::vector w_scale_;
+  Tensor weights_;
+  Tensor bias_;
 };
 
 }  // namespace x86
diff --git a/lite/kernels/x86/conv_depthwise.cc b/lite/kernels/x86/conv_depthwise.cc
index ab54521c6fa..5ba0d498b16 100644
--- a/lite/kernels/x86/conv_depthwise.cc
+++ b/lite/kernels/x86/conv_depthwise.cc
@@ -22,6 +22,9 @@ namespace lite {
 namespace kernels {
 namespace x86 {
 
+template <>
+void DepthwiseConv::PrepareForRun() {}
+
 template <>
 void DepthwiseConv::Run() {
   auto& param = this->Param();
@@ -144,6 +147,86 @@ void DepthwiseConv::Run() {
 
 PROFILE_INFO(kFloat, kFloat)
 
+template <>
+void DepthwiseConv::PrepareForRun() {
+  auto& param = this->Param();
+
+  //! update scale
+  w_scale_ = param.weight_scale;
+  if (w_scale_.size() != 1 && w_scale_.size() != param.filter->dims()[0]) {
+    LOG(FATAL) << "weights scale size must equal to filter size";
+    return;
+  }
+  if (w_scale_.size() == 1) {
+    for (int i = 0; i < param.filter->dims()[0] - 1; ++i) {
+      w_scale_.push_back(w_scale_[0]);
+    }
+  }
+  float input_scale = param.input_scale;
+  for (auto& ws : w_scale_) {
+    ws *= input_scale;
+  }
+}
+
+#define CONV_DW_INT8_PARAM                                                 \
+  o_data, i_data, w_data, b_data, bs, ic, iw, ih, oh, ow, flag_act, alpha, \
+      w_scale_, ctx
+template <>
+void DepthwiseConv::Run() {
+  //! todo add implementation
+}
+
+PROFILE_INFO(kInt8, kFloat)
+
+template <>
+void DepthwiseConv::PrepareForRun() {
+  auto& param = this->Param();
+
+  //! update scale
+  w_scale_ = param.weight_scale;
+  if (w_scale_.size() != 1 && w_scale_.size() != param.filter->dims()[0]) {
+    LOG(FATAL) << "weights scale size must equal to filter size";
+    return;
+  }
+  if (w_scale_.size() == 1) {
+    for (int i = 0; i < param.filter->dims()[0] - 1; ++i) {
+      w_scale_.push_back(w_scale_[0]);
+    }
+  }
+  float input_scale = param.input_scale;
+  for (auto& ws : w_scale_) {
+    ws = ws * input_scale / param.output_scale;
+  }
+  //! update bias
+  if (param.bias) {
+    bias_.Resize(param.bias->dims());
+    auto ptr = bias_.mutable_data();
+    auto ptr_in = param.bias->data();
+    for (int i = 0; i < bias_.numel(); ++i) {
+      ptr[i] = ptr_in[i] / param.output_scale;
+    }
+    flag_trans_bias_ = true;
+  }
+  //! update relu6 parameter
+  if (param.activation_param.active_type == lite_api::ActivationType::kRelu6) {
+    param.activation_param.Relu_clipped_coef =
+        param.activation_param.Relu_clipped_coef / param.output_scale;
+  }
+  //! update leakyRelu parameter
+  if (param.activation_param.active_type ==
+      lite_api::ActivationType::kLeakyRelu) {
+    param.activation_param.Leaky_relu_alpha =
+        param.activation_param.Leaky_relu_alpha / param.output_scale;
+  }
+}
+
+template <>
+void DepthwiseConv::Run() {
+  //! todo add implementation
+}
+
+PROFILE_INFO(kInt8, kInt8)
+#undef CONV_DW_INT8_PARAM
 }  // namespace x86
 }  // namespace kernels
 }  // namespace lite
diff --git a/lite/kernels/x86/conv_depthwise.h b/lite/kernels/x86/conv_depthwise.h
index 6e78aa9b400..cefefb8ee9b 100644
--- a/lite/kernels/x86/conv_depthwise.h
+++ b/lite/kernels/x86/conv_depthwise.h
@@ -29,6 +29,7 @@ class DepthwiseConv : public KernelLite {
  public:
   DepthwiseConv() = default;
   ~DepthwiseConv() {}
+  void PrepareForRun() override;
   virtual void Run();
 
 #ifdef LITE_WITH_PROFILE
@@ -58,6 +59,9 @@ class DepthwiseConv : public KernelLite {
   Tensor input_padding_;
   Tensor filter_pack_;
   Tensor output_pack_;
+  bool flag_trans_bias_{true};
+  std::vector w_scale_;
+  Tensor bias_;
 };
 
 }  // namespace x86