[x86] add x86 int8 conv framework-info (#6834)
* add x86 int8 conv framework-info. test=develop
chenjiaoAngel authored Sep 6, 2021
1 parent f28e992 commit 80b25fa
Showing 7 changed files with 259 additions and 28 deletions.
8 changes: 6 additions & 2 deletions lite/api/cxx_api.cc
@@ -358,8 +358,12 @@ void Predictor::Build(const std::shared_ptr<cpp::ProgramDesc> &program_desc,
}

if (IsQuantizedMode(program_desc_)) {
inner_places.insert(inner_places.begin(),
Place{TARGET(kARM), PRECISION(kInt8)});
for (auto &valid_place : valid_places) {
if (valid_place.target == TARGET(kARM)) {
inner_places.insert(inner_places.begin(),
Place{TARGET(kARM), PRECISION(kInt8)});
}
}
}

Program program(program_desc_, scope_, inner_places);
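With this change, the `kARM`/`kInt8` place is injected for quantized models only when an ARM target is already among the valid places, so x86-only place lists no longer pick up ARM int8 kernels. A minimal sketch of where that list comes from, assuming the usual Paddle-Lite full-API entry points (`CxxConfig`, `set_valid_places`, `CreatePaddlePredictor`); the model path is hypothetical:

```cpp
#include "paddle_api.h"  // Paddle-Lite public API header; install path may vary

using namespace paddle::lite_api;  // NOLINT

void BuildPredictor() {
  CxxConfig config;
  config.set_model_dir("./quantized_model");  // hypothetical model directory
  // An x86-only place list: Predictor::Build above will no longer insert an
  // ARM int8 place for this configuration, even for a quantized model.
  config.set_valid_places({Place{TARGET(kX86), PRECISION(kFloat)}});
  auto predictor = CreatePaddlePredictor<CxxConfig>(config);
}
```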
6 changes: 6 additions & 0 deletions lite/kernels/arm/conv_depthwise.cc
100755 → 100644
@@ -278,6 +278,12 @@ void DepthwiseConv<PRECISION(kInt8), PRECISION(kInt8)>::PrepareForRun() {
param.activation_param.Relu_clipped_coef =
param.activation_param.Relu_clipped_coef / param.output_scale;
}
//! update leakyRelu parameter
if (param.activation_param.active_type ==
lite_api::ActivationType::kLeakyRelu) {
param.activation_param.Leaky_relu_alpha =
param.activation_param.Leaky_relu_alpha / param.output_scale;
}

if (kw == 3) {
ReInitWhenNeeded();
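The new block rescales the leaky-relu alpha by `output_scale`, mirroring the relu6 handling just above it. For relu6 the motivation is direct: an int8 output value `q` represents `f = q * output_scale`, so a clip threshold stated in the float domain must be divided by `output_scale` before it can be compared against quantized values. A worked example with assumed numbers (illustrative only, not from the patch):

```cpp
#include <cstdio>

int main() {
  const float output_scale = 0.05f;  // assumed per-tensor output scale
  const float relu6_coef = 6.0f;     // clip threshold in the float domain
  // q = f / output_scale, so clipping f at 6.0 means clipping q at 120.0.
  const float coef_q = relu6_coef / output_scale;
  std::printf("int8-domain relu6 threshold: %.1f\n", coef_q);  // 120.0
  return 0;
}
```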
6 changes: 6 additions & 0 deletions lite/kernels/arm/conv_gemmlike.cc
@@ -84,6 +84,12 @@ void GemmLikeConv<PRECISION(kInt8), PRECISION(kInt8)>::PrepareForRun() {
param.activation_param.Relu_clipped_coef =
param.activation_param.Relu_clipped_coef / param.output_scale;
}
//! update leakyRelu parameter
if (param.activation_param.active_type ==
lite_api::ActivationType::kLeakyRelu) {
param.activation_param.Leaky_relu_alpha =
param.activation_param.Leaky_relu_alpha / param.output_scale;
}
}

PROFILE_INFO(kFloat, kFloat);
176 changes: 150 additions & 26 deletions lite/kernels/x86/conv_compute.cc
@@ -40,32 +40,32 @@ namespace x86 {
int n = hout * wout; \
int k = chin * kw * kh / group;

#define PREPARE_PARAM \
auto& param = this->Param<param_t>(); \
const int input_channel = param.x->dims()[1]; \
const int output_channel = param.filter->dims()[0]; \
const int groups = param.groups; \
const int kernel_h = param.filter->dims()[2]; \
const int kernel_w = param.filter->dims()[3]; \
const int stride_h = param.strides[0]; \
const int stride_w = param.strides[1]; \
auto paddings = *param.paddings; \
auto dilations = *param.dilations; \
bool dw_kernel = (input_channel == groups && output_channel == groups); \
bool ks_equal = (stride_h == stride_w) && (kernel_h == kernel_w); \
bool no_dilation = (dilations[0] == 1) && (dilations[1] == 1); \
bool kps_equal = (paddings[0] == paddings[2]) && ks_equal; \
bool pads_equal = \
((paddings[0] == paddings[1]) && (paddings[2] == paddings[3])); \
bool flag_dw_3x3 = \
(kernel_h == 3) && (kernel_w == 3) && (stride_h == 1 || stride_h == 2); \
bool flag_dw_5x5 = \
(kernel_h == 5) && (kernel_w == 5) && (stride_h == 1 || stride_h == 2);

template <>
void Conv2dCompute<PRECISION(kFloat), PRECISION(kFloat)>::PrepareForRun() {
auto& param = this->Param<param_t>();

const int input_channel = param.x->dims()[1];
const int output_channel = param.filter->dims()[0];
const int groups = param.groups;

const int kernel_h = param.filter->dims()[2];
const int kernel_w = param.filter->dims()[3];

const int stride_h = param.strides[0];
const int stride_w = param.strides[1];
auto paddings = *param.paddings;
auto dilations = *param.dilations;
bool dw_kernel = (input_channel == groups && output_channel == groups);
bool ks_equal = (stride_h == stride_w) && (kernel_h == kernel_w);
bool no_dilation = (dilations[0] == 1) && (dilations[1] == 1);
bool kps_equal = (paddings[0] == paddings[2]) && ks_equal;
bool pads_equal =
((paddings[0] == paddings[1]) && (paddings[2] == paddings[3]));
bool flag_dw_3x3 =
(kernel_h == 3) && (kernel_w == 3) && (stride_h == 1 || stride_h == 2);
bool flag_dw_5x5 =
(kernel_h == 5) && (kernel_w == 5) && (stride_h == 1 || stride_h == 2);
// todo add conv_5x5_depthwise implement
PREPARE_PARAM
//! todo add conv_5x5_depthwise implement
flag_dw_5x5 = false;
bool flag_dw = flag_dw_3x3 || flag_dw_5x5;
if (kernel_w == 1 && stride_w == 1 && paddings[0] == 0 && kps_equal &&
@@ -75,7 +75,7 @@ void Conv2dCompute<PRECISION(kFloat), PRECISION(kFloat)>::PrepareForRun() {
flag_1x1gemm_ = false;
}

/// select conv impl
//! select conv impl
if (dw_kernel && kps_equal && no_dilation && flag_dw && (groups & 3) == 0) {
impl_ = new DepthwiseConv<PRECISION(kFloat), PRECISION(kFloat)>;
}
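`flag_1x1gemm_` marks the case where im2col can be skipped entirely: a 1x1 convolution with stride 1 and no padding is already a GEMM with `n = hout * wout` and `k = chin / group` (the `INIT_PARAM` dimensions at the top of the file). A naive standalone sketch of that equivalence for a hypothetical single-group case (not the library's kernel):

```cpp
// out[oc][p] = sum over ic of w[oc][ic] * in[ic][p], p over the h*w positions.
void Conv1x1AsGemm(const float* in, const float* w, float* out,
                   int chin, int chout, int hw) {
  for (int oc = 0; oc < chout; ++oc) {
    for (int p = 0; p < hw; ++p) {
      float acc = 0.f;
      for (int ic = 0; ic < chin; ++ic) {
        acc += w[oc * chin + ic] * in[ic * hw + p];
      }
      out[oc * hw + p] = acc;
    }
  }
}
```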
@@ -165,12 +165,136 @@ void Conv2dCompute<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
n);
}
}
// bias and activate
//! bias and activate
lite::x86::math::fill_bias_act(
dout_batch, bias_ptr, chout, wout * hout, flag_bias, &act_param);
}
if (!flag_1x1gemm_) TargetFree(TARGET(kX86), col_data);
}

template <>
void Conv2dCompute<PRECISION(kInt8), PRECISION(kFloat)>::PrepareForRun() {
PREPARE_PARAM
//! todo add conv_5x5_depthwise implement
flag_dw_5x5 = false;
bool flag_dw = flag_dw_3x3 || flag_dw_5x5;
if (kernel_w == 1 && stride_w == 1 && paddings[0] == 0 && kps_equal &&
pads_equal) {
flag_1x1gemm_ = true;
} else {
flag_1x1gemm_ = false;
}

//! select conv impl
if (dw_kernel && kps_equal && no_dilation && flag_dw) {
impl_ = new DepthwiseConv<PRECISION(kInt8), PRECISION(kFloat)>;
} else {
//! update scale
w_scale_ = param.weight_scale;
if (w_scale_.size() != 1 && w_scale_.size() != param.filter->dims()[0]) {
LOG(FATAL) << "weight scale size must be 1 or equal to the filter output channel size";
return;
}
if (w_scale_.size() == 1) {
for (int i = 0; i < param.filter->dims()[0] - 1; ++i) {
w_scale_.push_back(w_scale_[0]);
}
}
float input_scale = param.input_scale;
for (auto& ws : w_scale_) {
ws *= input_scale;
}
}

if (impl_) {
impl_->SetContext(std::move(this->ctx_));
impl_->SetParam(param);
impl_->PrepareForRun();
is_first_epoch_ = false;
}
}

template <>
void Conv2dCompute<PRECISION(kInt8), PRECISION(kFloat)>::Run() {
if (impl_) {
return impl_->Run();
}
//! todo add int8 gemm
}
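Folding `input_scale` into each entry of `w_scale_` means the int8-input/float-output path can dequantize an int32 accumulator with a single multiply per element. A sketch of the assumed epilogue (the int8 GEMM itself is still a TODO above):

```cpp
#include <cstdint>

// Assumed semantics, not the library's kernel: fused_scale is
// input_scale * w_scale_[oc] as prepared in PrepareForRun above.
inline float Dequantize(int32_t acc, float fused_scale) {
  return static_cast<float>(acc) * fused_scale;
}
```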

template <>
void Conv2dCompute<PRECISION(kInt8), PRECISION(kInt8)>::PrepareForRun() {
PREPARE_PARAM
// todo add conv_5x5_depthwise implement
flag_dw_5x5 = false;
bool flag_dw = flag_dw_3x3 || flag_dw_5x5;
if (kernel_w == 1 && stride_w == 1 && paddings[0] == 0 && kps_equal &&
pads_equal) {
flag_1x1gemm_ = true;
} else {
flag_1x1gemm_ = false;
}

//! select conv impl
if (dw_kernel && kps_equal && no_dilation && flag_dw) {
impl_ = new DepthwiseConv<PRECISION(kInt8), PRECISION(kInt8)>;
} else {
//! update scale
w_scale_ = param.weight_scale;
if (w_scale_.size() != 1 && w_scale_.size() != param.filter->dims()[0]) {
LOG(FATAL) << "weight scale size must be 1 or equal to the filter output channel size";
return;
}
if (w_scale_.size() == 1) {
for (int i = 0; i < param.filter->dims()[0] - 1; ++i) {
w_scale_.push_back(w_scale_[0]);
}
}
float input_scale = param.input_scale;
float output_scale = param.output_scale;
for (auto& ws : w_scale_) {
ws = ws * input_scale / output_scale;
}
//! update bias
if (param.bias) {
bias_.Resize(param.bias->dims());
auto ptr = bias_.mutable_data<float>();
auto ptr_in = param.bias->data<float>();
for (int i = 0; i < bias_.numel(); ++i) {
ptr[i] = ptr_in[i] / param.output_scale;
}
flag_trans_bias_ = true;
}
//! update relu6 parameter
if (param.activation_param.active_type ==
lite_api::ActivationType::kRelu6) {
param.activation_param.Relu_clipped_coef =
param.activation_param.Relu_clipped_coef / param.output_scale;
}
//! update leakyRelu parameter
if (param.activation_param.active_type ==
lite_api::ActivationType::kLeakyRelu) {
param.activation_param.Leaky_relu_alpha =
param.activation_param.Leaky_relu_alpha / param.output_scale;
}
}

if (impl_) {
impl_->SetContext(std::move(this->ctx_));
impl_->SetParam(param);
impl_->PrepareForRun();
is_first_epoch_ = false;
}
}
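For int8 output the fused scale additionally divides by `output_scale`, and the bias and activation parameters were pre-divided by it above, so the per-element epilogue stays one multiply-add plus a clamp. A sketch under those assumptions, using a symmetric int8 range (not the library's kernel):

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>

// fused_scale = input_scale * w_scale_[oc] / output_scale;
// bias_q = bias[oc] / output_scale, as prepared in PrepareForRun above.
inline int8_t Requantize(int32_t acc, float fused_scale, float bias_q) {
  float v = static_cast<float>(acc) * fused_scale + bias_q;
  v = std::max(-127.0f, std::min(127.0f, std::round(v)));  // symmetric clamp
  return static_cast<int8_t>(v);
}
```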

template <>
void Conv2dCompute<PRECISION(kInt8), PRECISION(kInt8)>::Run() {
if (impl_) {
return impl_->Run();
}
//! todo add int8 gemm
}
#undef PREPARE_PARAM
#undef INIT_PARAM
} // namespace x86
} // namespace kernels
4 changes: 4 additions & 0 deletions lite/kernels/x86/conv_compute.h
@@ -76,6 +76,10 @@ class Conv2dCompute : public KernelLite<TARGET(kX86), Ptype> {
using param_t = operators::ConvParam;
KernelLite<TARGET(kX86), Ptype>* impl_{nullptr};
bool flag_1x1gemm_{false};
bool flag_trans_bias_{false};
std::vector<float> w_scale_;
Tensor weights_;
Tensor bias_;
};

} // namespace x86
83 changes: 83 additions & 0 deletions lite/kernels/x86/conv_depthwise.cc
@@ -22,6 +22,9 @@ namespace lite {
namespace kernels {
namespace x86 {

template <>
void DepthwiseConv<PRECISION(kFloat), PRECISION(kFloat)>::PrepareForRun() {}

template <>
void DepthwiseConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {
auto& param = this->Param<param_t>();
@@ -144,6 +147,86 @@ void DepthwiseConv<PRECISION(kFloat), PRECISION(kFloat)>::Run() {

PROFILE_INFO(kFloat, kFloat)

template <>
void DepthwiseConv<PRECISION(kInt8), PRECISION(kFloat)>::PrepareForRun() {
auto& param = this->Param<param_t>();

//! update scale
w_scale_ = param.weight_scale;
if (w_scale_.size() != 1 && w_scale_.size() != param.filter->dims()[0]) {
LOG(FATAL) << "weight scale size must be 1 or equal to the filter output channel size";
return;
}
if (w_scale_.size() == 1) {
for (int i = 0; i < param.filter->dims()[0] - 1; ++i) {
w_scale_.push_back(w_scale_[0]);
}
}
float input_scale = param.input_scale;
for (auto& ws : w_scale_) {
ws *= input_scale;
}
}
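As in `conv_compute.cc`, a lone per-tensor weight scale is broadcast so that downstream code can always index one scale per output channel. An equivalent standalone form (hypothetical helper, same behavior as the loop above):

```cpp
#include <vector>

// Expand a per-tensor scale to per-channel form; per-channel input is
// returned unchanged.
std::vector<float> ExpandPerChannel(std::vector<float> scale, int output_channels) {
  if (scale.size() == 1) scale.assign(output_channels, scale[0]);
  return scale;
}
```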

#define CONV_DW_INT8_PARAM \
o_data, i_data, w_data, b_data, bs, ic, iw, ih, oh, ow, flag_act, alpha, \
w_scale_, ctx
template <>
void DepthwiseConv<PRECISION(kInt8), PRECISION(kFloat)>::Run() {
//! todo add implementation
}

PROFILE_INFO(kInt8, kFloat)

template <>
void DepthwiseConv<PRECISION(kInt8), PRECISION(kInt8)>::PrepareForRun() {
auto& param = this->Param<param_t>();

//! update scale
w_scale_ = param.weight_scale;
if (w_scale_.size() != 1 && w_scale_.size() != param.filter->dims()[0]) {
LOG(FATAL) << "weight scale size must be 1 or equal to the filter output channel size";
return;
}
if (w_scale_.size() == 1) {
for (int i = 0; i < param.filter->dims()[0] - 1; ++i) {
w_scale_.push_back(w_scale_[0]);
}
}
float input_scale = param.input_scale;
for (auto& ws : w_scale_) {
ws *= input_scale;
}
//! update bias
if (param.bias) {
bias_.Resize(param.bias->dims());
auto ptr = bias_.mutable_data<float>();
auto ptr_in = param.bias->data<float>();
for (int i = 0; i < bias_.numel(); ++i) {
ptr[i] = ptr_in[i] / param.output_scale;
}
flag_trans_bias_ = true;
}
//! update relu6 parameter
if (param.activation_param.active_type == lite_api::ActivationType::kRelu6) {
param.activation_param.Relu_clipped_coef =
param.activation_param.Relu_clipped_coef / param.output_scale;
}
//! update leakyRelu parameter
if (param.activation_param.active_type ==
lite_api::ActivationType::kLeakyRelu) {
param.activation_param.Leaky_relu_alpha =
param.activation_param.Leaky_relu_alpha / param.output_scale;
}
}

template <>
void DepthwiseConv<PRECISION(kInt8), PRECISION(kInt8)>::Run() {
//! todo add implementation
}

PROFILE_INFO(kInt8, kInt8)
#undef CONV_DW_INT8_PARAM
} // namespace x86
} // namespace kernels
} // namespace lite
4 changes: 4 additions & 0 deletions lite/kernels/x86/conv_depthwise.h
@@ -29,6 +29,7 @@ class DepthwiseConv : public KernelLite<TARGET(kX86), Ptype> {
public:
DepthwiseConv() = default;
~DepthwiseConv() {}
void PrepareForRun() override;
virtual void Run();

#ifdef LITE_WITH_PROFILE
@@ -58,6 +59,9 @@ class DepthwiseConv : public KernelLite<TARGET(kX86), Ptype> {
Tensor input_padding_;
Tensor filter_pack_;
Tensor output_pack_;
bool flag_trans_bias_{false};
std::vector<float> w_scale_;
Tensor bias_;
};

} // namespace x86
Expand Down
