
add count_include_pad argument
Hao Jin committed May 22, 2018
1 parent 022f238 commit 3f37699
Showing 6 changed files with 126 additions and 44 deletions.
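Background on the new argument: with average pooling and nonzero padding, the divisor for each window can either count the padded positions (count_include_pad=True, the previously hard-coded behavior) or only the positions that fall inside the input (count_include_pad=False). A minimal host-side sketch of the two conventions on a 1-D input; this is illustrative standalone code, not part of the commit:

```cpp
#include <algorithm>
#include <cstdio>
#include <vector>

// 1-D average pooling over a zero-padded input, with both divisor
// conventions. kernel_w/stride_w/pad_w mirror the operator parameters.
std::vector<float> avg_pool_1d(const std::vector<float>& in, int kernel_w,
                               int stride_w, int pad_w, bool count_include_pad) {
  const int width = static_cast<int>(in.size());
  const int pooled_width = (width + 2 * pad_w - kernel_w) / stride_w + 1;
  std::vector<float> out(pooled_width);
  for (int pw = 0; pw < pooled_width; ++pw) {
    int wstart = pw * stride_w - pad_w;
    int wend = std::min(wstart + kernel_w, width + pad_w);
    int pool_size = wend - wstart;  // counts padded positions (old behavior)
    wstart = std::max(wstart, 0);
    wend = std::min(wend, width);
    if (!count_include_pad) pool_size = wend - wstart;  // real elements only
    float sum = 0.f;
    for (int w = wstart; w < wend; ++w) sum += in[w];
    out[pw] = sum / pool_size;
  }
  return out;
}

int main() {
  const std::vector<float> x = {1.f, 2.f, 3.f, 4.f};
  // kernel=2, stride=2, pad=1: the first window covers one pad cell and x[0].
  const auto incl = avg_pool_1d(x, 2, 2, 1, true);   // first output: (0+1)/2 = 0.5
  const auto excl = avg_pool_1d(x, 2, 2, 1, false);  // first output: 1/1 = 1.0
  std::printf("include_pad: %.2f  exclude_pad: %.2f\n", incl[0], excl[0]);
}
```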
1 change: 1 addition & 0 deletions cpp-package/scripts/OpWrapperGenerator.py
@@ -77,6 +77,7 @@ def GetConvertEnumVariableToString(self, variable=''):

class Arg:
typeDict = {'boolean':'bool',\
'boolean or None':'dmlc::optional<bool>',\
'Shape(tuple)':'Shape',\
'Symbol':'Symbol',\
'NDArray':'Symbol',\
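The new 'boolean or None' entry maps the parameter to dmlc::optional&lt;bool&gt; in the generated C++ wrappers. A small sketch of why a tri-state type is used, with std::optional&lt;bool&gt; standing in for dmlc::optional&lt;bool&gt; and a hypothetical resolver function; the unset fallback shown here matches the include-padding default in the MKLDNN change below:

```cpp
#include <iostream>
#include <optional>

// std::optional<bool> stands in for dmlc::optional<bool>: the parameter has
// three states (unset, true, false), so a backend can distinguish "user said
// nothing" from an explicit choice and keep its historical default.
bool resolve_count_include_pad(std::optional<bool> param) {
  if (param.has_value()) return param.value();  // explicit user choice
  return true;  // unset: keep the old include-padding behavior (assumption)
}

int main() {
  std::cout << resolve_count_include_pad(std::nullopt) << ' '  // 1 (default)
            << resolve_count_include_pad(true) << ' '          // 1
            << resolve_count_include_pad(false) << '\n';       // 0
}
```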
@@ -190,7 +190,7 @@ private[mxnet] object SymbolImplMacros {
case "long" | "long(non-negative)" => "Long"
case "double" | "doubleorNone" => "Double"
case "string" => "String"
case "boolean" => "Boolean"
case "boolean" => "BooleanorNone"
case "tupleof<float>" | "tupleof<double>" | "ptr" | "" => "Any"
case default => throw new IllegalArgumentException(
s"Invalid type for args: $default, $argType")
6 changes: 5 additions & 1 deletion src/operator/nn/mkldnn/mkldnn_pooling.cc
@@ -121,7 +121,11 @@ mkldnn::algorithm GetMKLDNNPoolAlgo(const PoolingParam &param) {
return mkldnn::algorithm::pooling_max;
break;
case pool_enum::kAvgPooling:
return mkldnn::algorithm::pooling_avg_include_padding;
if (param.count_include_pad.has_value() && !param.count_include_pad.value()) {
return mkldnn::algorithm::pooling_avg_exclude_padding;
} else {
return mkldnn::algorithm::pooling_avg_include_padding;
}
break;
default:
LOG(FATAL) << "MKLDNN Pooling: Unknown pooling method.";
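The branch above picks the exclude-padding variant only when count_include_pad is explicitly false; unset or true keeps the old include-padding algorithm. The same decision as a standalone sketch, with std::optional and a local enum standing in for dmlc::optional and mkldnn::algorithm:

```cpp
#include <cstdio>
#include <optional>

enum class PoolAlgo { avg_include_padding, avg_exclude_padding };

// Mirrors the branch in GetMKLDNNPoolAlgo: only an explicit
// count_include_pad=false selects the exclude-padding variant.
PoolAlgo avg_algo(std::optional<bool> count_include_pad) {
  if (count_include_pad.has_value() && !count_include_pad.value())
    return PoolAlgo::avg_exclude_padding;
  return PoolAlgo::avg_include_padding;
}

int main() {
  std::printf("%d %d %d\n",
              avg_algo(std::nullopt) == PoolAlgo::avg_exclude_padding,  // 0
              avg_algo(true) == PoolAlgo::avg_exclude_padding,          // 0
              avg_algo(false) == PoolAlgo::avg_exclude_padding);        // 1
}
```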
73 changes: 56 additions & 17 deletions src/operator/nn/pool.cuh
@@ -214,16 +214,19 @@ template <typename DType, int p = 1>
__global__ void pool_sum_1d_gpu_kernel(const int nthreads, const DType* in_data, const int channels,
const int width, const int pooled_width, const int kernel_w,
const int stride_w, const int pad_w, DType* out_data,
const bool getAvg = false) {
const bool getAvg = false, const bool count_include_pad = true) {
CUDA_KERNEL_LOOP(index, nthreads) {
const int pw = index % pooled_width;
const int c = (index / pooled_width) % channels;
const int n = index / pooled_width / channels;
int wstart = pw * stride_w - pad_w;
int wend = min(wstart + kernel_w, width + pad_w);
const int pool_size = (getAvg? (wend - wstart) : 1);
int pool_size = (getAvg? (wend - wstart) : 1);
wstart = max(wstart, 0);
wend = min(wend, width);
if (getAvg && !count_include_pad) {
pool_size = (wend - wstart);
}
DType sum = 0;
const DType* out_slice = in_data + (n * channels + c) * width;
for (int w = wstart; w < wend; ++w) {
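A subtlety worth noting in this hunk: in include-pad mode, pool_size is taken from the padded window before wstart and wend are clamped to the input extent, so excluding padding requires recomputing it after the clamp. The 2-D and 3-D kernels below repeat the same pattern. A tiny standalone check of the edge-window arithmetic, with illustrative values:

```cpp
#include <algorithm>
#include <cstdio>

// Left-edge window from pool_sum_1d_gpu_kernel: width=4, kernel_w=3, pad_w=1,
// first output (pw = 0), so the window starts one cell into the padding.
int main() {
  const int width = 4, kernel_w = 3, pad_w = 1;
  int wstart = -pad_w;                                     // pw*stride - pad = -1
  int wend = std::min(wstart + kernel_w, width + pad_w);   // 2
  const int include_pad_size = wend - wstart;              // 3: counts the pad cell
  wstart = std::max(wstart, 0);                            // clamp to input: 0
  wend = std::min(wend, width);                            // 2
  const int exclude_pad_size = wend - wstart;              // 2: real cells only
  std::printf("include=%d exclude=%d\n", include_pad_size, exclude_pad_size);
}
```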
@@ -244,7 +247,8 @@ __global__ void pool_sum_2d_gpu_kernel(const int nthreads, const DType* in_data,
const int kernel_h, const int kernel_w,
const int stride_h, const int stride_w,
const int pad_h, const int pad_w, DType* out_data,
const bool getAvg = false) {
const bool getAvg = false,
const bool count_include_pad = true) {
CUDA_KERNEL_LOOP(index, nthreads) {
const int pw = index % pooled_width;
const int ph = (index / pooled_width) % pooled_height;
@@ -254,11 +258,14 @@ __global__ void pool_sum_2d_gpu_kernel(const int nthreads, const DType* in_data,
int wstart = pw * stride_w - pad_w;
int hend = min(hstart + kernel_h, height + pad_h);
int wend = min(wstart + kernel_w, width + pad_w);
const int pool_size = (getAvg? (hend - hstart) * (wend - wstart) : 1);
int pool_size = (getAvg? (hend - hstart) * (wend - wstart) : 1);
hstart = max(hstart, 0);
wstart = max(wstart, 0);
hend = min(hend, height);
wend = min(wend, width);
if (getAvg && !count_include_pad) {
pool_size = (hend - hstart) * (wend - wstart);
}
DType sum = 0;
const DType* out_slice = in_data + (n * channels + c) * height * width;
for (int h = hstart; h < hend; ++h) {
Expand All @@ -282,7 +289,8 @@ __global__ void pool_sum_3d_gpu_kernel(const int nthreads, const DType* in_data,
const int kernel_h, const int kernel_w,
const int stride_d, const int stride_h, const int stride_w,
const int pad_d, const int pad_h, const int pad_w,
DType* out_data, const bool getAvg = false) {
DType* out_data, const bool getAvg = false,
const bool count_include_pad = true) {
CUDA_KERNEL_LOOP(index, nthreads) {
const int pw = index % pooled_width;
const int ph = (index / pooled_width) % pooled_height;
@@ -295,13 +303,16 @@ __global__ void pool_sum_3d_gpu_kernel(const int nthreads, const DType* in_data,
int dend = min(dstart + kernel_d, depth + pad_d);
int hend = min(hstart + kernel_h, height + pad_h);
int wend = min(wstart + kernel_w, width + pad_w);
const int pool_size = (getAvg? (dend - dstart) * (hend - hstart) * (wend - wstart) : 1);
int pool_size = (getAvg? (dend - dstart) * (hend - hstart) * (wend - wstart) : 1);
dstart = max(dstart, 0);
hstart = max(hstart, 0);
wstart = max(wstart, 0);
dend = min(dend, depth);
hend = min(hend, height);
wend = min(wend, width);
if (getAvg && !count_include_pad) {
pool_size = (dend - dstart) * (hend - hstart) * (wend - wstart);
}
DType sum = 0;
const DType* out_slice = in_data + (n * channels + c) * depth * height * width;
for (int d = dstart; d < dend; ++d) {
@@ -487,7 +498,8 @@ __global__ void unpool_sum_1d_gpu_kernel(const int nthreads, const DType* out_gr
const int channels, const int width,
const int pooled_width, const int kernel_w,
const int stride_w, const int pad_w, DType* in_grad,
const bool isAvg = false) {
const bool isAvg = false,
const bool count_include_pad = true) {
// index is the input image index in NCW
CUDA_KERNEL_LOOP(index, nthreads) {
// find out the local index
@@ -507,6 +519,11 @@ __global__ void unpool_sum_1d_gpu_kernel(const int nthreads, const DType* out_gr
int wstart = pw * stride_w - pad_w;
int wend = min(wstart + kernel_w, width + pad_w);
int pool_size = (isAvg? (wend - wstart) : 1);
if (isAvg && !count_include_pad) {
wstart = max(wstart, 0);
wend = min(wend, width);
pool_size = (wend - wstart);
}
gradient +=
lp_grad<DType, p>::Map(out_grad_slice[pw], in_data[index], out_data_slice[pw]) / pool_size;
}
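In the backward (unpool) kernels, each input position accumulates out_grad / pool_size from every window covering it, so the divisor must match the one used in the forward pass; with count_include_pad=False that means re-clipping the window before dividing. A host-side sketch of the 1-D case, written as a scatter over output windows rather than the kernel's gather over input positions; hypothetical code with the same divisor logic:

```cpp
#include <algorithm>
#include <cstdio>
#include <vector>

// 1-D average-pool backward. Each output gradient is spread evenly over the
// window that produced it; the divisor matches the forward pool_size.
std::vector<float> avg_unpool_1d(const std::vector<float>& out_grad, int width,
                                 int kernel_w, int stride_w, int pad_w,
                                 bool count_include_pad) {
  std::vector<float> in_grad(width, 0.f);
  const int pooled_width = static_cast<int>(out_grad.size());
  for (int pw = 0; pw < pooled_width; ++pw) {
    int wstart = pw * stride_w - pad_w;
    int wend = std::min(wstart + kernel_w, width + pad_w);
    int pool_size = wend - wstart;  // include-pad divisor
    wstart = std::max(wstart, 0);
    wend = std::min(wend, width);
    if (!count_include_pad) pool_size = wend - wstart;
    for (int w = wstart; w < wend; ++w)
      in_grad[w] += out_grad[pw] / pool_size;
  }
  return in_grad;
}

int main() {
  // Gradient of ones through avg pooling (kernel=2, stride=2, pad=1, width=4).
  const auto g = avg_unpool_1d({1.f, 1.f, 1.f}, 4, 2, 2, 1 == 0);
  // Expected: g[0] = 1.0 (divisor 1 at the left edge), g[1] = g[2] = 0.5, g[3] = 1.0
  for (float v : g) std::printf("%.2f ", v);
  std::printf("\n");
}
```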
@@ -528,7 +545,8 @@ __global__ void unpool_sum_2d_gpu_kernel(const int nthreads, const DType* out_gr
const int kernel_h, const int kernel_w,
const int stride_h, const int stride_w,
const int pad_h, const int pad_w, DType* in_grad,
const bool isAvg = false) {
const bool isAvg = false,
const bool count_include_pad = true) {
// index is the input image index in NCHW
CUDA_KERNEL_LOOP(index, nthreads) {
// find out the local index
@@ -555,6 +573,13 @@ __global__ void unpool_sum_2d_gpu_kernel(const int nthreads, const DType* out_gr
int wend = min(wstart + kernel_w, width + pad_w);
int pool_size = (isAvg? (hend - hstart) * (wend - wstart) : 1);
int out_index = ph * pooled_width + pw;
if (isAvg && !count_include_pad) {
hstart = max(hstart, 0);
wstart = max(wstart, 0);
hend = min(hend, height);
wend = min(wend, width);
pool_size = (hend - hstart) * (wend - wstart);
}
gradient +=
lp_grad<DType, p>::Map(out_grad_slice[out_index],
in_data[index],
@@ -580,7 +605,8 @@ __global__ void unpool_sum_3d_gpu_kernel(const int nthreads, const DType* out_gr
const int kernel_d, const int kernel_h,
const int kernel_w, const int stride_d, const int stride_h,
const int stride_w, const int pad_d, const int pad_h,
const int pad_w, DType* in_grad, const bool isAvg = false) {
const int pad_w, DType* in_grad, const bool isAvg = false,
const bool count_include_pad = true) {
// index is the input image index in NCDHW
CUDA_KERNEL_LOOP(index, nthreads) {
// find out the local index
@@ -613,6 +639,15 @@ __global__ void unpool_sum_3d_gpu_kernel(const int nthreads, const DType* out_gr
int wend = min(wstart + kernel_w, width + pad_w);
int pool_size = (isAvg? (dend - dstart) * (hend - hstart) * (wend - wstart) : 1);
int out_index = (pd * pooled_height + ph) * pooled_width + pw;
if (isAvg && !count_include_pad) {
dstart = max(dstart, 0);
hstart = max(hstart, 0);
wstart = max(wstart, 0);
dend = min(dend, depth);
hend = min(hend, height);
wend = min(wend, width);
pool_size = (dend - dstart) * (hend - hstart) * (wend - wstart);
}
gradient += lp_grad<DType, p>::Map(out_grad_slice[out_index],
in_data[index],
out_data_slice[out_index]) / pool_size;
@@ -643,7 +678,7 @@ template<typename DType, int p>
inline void pool(mshadow::Stream<gpu>* s, const DType* in_data, const TShape& ishape,
const TShape& oshape, const TShape& kernel, const TShape& pad,
const TShape& stride, const int pool_type, OpReqType req_type,
DType* out_data) {
DType* out_data, const bool count_include_pad) {
CHECK_EQ(req_type, kWriteTo) << "Only support req=kWriteTo in pooling operations";
using namespace mxnet_op;
if (kernel.ndim() == 1) {
@@ -659,7 +694,8 @@ inline void pool(mshadow::Stream<gpu>* s, const DType* in_data, const TShape& is
pool_sum_1d_gpu_kernel<<<cuda_get_num_blocks(oshape.Size()), mshadow::cuda::kBaseThreadNum,
0, mshadow::Stream<gpu>::GetStream(s)>>>(
oshape.Size(), in_data, ishape[1], ishape[2], oshape[2],
kernel[0], stride[0], pad[0], out_data, true);
kernel[0], stride[0], pad[0], out_data,
true, count_include_pad);
MSHADOW_CUDA_POST_KERNEL_CHECK(pool_sum_1d_gpu_kernel);
} else if (pool_enum::kSumPooling == pool_type) {
// NOLINT_NEXT_LINE(whitespace/operators)
@@ -693,7 +729,8 @@ inline void pool(mshadow::Stream<gpu>* s, const DType* in_data, const TShape& is
0, mshadow::Stream<gpu>::GetStream(s)>>>(
oshape.Size(), in_data, ishape[1], ishape[2], ishape[3],
oshape[2], oshape[3], kernel[0], kernel[1],
stride[0], stride[1], pad[0], pad[1], out_data, true);
stride[0], stride[1], pad[0], pad[1], out_data,
true, count_include_pad);
MSHADOW_CUDA_POST_KERNEL_CHECK(pool_sum_2d_gpu_kernel);
} else if (pool_enum::kSumPooling == pool_type) {
// NOLINT_NEXT_LINE(whitespace/operators)
@@ -731,7 +768,7 @@ inline void pool(mshadow::Stream<gpu>* s, const DType* in_data, const TShape& is
oshape.Size(), in_data, ishape[1], ishape[2], ishape[3],
ishape[4], oshape[2], oshape[3], oshape[4], kernel[0],
kernel[1], kernel[2], stride[0], stride[1], stride[2],
pad[0], pad[1], pad[2], out_data, true);
pad[0], pad[1], pad[2], out_data, true, count_include_pad);
MSHADOW_CUDA_POST_KERNEL_CHECK(pool_sum_3d_gpu_kernel);
} else if (pool_enum::kSumPooling == pool_type) {
// NOLINT_NEXT_LINE(whitespace/operators)
@@ -777,7 +814,8 @@ template<typename DType, int p>
inline void unpool(mshadow::Stream<gpu>* s, const DType* out_grad, const DType* in_data,
const DType* out_data, const TShape& ishape, const TShape& oshape,
const TShape& kernel, const TShape& pad, const TShape& stride,
const int pool_type, OpReqType req_type, DType* in_grad) {
const int pool_type, OpReqType req_type, DType* in_grad,
const bool count_include_pad) {
if (mxnet::kNullOp == req_type) return;
if (mxnet::kAddTo != req_type) {
mxnet_op::Kernel<mxnet_op::set_zero, gpu>::Launch(s, ishape.Size(), in_grad);
@@ -798,7 +836,7 @@ inline void unpool(mshadow::Stream<gpu>* s, const DType* out_grad, const DType*
0, mshadow::Stream<gpu>::GetStream(s)>>>(
ishape.Size(), out_grad, in_data, out_data,
ishape[1], ishape[2], oshape[2], kernel[0],
stride[0], pad[0], in_grad, true);
stride[0], pad[0], in_grad, true, count_include_pad);
MSHADOW_CUDA_POST_KERNEL_CHECK(unpool_sum_1d_gpu_kernel);
} else if (pool_enum::kSumPooling == pool_type) {
// NOLINT_NEXT_LINE(whitespace/operators)
@@ -836,7 +874,8 @@ inline void unpool(mshadow::Stream<gpu>* s, const DType* out_grad, const DType*
ishape.Size(), out_grad, in_data, out_data,
ishape[1], ishape[2], ishape[3],
oshape[2], oshape[3], kernel[0], kernel[1],
stride[0], stride[1], pad[0], pad[1], in_grad, true);
stride[0], stride[1], pad[0], pad[1], in_grad,
true, count_include_pad);
MSHADOW_CUDA_POST_KERNEL_CHECK(unpool_sum_2d_gpu_kernel);
} else if (pool_enum::kSumPooling == pool_type) {
// NOLINT_NEXT_LINE(whitespace/operators)
@@ -878,7 +917,7 @@ inline void unpool(mshadow::Stream<gpu>* s, const DType* out_grad, const DType*
ishape[1], ishape[2], ishape[3], ishape[4],
oshape[2], oshape[3], oshape[4], kernel[0], kernel[1],
kernel[2], stride[0], stride[1], stride[2], pad[0], pad[1],
pad[2], in_grad, true);
pad[2], in_grad, true, count_include_pad);
MSHADOW_CUDA_POST_KERNEL_CHECK(unpool_sum_3d_gpu_kernel);
} else if (pool_enum::kSumPooling == pool_type) {
// NOLINT_NEXT_LINE(whitespace/operators)
