diff --git a/src/operator/nn/deconvolution-inl.h b/src/operator/nn/deconvolution-inl.h index 8b2ae8e0c8a8..ba2fa01c4c21 100644 --- a/src/operator/nn/deconvolution-inl.h +++ b/src/operator/nn/deconvolution-inl.h @@ -292,7 +292,11 @@ class DeconvolutionOp { using namespace mshadow::expr; if (param_.kernel.ndim() > 2) { - LOG(FATAL) << "If not using CUDNN, only 1D or 2D Deconvolution is supported"; + LOG(FATAL) << "Only 1D or 2D Deconvolution is natively supported. " + << ((MXNET_USE_MKLDNN || MXNET_USE_CUDNN) + ? "Fallback to native implementation (if occurred) is therefore " + "impossible for 3D Deconvolution." + : ""); } CHECK_EQ(req[deconv::kOut], kWriteTo); diff --git a/src/operator/nn/deconvolution.cc b/src/operator/nn/deconvolution.cc index aa1f1daa36aa..6b729749d917 100644 --- a/src/operator/nn/deconvolution.cc +++ b/src/operator/nn/deconvolution.cc @@ -43,9 +43,14 @@ static void DeconvolutionComputeExCPU(const nnvm::NodeAttrs& attrs, const std::vector& outputs) { const DeconvolutionParam& params = nnvm::get(attrs.parsed); if (SupportMKLDNNDeconv(params, inputs[0])) { - MKLDNN_OPCHECK_INIT(false, outputs.size(), inputs, outputs); - MKLDNNRun(MKLDNNDeconvolutionForward, attrs, ctx, inputs, req, outputs); - MKLDNN_OPCHECK_RUN(DeconvolutionCompute, attrs, ctx, inputs, req, outputs); + if (params.kernel.ndim() == 3) { + // we cannot check the output, as 3D deconvolution is not natively supported yet + MKLDNNRun(MKLDNNDeconvolutionForward, attrs, ctx, inputs, req, outputs); + } else { + MKLDNN_OPCHECK_INIT(false, outputs.size(), inputs, outputs); + MKLDNNRun(MKLDNNDeconvolutionForward, attrs, ctx, inputs, req, outputs); + MKLDNN_OPCHECK_RUN(DeconvolutionCompute, attrs, ctx, inputs, req, outputs); + } return; } FallBackCompute(DeconvolutionCompute, attrs, ctx, inputs, req, outputs); @@ -58,9 +63,14 @@ static void DeconvolutionGradComputeExCPU(const nnvm::NodeAttrs& attrs, const std::vector& outputs) { const DeconvolutionParam& params = nnvm::get(attrs.parsed); if 
(SupportMKLDNNDeconv(params, inputs[0])) { - MKLDNN_OPCHECK_INIT(true, outputs.size(), inputs, outputs); - MKLDNNRun(MKLDNNDeconvolutionBackward, attrs, ctx, inputs, req, outputs); - MKLDNN_OPCHECK_RUN(DeconvolutionGradCompute, attrs, ctx, inputs, req, outputs); + if (params.kernel.ndim() == 3) { + // we cannot check the output, as 3D deconvolution is not natively supported yet + MKLDNNRun(MKLDNNDeconvolutionBackward, attrs, ctx, inputs, req, outputs); + } else { + MKLDNN_OPCHECK_INIT(true, outputs.size(), inputs, outputs); + MKLDNNRun(MKLDNNDeconvolutionBackward, attrs, ctx, inputs, req, outputs); + MKLDNN_OPCHECK_RUN(DeconvolutionGradCompute, attrs, ctx, inputs, req, outputs); + } return; } FallBackCompute(DeconvolutionGradCompute, attrs, ctx, inputs, req, outputs); @@ -98,12 +108,12 @@ static bool DeconvolutionShape(const nnvm::NodeAttrs& attrs, mxnet::ShapeVector* in_shape, mxnet::ShapeVector* out_shape) { const DeconvolutionParam& param_ = nnvm::get(attrs.parsed); -#if MXNET_USE_CUDNN == 0 +#if MXNET_USE_CUDNN == 0 && MXNET_USE_MKLDNN == 0 if (param_.kernel.ndim() > 2) { - LOG(FATAL) << "If not using CUDNN, only 1D or 2D Deconvolution is supported"; + LOG(FATAL) << "If not using CUDNN or MKLDNN, only 1D or 2D Deconvolution is supported"; return false; } -#endif // CUDNN +#endif using namespace mshadow; if (!param_.no_bias) { diff --git a/src/operator/nn/mkldnn/mkldnn_deconvolution.cc b/src/operator/nn/mkldnn/mkldnn_deconvolution.cc index 78deeadcf8cb..a0888490b015 100644 --- a/src/operator/nn/mkldnn/mkldnn_deconvolution.cc +++ b/src/operator/nn/mkldnn/mkldnn_deconvolution.cc @@ -29,8 +29,9 @@ namespace mxnet { namespace op { -bool SupportMKLDNNDeconv(const DeconvolutionParam& params, const NDArray& input) { - return params.kernel.ndim() == 2 && input.shape().ndim() == 4 && +bool SupportMKLDNNDeconv(const DeconvolutionParam &params, const NDArray &input) { + return params.kernel.ndim() >= 1 && params.kernel.ndim() <= 3 && + input.shape().ndim() == 
(params.kernel.ndim() + 2) && (input.dtype() == mshadow::kFloat32 || input.dtype() == mshadow::kBfloat16); } @@ -327,10 +328,10 @@ DeconvDescCreator::DeconvDescCreator(const DeconvolutionParam& param, strides(param.stride.ndim()), padding(param.pad.ndim()), dilates(param.dilate.ndim()) { - // assuming only deconv2D is supported for now CHECK_EQ(param.stride.ndim(), param.pad.ndim()); CHECK_EQ(param.stride.ndim(), param.dilate.ndim()); - CHECK_EQ(param.stride.ndim(), 2); + CHECK_GE(param.stride.ndim(), 1); + CHECK_LE(param.stride.ndim(), 3); for (int i = 0; i < param.stride.ndim(); ++i) { strides[i] = param.stride[i]; padding[i] = param.pad[i]; diff --git a/tests/python/mkl/test_mkldnn.py b/tests/python/mkl/test_mkldnn.py index 8a38ef6fdb72..f194988dbb81 100644 --- a/tests/python/mkl/test_mkldnn.py +++ b/tests/python/mkl/test_mkldnn.py @@ -429,8 +429,8 @@ def check_convolution_training(stype): def test_Deconvolution(): def check_Deconvolution_training(stype): - for shape in [(3, 3, 10, 10)]: # testing only 2D for now - data_tmp = np.random.randint(256, size=shape) + for shape in [(3, 3, 10), (3, 3, 10, 10), (3, 3, 10, 10, 10)]: + data_tmp = np.random.normal(-0.1, 1, size=shape) data = mx.symbol.Variable('data', stype=stype) if np.array(shape).shape[0] == 3: @@ -439,6 +439,11 @@ def check_Deconvolution_training(stype): elif np.array(shape).shape[0] == 4: test = mx.symbol.Deconvolution(data=data, kernel=(3, 3), stride=(2, 2), num_filter=4) weight_tmp = np.random.normal(-0.1, 0.1, size=(3, 4, 3, 3)) + elif np.array(shape).shape[0] == 5 and stype == "default": + # Unable to test fallback to native implementation for non-default storage types + # as 3D deconvolution is not natively supported + test = mx.symbol.Deconvolution(data=data, kernel=(3,3,3), stride=(2,2,2), num_filter=4) + weight_tmp = np.random.normal(-0.1, 0.1, size=(3, 4, 3, 3, 3)) else: return 0 bias_tmp = np.random.normal(0.1, 0.1, size=(4,))