[MXNET-408] [WIP] inplace ReLU activation (apache#10847)
* inplace version of activation(relu)

* inplace relu

* add comments

* add comment

* comments

* fix compilation error

* add check_numerical_grad test
eric-haibin-lin authored and zheng-da committed Jun 28, 2018
1 parent d4543b7 commit 40f2572
Showing 8 changed files with 187 additions and 118 deletions.
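The change hinges on one property: ReLU's gradient can be read off its output, so the backward node no longer needs the forward input, and the forward output may overwrite the input buffer. A minimal standalone sketch of that idea (illustrative only, not MXNet code; all names here are made up):

#include <cstddef>
#include <iostream>
#include <vector>

// In-place ReLU forward: the output overwrites the input buffer.
void relu_forward_inplace(float* data, std::size_t n) {
  for (std::size_t i = 0; i < n; ++i) {
    data[i] = data[i] > 0.0f ? data[i] : 0.0f;
  }
}

// ReLU backward computed from the *output* y and the incoming gradient dy.
// Since y = max(x, 0), y > 0 exactly when x > 0, so the forward input x is
// never needed here -- which is what lets the forward pass run in place.
void relu_backward_from_output(const float* dy, const float* y,
                               float* dx, std::size_t n) {
  for (std::size_t i = 0; i < n; ++i) {
    dx[i] = y[i] > 0.0f ? dy[i] : 0.0f;
  }
}

int main() {
  std::vector<float> x = {-2.0f, -0.5f, 0.0f, 1.5f, 3.0f};
  relu_forward_inplace(x.data(), x.size());   // x now holds y
  std::vector<float> dy(x.size(), 1.0f);
  std::vector<float> dx(x.size());
  relu_backward_from_output(dy.data(), x.data(), dx.data(), x.size());
  for (float g : dx) std::cout << g << ' ';   // prints: 0 0 0 1 1
  std::cout << '\n';
}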
148 changes: 72 additions & 76 deletions src/operator/nn/activation-inl.h
@@ -83,24 +83,24 @@ struct hash<mxnet::op::ActivationParam> {
namespace mxnet {
namespace op {

template<typename xpu, typename ForwardOp, typename BackwardOp, typename DType>
template<typename xpu, typename ForwardOp, typename BackwardOp>
void ActivationForward(const OpContext &ctx, const TBlob &in_data,
const OpReqType &req, const TBlob &out_data) {
using namespace mshadow;
using namespace mshadow::expr;
Stream<xpu> *s = ctx.get_stream<xpu>();
const size_t sz = in_data.shape_.Size();
if (sz) {
MXNET_ASSIGN_REQ_SWITCH(req, Req, {
mxnet_op::Kernel<mxnet_op::op_with_req<ForwardOp, Req>, xpu>::Launch(
s, sz,
out_data.dptr<DType>(),
in_data.dptr<DType>());
MSHADOW_REAL_TYPE_SWITCH(in_data.type_flag_, DType, {
MXNET_ASSIGN_REQ_SWITCH(req, Req, {
mxnet_op::Kernel<mxnet_op::op_with_req<ForwardOp, Req>, xpu>::Launch(
s, sz, out_data.dptr<DType>(), in_data.dptr<DType>());
});
});
}
}

template<typename xpu, typename ForwardOp, typename BackwardOp, typename DType>
template<typename xpu, typename ForwardOp, typename BackwardOp>
void ActivationBackward(const OpContext &ctx, const TBlob &out_grad,
const TBlob &out_data, const OpReqType &req,
const TBlob &in_grad) {
@@ -109,86 +109,81 @@ void ActivationBackward(const OpContext &ctx, const TBlob &out_grad,
Stream<xpu> *s = ctx.get_stream<xpu>();
const size_t sz = out_data.shape_.Size();
if (sz) {
MXNET_ASSIGN_REQ_SWITCH(req, Req, {
mxnet_op::Kernel<mxnet_op::op_with_req<
mxnet::op::mxnet_op::backward_grad_tuned<BackwardOp>, Req>, xpu>::Launch(
s, sz,
in_grad.dptr<DType>(),
out_grad.dptr<DType>(),
out_data.dptr<DType>());
MSHADOW_REAL_TYPE_SWITCH(out_grad.type_flag_, DType, {
MXNET_ASSIGN_REQ_SWITCH(req, Req, {
mxnet_op::Kernel<mxnet_op::op_with_req<
mxnet_op::backward_grad_tuned<BackwardOp>, Req>, xpu>::Launch(
s, sz, in_grad.dptr<DType>(), out_grad.dptr<DType>(), out_data.dptr<DType>());
});
});
}
}

template<typename xpu>
void ActivationComputeImpl(const ActivationParam &param, const OpContext &ctx,
const TBlob &input, OpReqType req, const TBlob &output) {
MSHADOW_REAL_TYPE_SWITCH(input.type_flag_, DType, {
switch (param.act_type) {
case activation::kReLU:
ActivationForward<xpu, mshadow_op::relu, mshadow_op::relu_grad, DType>(
ctx, input, req, output);
break;
case activation::kSigmoid:
ActivationForward<xpu, mshadow_op::sigmoid, mshadow_op::sigmoid_grad, DType>(
ctx, input, req, output);
break;
case activation::kTanh:
ActivationForward<xpu, mshadow_op::tanh, mshadow_op::tanh_grad, DType>(
ctx, input, req, output);
break;
case activation::kSoftReLU:
ActivationForward<xpu, mshadow_op::softrelu, mshadow_op::softrelu_grad, DType>(
ctx, input, req, output);
break;
case activation::kSoftSign:
ActivationForward<xpu, mshadow_op::softsign, mshadow_op::softsign_grad, DType>(
ctx, input, req, output);
break;
default:
LOG(FATAL) << "unknown activation type";
}
});
switch (param.act_type) {
case activation::kReLU:
ActivationForward<xpu, mshadow_op::relu, mshadow_op::relu_grad>(
ctx, input, req, output);
break;
case activation::kSigmoid:
ActivationForward<xpu, mshadow_op::sigmoid, mshadow_op::sigmoid_grad>(
ctx, input, req, output);
break;
case activation::kTanh:
ActivationForward<xpu, mshadow_op::tanh, mshadow_op::tanh_grad>(
ctx, input, req, output);
break;
case activation::kSoftReLU:
ActivationForward<xpu, mshadow_op::softrelu, mshadow_op::softrelu_grad>(
ctx, input, req, output);
break;
case activation::kSoftSign:
ActivationForward<xpu, mshadow_op::softsign, mshadow_op::softsign_grad>(
ctx, input, req, output);
break;
default:
LOG(FATAL) << "unknown activation type";
}
}

template<typename xpu>
void ActivationGradComputeImpl(const ActivationParam &param, const OpContext &ctx,
const TBlob &out_grad, const TBlob &out_data,
OpReqType req, const TBlob &output) {
MSHADOW_REAL_TYPE_SWITCH(out_grad.type_flag_, DType, {
switch (param.act_type) {
case activation::kReLU:
ActivationBackward<xpu, mshadow_op::relu, mshadow_op::relu_grad, DType>(
ctx, out_grad, out_data, req, output);
break;
case activation::kSigmoid:
ActivationBackward<xpu, mshadow_op::sigmoid, mshadow_op::sigmoid_grad, DType>(
ctx, out_grad, out_data, req, output);
break;
case activation::kTanh:
ActivationBackward<xpu, mshadow_op::tanh, mshadow_op::tanh_grad, DType>(
ctx, out_grad, out_data, req, output);
break;
case activation::kSoftReLU:
ActivationBackward<xpu, mshadow_op::softrelu, mshadow_op::softrelu_grad, DType>(
ctx, out_grad, out_data, req, output);
break;
case activation::kSoftSign:
ActivationBackward<xpu, mshadow_op::softsign, mshadow_op::softsign_grad, DType>(
ctx, out_grad, out_data, req, output);
break;
default:
LOG(FATAL) << "unknown activation type";
}
});
switch (param.act_type) {
case activation::kReLU:
ActivationBackward<xpu, mshadow_op::relu, mshadow_op::relu_grad>(
ctx, out_grad, out_data, req, output);
break;
case activation::kSigmoid:
ActivationBackward<xpu, mshadow_op::sigmoid, mshadow_op::sigmoid_grad>(
ctx, out_grad, out_data, req, output);
break;
case activation::kTanh:
ActivationBackward<xpu, mshadow_op::tanh, mshadow_op::tanh_grad>(
ctx, out_grad, out_data, req, output);
break;
case activation::kSoftReLU:
ActivationBackward<xpu, mshadow_op::softrelu, mshadow_op::softrelu_grad>(
ctx, out_grad, out_data, req, output);
break;
case activation::kSoftSign:
ActivationBackward<xpu, mshadow_op::softsign, mshadow_op::softsign_grad>(
ctx, out_grad, out_data, req, output);
break;
default:
LOG(FATAL) << "unknown activation type";
}
}

template<typename xpu>
void ActivationCompute(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<TBlob>& inputs,
const std::vector<OpReqType>& req,
const std::vector<TBlob>& outputs) {
const OpContext& ctx,
const std::vector<TBlob>& inputs,
const std::vector<OpReqType>& req,
const std::vector<TBlob>& outputs) {
CHECK_EQ(inputs.size(), 1U);
CHECK_EQ(outputs.size(), 1U);
const ActivationParam& param = nnvm::get<ActivationParam>(attrs.parsed);
@@ -197,18 +192,19 @@ void ActivationCompute(const nnvm::NodeAttrs& attrs,

template<typename xpu>
void ActivationGradCompute(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<TBlob>& inputs,
const std::vector<OpReqType>& req,
const std::vector<TBlob>& outputs) {
const OpContext& ctx,
const std::vector<TBlob>& inputs,
const std::vector<OpReqType>& req,
const std::vector<TBlob>& outputs) {
const ActivationParam& param = nnvm::get<ActivationParam>(attrs.parsed);
#if (MXNET_USE_CUDNN == 1 || MXNET_USE_MKLDNN == 1)
CHECK_EQ(inputs.size(), 3U);
bool relu = param.act_type == activation::kReLU;
CHECK_EQ(inputs.size(), relu ? 2U : 3U);
#else
CHECK_EQ(inputs.size(), 2U);
#endif
CHECK_EQ(outputs.size(), 1U);
CHECK_EQ(req.size(), 1U);
const ActivationParam& param = nnvm::get<ActivationParam>(attrs.parsed);
ActivationGradComputeImpl<xpu>(param, ctx, inputs[0], inputs[1], req[0], outputs[0]);
}

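The hunks above drop the DType template parameter from ActivationForward and ActivationBackward and move the real-type dispatch (MSHADOW_REAL_TYPE_SWITCH) inside them, next to the kernel launch, so ActivationComputeImpl and ActivationGradComputeImpl no longer need an outer type switch. A simplified standalone illustration of that dispatch-then-launch pattern (hand-rolled type switch and elementwise loop; this is not the actual MSHADOW_REAL_TYPE_SWITCH or mxnet_op::Kernel machinery):

#include <cstddef>
#include <iostream>
#include <vector>

enum class TypeFlag { kFloat32, kFloat64 };

// Elementwise functor, analogous to mshadow_op::relu.
struct relu_op {
  template <typename DType>
  static DType Map(DType x) { return x > DType(0) ? x : DType(0); }
};

// Launch for one concrete DType (stand-in for Kernel<op_with_req<...>>::Launch).
template <typename Op, typename DType>
void LaunchElementwise(DType* out, const DType* in, std::size_t n) {
  for (std::size_t i = 0; i < n; ++i) out[i] = Op::template Map<DType>(in[i]);
}

// The caller no longer carries a DType parameter: the runtime type flag is
// resolved here, right before the launch (the role MSHADOW_REAL_TYPE_SWITCH
// now plays inside ActivationForward).
template <typename Op>
void ApplyActivation(TypeFlag type, void* out, const void* in, std::size_t n) {
  switch (type) {
    case TypeFlag::kFloat32:
      LaunchElementwise<Op>(static_cast<float*>(out),
                            static_cast<const float*>(in), n);
      break;
    case TypeFlag::kFloat64:
      LaunchElementwise<Op>(static_cast<double*>(out),
                            static_cast<const double*>(in), n);
      break;
  }
}

int main() {
  std::vector<float> x = {-1.0f, 2.0f, -3.0f};
  std::vector<float> y(x.size());
  ApplyActivation<relu_op>(TypeFlag::kFloat32, y.data(), x.data(), x.size());
  for (float v : y) std::cout << v << ' ';  // prints: 0 2 0
  std::cout << '\n';
}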
50 changes: 34 additions & 16 deletions src/operator/nn/activation.cc
@@ -45,7 +45,12 @@ struct ActivationGrad {
std::vector<nnvm::NodeEntry> heads(ograds.begin(), ograds.end());
heads.emplace_back(nnvm::NodeEntry{n, activation::kOut, 0});
#if (MXNET_USE_CUDNN == 1 || MXNET_USE_MKLDNN == 1)
heads.push_back(n->inputs[activation::kData]);
const NodeAttrs& attrs = n->attrs;
// for ReLU, no need to pass input data. This enables inplace optimization during the
// forward pass.
if (dmlc::get<ActivationParam>(attrs.parsed).act_type != activation::kReLU) {
heads.push_back(n->inputs[activation::kData]);
}
#endif
return MakeGradNode(op_name, n, heads, n->attrs.dict);
}
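The comment above ("for ReLU, no need to pass input data") rests on a one-line identity, written out here for reference (with the usual convention relu'(0) = 0):

$$\frac{\partial L}{\partial x_i} = \frac{\partial L}{\partial y_i}\,\mathbf{1}[x_i > 0] = \frac{\partial L}{\partial y_i}\,\mathbf{1}[y_i > 0], \qquad \text{since } y_i = \max(x_i, 0) > 0 \iff x_i > 0.$$

Because the mask 1[y_i > 0] is computable from the output alone, the gradient node can drop its dependency on the forward input.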
@@ -74,13 +79,15 @@ void ActivationGradComputeExCPU(const nnvm::NodeAttrs& attrs,
const std::vector<NDArray>& inputs,
const std::vector<OpReqType>& req,
const std::vector<NDArray>& outputs) {
CHECK_EQ(inputs.size(), 3U);
const ActivationParam& param = nnvm::get<ActivationParam>(attrs.parsed);
bool relu = param.act_type == activation::kReLU;
CHECK_EQ(inputs.size(), relu ? 2U : 3U);
if (SupportMKLDNN(inputs[0])) {
MKLDNN_OPCHECK_INIT(true, outputs.size(), inputs, outputs);
MKLDNNActivationBackward(attrs, ctx, inputs[0], inputs[2], req[0],
// XXX: for y = relu(x), y is passed as "in_data" to Backward()
MKLDNNActivationBackward(attrs, ctx, inputs[0], relu ? inputs[1] : inputs[2], req[0],
outputs[0]);
MKLDNN_OPCHECK_RUN(ActivationGradCompute<cpu>, attrs, ctx, inputs, req, outputs);
MKLDNN_OPCHECK_RUN(ActivationGradCompute<cpu>, attrs, ctx, inputs, req, outputs);
return;
}
ActivationGradComputeImpl<cpu>(param, ctx, inputs[0].data(), inputs[1].data(),
@@ -112,23 +119,29 @@ inline static bool BackwardActStorageType(const nnvm::NodeAttrs& attrs,
DispatchMode* dispatch_mode,
std::vector<int> *in_attrs,
std::vector<int> *out_attrs) {
bool ret = false;
#if (MXNET_USE_CUDNN == 1 || MXNET_USE_MKLDNN == 1)
CHECK_EQ(in_attrs->size(), 3U);
const ActivationParam& param = nnvm::get<ActivationParam>(attrs.parsed);
if (param.act_type != activation::kReLU) {
CHECK_EQ(in_attrs->size(), 3U);
ret = ElemwiseStorageType<3, 1, false, false, false>(attrs, dev_mask,
dispatch_mode,
in_attrs, out_attrs);
} else {
// for ReLU activation, the backward pass only needs ograd and output
CHECK_EQ(in_attrs->size(), 2U);
ret = ElemwiseStorageType<2, 1, false, false, false>(attrs, dev_mask,
dispatch_mode,
in_attrs, out_attrs);
}
#else
CHECK_EQ(in_attrs->size(), 2U);
ret = ElemwiseStorageType<2, 1, false, false, false>(attrs, dev_mask,
dispatch_mode,
in_attrs, out_attrs);
#endif
CHECK_EQ(out_attrs->size(), 1U);
#if (MXNET_USE_CUDNN == 1 || MXNET_USE_MKLDNN == 1)
bool ret = ElemwiseStorageType<3, 1, false, false, false>(attrs, dev_mask,
dispatch_mode,
in_attrs, out_attrs);
#else
bool ret = ElemwiseStorageType<2, 1, false, false, false>(attrs, dev_mask,
dispatch_mode,
in_attrs, out_attrs);
#endif
#if MXNET_USE_MKLDNN == 1
const ActivationParam& param = nnvm::get<ActivationParam>(attrs.parsed);
if (dev_mask == mshadow::cpu::kDevMask && SupportMKLDNNAct(param)) {
*dispatch_mode = DispatchMode::kFComputeEx;
}
@@ -162,7 +175,12 @@ The following activation functions are supported:
.add_arguments(ActivationParam::__FIELDS__());

NNVM_REGISTER_OP(_backward_Activation)
.set_num_inputs(3)
.set_num_inputs([](const nnvm::NodeAttrs& attrs) {
int act_type = dmlc::get<ActivationParam>(attrs.parsed).act_type;
// for ReLU activation, the backward pass only needs ograd and output
if (act_type == activation::kReLU) return 2;
return 3;
})
.set_num_outputs(1)
.set_attr<nnvm::TIsBackward>("TIsBackward", true)
.set_attr<FInferStorageType>("FInferStorageType", BackwardActStorageType)
39 changes: 22 additions & 17 deletions src/operator/nn/activation.cu
@@ -55,12 +55,13 @@ void ActivationCompute<gpu>(const nnvm::NodeAttrs& attrs,
CHECK_EQ(outputs.size(), 1U);
const ActivationParam& param = nnvm::get<ActivationParam>(attrs.parsed);

// SoftReLU not supported by CUDNN yet
// Neither SoftReLU nor SoftSign is supported by CUDNN yet
if (param.act_type == activation::kSoftReLU) {
MSHADOW_REAL_TYPE_SWITCH(inputs[0].type_flag_, DType, {
ActivationForward<gpu, mshadow_op::softrelu, mshadow_op::softrelu_grad, DType>(ctx,
inputs[0], req[0], outputs[0]);
});
ActivationForward<gpu, mshadow_op::softrelu, mshadow_op::softrelu_grad>(ctx,
inputs[0], req[0], outputs[0]);
} else if (param.act_type == activation::kSoftSign) {
ActivationForward<gpu, mshadow_op::softsign, mshadow_op::softsign_grad>(ctx,
inputs[0], req[0], outputs[0]);
} else {
MSHADOW_REAL_TYPE_SWITCH(inputs[0].type_flag_, DType, {
get_cudnn_op<DType>(param).Forward(ctx, inputs[0], req[0], outputs[0]);
@@ -70,24 +71,28 @@

template<>
void ActivationGradCompute<gpu>(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<TBlob>& inputs,
const std::vector<OpReqType>& req,
const std::vector<TBlob>& outputs) {
CHECK_EQ(inputs.size(), 3U);
const OpContext& ctx,
const std::vector<TBlob>& inputs,
const std::vector<OpReqType>& req,
const std::vector<TBlob>& outputs) {
const ActivationParam& param = nnvm::get<ActivationParam>(attrs.parsed);
bool relu = param.act_type == activation::kReLU;
CHECK_EQ(inputs.size(), relu ? 2U : 3U);
CHECK_EQ(outputs.size(), 1U);
CHECK_EQ(req.size(), 1U);
const ActivationParam& param = nnvm::get<ActivationParam>(attrs.parsed);

// SoftReLU not supported by CUDNN yet
// neither SoftReLU nor SoftSign is supported by CUDNN yet
if (param.act_type == activation::kSoftReLU) {
MSHADOW_REAL_TYPE_SWITCH(inputs[0].type_flag_, DType, {
ActivationBackward<gpu, mshadow_op::softrelu, mshadow_op::softrelu_grad, DType>(
ctx, inputs[0], inputs[1], req[0], outputs[0]);
});
ActivationBackward<gpu, mshadow_op::softrelu, mshadow_op::softrelu_grad>(
ctx, inputs[0], inputs[1], req[0], outputs[0]);
} else if (param.act_type == activation::kSoftSign) {
ActivationBackward<gpu, mshadow_op::softsign, mshadow_op::softsign_grad>(
ctx, inputs[0], inputs[1], req[0], outputs[0]);
} else {
MSHADOW_REAL_TYPE_SWITCH(inputs[0].type_flag_, DType, {
get_cudnn_op<DType>(param).Backward(ctx, inputs[0], inputs[2], inputs[1], req[0], outputs[0]);
// XXX: for y = relu(x), y is passed as "in_data" to Backward()
get_cudnn_op<DType>(param).Backward(ctx, inputs[0], relu ? inputs[1] : inputs[2],
inputs[1], req[0], outputs[0]);
});
}
}
3 changes: 3 additions & 0 deletions src/operator/nn/cudnn/cudnn_activation-inl.h
@@ -130,6 +130,9 @@ class CuDNNActivationOp {
#endif
}

// backward computation for cudnn activation operator. Note that for relu
// it's okay to pass "out_data" as "in_data", since it doesn't make any
// difference in terms of computing the gradient of relu.
void Backward(const OpContext &ctx, const TBlob &out_grad,
const TBlob &in_data, const TBlob &out_data,
const OpReqType &req, const TBlob &in_grad) {
2 changes: 2 additions & 0 deletions src/operator/nn/mkldnn/mkldnn_act.cc
@@ -165,6 +165,8 @@ void MKLDNNActivationForward(const nnvm::NodeAttrs& attrs, const OpContext &ctx,
stream->Submit();
}

// For backward relu activation, it's okay to pass "out_data" as "in_data" to this
// function, since the computation only involves the non-zero elements.
void MKLDNNActivationBackward(const nnvm::NodeAttrs& attrs, const OpContext &ctx,
const NDArray &out_grad, const NDArray &in_data,
const OpReqType &req, const NDArray &in_grad) {
2 changes: 1 addition & 1 deletion src/operator/tensor/elemwise_unary_op_basic.cc
@@ -86,7 +86,7 @@ The storage type of ``relu`` output depends upon the input storage type:
.set_attr<FInferStorageType>("FInferStorageType", ElemwiseStorageType<1, 1, false, true, false>)
.set_attr<FCompute>("FCompute<cpu>", UnaryOp::Compute<cpu, mshadow_op::relu>)
.set_attr<FComputeEx>("FComputeEx<cpu>", UnaryOp::ComputeEx<cpu, mshadow_op::relu>)
.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{"_backward_relu"});
.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseOut{"_backward_relu"});

MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_relu,
unary_bwd<mshadow_op::relu_grad>);
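The hunk above switches relu's FGradient from ElemwiseGradUseIn to ElemwiseGradUseOut, so _backward_relu now sees the output instead of the input. Below is a small standalone finite-difference sanity check of that output-based gradient. It is only an illustration, not the check_numerical_grad test referenced in the commit message, and it samples points away from zero, where relu is differentiable:

#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <vector>

static double relu(double x) { return x > 0.0 ? x : 0.0; }

// Analytic gradient of relu at x, computed from the *output* y only.
static double grad_from_output(double y) { return y > 0.0 ? 1.0 : 0.0; }

int main() {
  const double eps = 1e-6;
  // Sample points away from 0, where relu is differentiable.
  std::vector<double> xs = {-3.0, -1.2, -0.4, 0.7, 1.9, 4.2};
  double max_err = 0.0;
  for (double x : xs) {
    double y = relu(x);
    double analytic = grad_from_output(y);
    // Central finite difference of relu at x.
    double numeric = (relu(x + eps) - relu(x - eps)) / (2.0 * eps);
    max_err = std::max(max_err, std::fabs(analytic - numeric));
  }
  std::cout << "max |analytic - numeric| = " << max_err << '\n';
  return max_err < 1e-4 ? EXIT_SUCCESS : EXIT_FAILURE;
}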