Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

[MXNET-408] inplace ReLU activation #10847

Merged
merged 7 commits on May 8, 2018
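
The core observation behind this PR: ReLU's gradient can be recovered from the output alone, so the forward pass may write its result over the input buffer and the backward pass never needs the original input. A minimal standalone sketch of the idea (illustrative code, not part of the diff):

#include <cstddef>
#include <vector>

// ReLU forward written in place: the result overwrites the input buffer.
void relu_forward_inplace(std::vector<float> *buf) {
  for (float &v : *buf) v = v > 0.f ? v : 0.f;
}

// ReLU backward computed from the output alone: dL/dx = dL/dy * (y > 0).
// Since y = max(x, 0), the mask (y > 0) equals (x > 0), so x is not needed.
void relu_backward_from_output(const std::vector<float> &out_grad,
                               const std::vector<float> &out_data,
                               std::vector<float> *in_grad) {
  for (std::size_t i = 0; i < out_data.size(); ++i)
    (*in_grad)[i] = out_data[i] > 0.f ? out_grad[i] : 0.f;
}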
148 changes: 72 additions & 76 deletions src/operator/nn/activation-inl.h
@@ -83,24 +83,24 @@ struct hash<mxnet::op::ActivationParam> {
namespace mxnet {
namespace op {

template<typename xpu, typename ForwardOp, typename BackwardOp, typename DType>
template<typename xpu, typename ForwardOp, typename BackwardOp>
void ActivationForward(const OpContext &ctx, const TBlob &in_data,
const OpReqType &req, const TBlob &out_data) {
using namespace mshadow;
using namespace mshadow::expr;
Stream<xpu> *s = ctx.get_stream<xpu>();
const size_t sz = in_data.shape_.Size();
if (sz) {
MXNET_ASSIGN_REQ_SWITCH(req, Req, {
mxnet_op::Kernel<mxnet_op::op_with_req<ForwardOp, Req>, xpu>::Launch(
s, sz,
out_data.dptr<DType>(),
in_data.dptr<DType>());
MSHADOW_REAL_TYPE_SWITCH(in_data.type_flag_, DType, {
MXNET_ASSIGN_REQ_SWITCH(req, Req, {
mxnet_op::Kernel<mxnet_op::op_with_req<ForwardOp, Req>, xpu>::Launch(
s, sz, out_data.dptr<DType>(), in_data.dptr<DType>());
});
});
}
}

template<typename xpu, typename ForwardOp, typename BackwardOp, typename DType>
template<typename xpu, typename ForwardOp, typename BackwardOp>
void ActivationBackward(const OpContext &ctx, const TBlob &out_grad,
const TBlob &out_data, const OpReqType &req,
const TBlob &in_grad) {
@@ -109,86 +109,81 @@ void ActivationBackward(const OpContext &ctx, const TBlob &out_grad,
Stream<xpu> *s = ctx.get_stream<xpu>();
const size_t sz = out_data.shape_.Size();
if (sz) {
MXNET_ASSIGN_REQ_SWITCH(req, Req, {
mxnet_op::Kernel<mxnet_op::op_with_req<
mxnet::op::mxnet_op::backward_grad_tuned<BackwardOp>, Req>, xpu>::Launch(
s, sz,
in_grad.dptr<DType>(),
out_grad.dptr<DType>(),
out_data.dptr<DType>());
MSHADOW_REAL_TYPE_SWITCH(out_grad.type_flag_, DType, {
MXNET_ASSIGN_REQ_SWITCH(req, Req, {
mxnet_op::Kernel<mxnet_op::op_with_req<
mxnet_op::backward_grad_tuned<BackwardOp>, Req>, xpu>::Launch(
s, sz, in_grad.dptr<DType>(), out_grad.dptr<DType>(), out_data.dptr<DType>());
});
});
}
}

template<typename xpu>
void ActivationComputeImpl(const ActivationParam &param, const OpContext &ctx,
const TBlob &input, OpReqType req, const TBlob &output) {
MSHADOW_REAL_TYPE_SWITCH(input.type_flag_, DType, {
switch (param.act_type) {
case activation::kReLU:
ActivationForward<xpu, mshadow_op::relu, mshadow_op::relu_grad, DType>(
ctx, input, req, output);
break;
case activation::kSigmoid:
ActivationForward<xpu, mshadow_op::sigmoid, mshadow_op::sigmoid_grad, DType>(
ctx, input, req, output);
break;
case activation::kTanh:
ActivationForward<xpu, mshadow_op::tanh, mshadow_op::tanh_grad, DType>(
ctx, input, req, output);
break;
case activation::kSoftReLU:
ActivationForward<xpu, mshadow_op::softrelu, mshadow_op::softrelu_grad, DType>(
ctx, input, req, output);
break;
case activation::kSoftSign:
ActivationForward<xpu, mshadow_op::softsign, mshadow_op::softsign_grad, DType>(
ctx, input, req, output);
break;
default:
LOG(FATAL) << "unknown activation type";
}
});
switch (param.act_type) {
case activation::kReLU:
ActivationForward<xpu, mshadow_op::relu, mshadow_op::relu_grad>(
ctx, input, req, output);
break;
case activation::kSigmoid:
ActivationForward<xpu, mshadow_op::sigmoid, mshadow_op::sigmoid_grad>(
ctx, input, req, output);
break;
case activation::kTanh:
ActivationForward<xpu, mshadow_op::tanh, mshadow_op::tanh_grad>(
ctx, input, req, output);
break;
case activation::kSoftReLU:
ActivationForward<xpu, mshadow_op::softrelu, mshadow_op::softrelu_grad>(
ctx, input, req, output);
break;
case activation::kSoftSign:
ActivationForward<xpu, mshadow_op::softsign, mshadow_op::softsign_grad>(
ctx, input, req, output);
break;
default:
LOG(FATAL) << "unknown activation type";
}
}

template<typename xpu>
void ActivationGradComputeImpl(const ActivationParam &param, const OpContext &ctx,
const TBlob &out_grad, const TBlob &out_data,
OpReqType req, const TBlob &output) {
MSHADOW_REAL_TYPE_SWITCH(out_grad.type_flag_, DType, {
switch (param.act_type) {
case activation::kReLU:
ActivationBackward<xpu, mshadow_op::relu, mshadow_op::relu_grad, DType>(
ctx, out_grad, out_data, req, output);
break;
case activation::kSigmoid:
ActivationBackward<xpu, mshadow_op::sigmoid, mshadow_op::sigmoid_grad, DType>(
ctx, out_grad, out_data, req, output);
break;
case activation::kTanh:
ActivationBackward<xpu, mshadow_op::tanh, mshadow_op::tanh_grad, DType>(
ctx, out_grad, out_data, req, output);
break;
case activation::kSoftReLU:
ActivationBackward<xpu, mshadow_op::softrelu, mshadow_op::softrelu_grad, DType>(
ctx, out_grad, out_data, req, output);
break;
case activation::kSoftSign:
ActivationBackward<xpu, mshadow_op::softsign, mshadow_op::softsign_grad, DType>(
ctx, out_grad, out_data, req, output);
break;
default:
LOG(FATAL) << "unknown activation type";
}
});
switch (param.act_type) {
case activation::kReLU:
ActivationBackward<xpu, mshadow_op::relu, mshadow_op::relu_grad>(
ctx, out_grad, out_data, req, output);
break;
case activation::kSigmoid:
ActivationBackward<xpu, mshadow_op::sigmoid, mshadow_op::sigmoid_grad>(
ctx, out_grad, out_data, req, output);
break;
case activation::kTanh:
ActivationBackward<xpu, mshadow_op::tanh, mshadow_op::tanh_grad>(
ctx, out_grad, out_data, req, output);
break;
case activation::kSoftReLU:
ActivationBackward<xpu, mshadow_op::softrelu, mshadow_op::softrelu_grad>(
ctx, out_grad, out_data, req, output);
break;
case activation::kSoftSign:
ActivationBackward<xpu, mshadow_op::softsign, mshadow_op::softsign_grad>(
ctx, out_grad, out_data, req, output);
break;
default:
LOG(FATAL) << "unknown activation type";
}
}

template<typename xpu>
void ActivationCompute(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<TBlob>& inputs,
const std::vector<OpReqType>& req,
const std::vector<TBlob>& outputs) {
CHECK_EQ(inputs.size(), 1U);
CHECK_EQ(outputs.size(), 1U);
const ActivationParam& param = nnvm::get<ActivationParam>(attrs.parsed);
Expand All @@ -197,18 +192,19 @@ void ActivationCompute(const nnvm::NodeAttrs& attrs,

template<typename xpu>
void ActivationGradCompute(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<TBlob>& inputs,
const std::vector<OpReqType>& req,
const std::vector<TBlob>& outputs) {
const ActivationParam& param = nnvm::get<ActivationParam>(attrs.parsed);
#if (MXNET_USE_CUDNN == 1 || MXNET_USE_MKLDNN == 1)
CHECK_EQ(inputs.size(), 3U);
bool relu = param.act_type == activation::kReLU;
CHECK_EQ(inputs.size(), relu ? 2U : 3U);
#else
CHECK_EQ(inputs.size(), 2U);
#endif
CHECK_EQ(outputs.size(), 1U);
CHECK_EQ(req.size(), 1U);
const ActivationParam& param = nnvm::get<ActivationParam>(attrs.parsed);
ActivationGradComputeImpl<xpu>(param, ctx, inputs[0], inputs[1], req[0], outputs[0]);
}

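
Aside from the smaller backward input list, the refactor in activation-inl.h moves the real-type dispatch (MSHADOW_REAL_TYPE_SWITCH) from the call sites into ActivationForward/ActivationBackward, so callers no longer carry a DType template parameter. A rough sketch of the same pattern outside MXNet (the enum, functor, and switch below are illustrative stand-ins, not the mshadow macros):

#include <cstddef>
#include <stdexcept>

enum class Dtype { kFloat32, kFloat64 };

struct ReluOp {
  template <typename T>
  static T Map(T x) { return x > T(0) ? x : T(0); }
};

// The runtime dtype switch lives inside the launcher, so a caller only
// selects the functor, not the element type.
template <typename Op>
void ActivationForwardSketch(Dtype dtype, void *out, const void *in, std::size_t n) {
  switch (dtype) {
    case Dtype::kFloat32: {
      auto *o = static_cast<float *>(out);
      auto *i = static_cast<const float *>(in);
      for (std::size_t k = 0; k < n; ++k) o[k] = Op::Map(i[k]);
      break;
    }
    case Dtype::kFloat64: {
      auto *o = static_cast<double *>(out);
      auto *i = static_cast<const double *>(in);
      for (std::size_t k = 0; k < n; ++k) o[k] = Op::Map(i[k]);
      break;
    }
    default:
      throw std::runtime_error("unsupported dtype");
  }
}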
50 changes: 34 additions & 16 deletions src/operator/nn/activation.cc
@@ -45,7 +45,12 @@ struct ActivationGrad {
std::vector<nnvm::NodeEntry> heads(ograds.begin(), ograds.end());
heads.emplace_back(nnvm::NodeEntry{n, activation::kOut, 0});
#if (MXNET_USE_CUDNN == 1 || MXNET_USE_MKLDNN == 1)
heads.push_back(n->inputs[activation::kData]);
const NodeAttrs& attrs = n->attrs;
// for ReLU, no need to pass input data. This enables inplace optimization during the
// forward pass.
if (dmlc::get<ActivationParam>(attrs.parsed).act_type != activation::kReLU) {
heads.push_back(n->inputs[activation::kData]);
}
#endif
return MakeGradNode(op_name, n, heads, n->attrs.dict);
}
@@ -74,13 +79,15 @@ void ActivationGradComputeExCPU(const nnvm::NodeAttrs& attrs,
const std::vector<NDArray>& inputs,
const std::vector<OpReqType>& req,
const std::vector<NDArray>& outputs) {
CHECK_EQ(inputs.size(), 3U);
const ActivationParam& param = nnvm::get<ActivationParam>(attrs.parsed);
bool relu = param.act_type == activation::kReLU;
CHECK_EQ(inputs.size(), relu ? 2U : 3U);
if (SupportMKLDNN(inputs[0])) {
MKLDNN_OPCHECK_INIT(true, outputs.size(), inputs, outputs);
MKLDNNActivationBackward(attrs, ctx, inputs[0], inputs[2], req[0],
// XXX: for y = relu(x), y is passed as "in_data" to Backward()
MKLDNNActivationBackward(attrs, ctx, inputs[0], relu ? inputs[1] : inputs[2], req[0],
outputs[0]);
MKLDNN_OPCHECK_RUN(ActivationGradCompute<cpu>, attrs, ctx, inputs, req, outputs);
return;
}
ActivationGradComputeImpl<cpu>(param, ctx, inputs[0].data(), inputs[1].data(),
@@ -112,23 +119,29 @@ inline static bool BackwardActStorageType(const nnvm::NodeAttrs& attrs,
DispatchMode* dispatch_mode,
std::vector<int> *in_attrs,
std::vector<int> *out_attrs) {
bool ret = false;
#if (MXNET_USE_CUDNN == 1 || MXNET_USE_MKLDNN == 1)
CHECK_EQ(in_attrs->size(), 3U);
const ActivationParam& param = nnvm::get<ActivationParam>(attrs.parsed);
if (param.act_type != activation::kReLU) {
CHECK_EQ(in_attrs->size(), 3U);
ret = ElemwiseStorageType<3, 1, false, false, false>(attrs, dev_mask,
dispatch_mode,
in_attrs, out_attrs);
} else {
// for ReLU activation, the backward pass only needs ograd and output
CHECK_EQ(in_attrs->size(), 2U);
ret = ElemwiseStorageType<2, 1, false, false, false>(attrs, dev_mask,
dispatch_mode,
in_attrs, out_attrs);
}
#else
CHECK_EQ(in_attrs->size(), 2U);
ret = ElemwiseStorageType<2, 1, false, false, false>(attrs, dev_mask,
dispatch_mode,
in_attrs, out_attrs);
#endif
CHECK_EQ(out_attrs->size(), 1U);
#if (MXNET_USE_CUDNN == 1 || MXNET_USE_MKLDNN == 1)
bool ret = ElemwiseStorageType<3, 1, false, false, false>(attrs, dev_mask,
dispatch_mode,
in_attrs, out_attrs);
#else
bool ret = ElemwiseStorageType<2, 1, false, false, false>(attrs, dev_mask,
dispatch_mode,
in_attrs, out_attrs);
#endif
#if MXNET_USE_MKLDNN == 1
const ActivationParam& param = nnvm::get<ActivationParam>(attrs.parsed);
if (dev_mask == mshadow::cpu::kDevMask && SupportMKLDNNAct(param)) {
*dispatch_mode = DispatchMode::kFComputeEx;
}
@@ -162,7 +175,12 @@ The following activation functions are supported:
.add_arguments(ActivationParam::__FIELDS__());

NNVM_REGISTER_OP(_backward_Activation)
.set_num_inputs(3)
.set_num_inputs([](const nnvm::NodeAttrs& attrs) {
int act_type = dmlc::get<ActivationParam>(attrs.parsed).act_type;
// for ReLU activation, the backward pass only needs ograd and output
if (act_type == activation::kReLU) return 2;
return 3;
})
.set_num_outputs(1)
.set_attr<nnvm::TIsBackward>("TIsBackward", true)
.set_attr<FInferStorageType>("FInferStorageType", BackwardActStorageType)
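
The net effect on the autograd graph: for ReLU the backward node now consumes only {out_grad, out_data}, while the other activations (under CUDNN or MKLDNN builds) still consume {out_grad, out_data, in_data}. A hypothetical helper capturing that selection (descriptive names, not the actual nnvm code):

#include <string>
#include <vector>

enum class ActType { kReLU, kSigmoid, kTanh, kSoftReLU, kSoftSign };

// Tensors the backward node depends on. Dropping in_data for ReLU is what
// lets the forward output be written in place over the input.
std::vector<std::string> BackwardHeads(ActType act) {
  std::vector<std::string> heads = {"out_grad", "out_data"};
  if (act != ActType::kReLU) heads.push_back("in_data");
  return heads;
}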
39 changes: 22 additions & 17 deletions src/operator/nn/activation.cu
@@ -55,12 +55,13 @@ void ActivationCompute<gpu>(const nnvm::NodeAttrs& attrs,
CHECK_EQ(outputs.size(), 1U);
const ActivationParam& param = nnvm::get<ActivationParam>(attrs.parsed);

// SoftReLU not supported by CUDNN yet
// SoftReLU and SoftSign are not supported by CUDNN yet
if (param.act_type == activation::kSoftReLU) {
MSHADOW_REAL_TYPE_SWITCH(inputs[0].type_flag_, DType, {
ActivationForward<gpu, mshadow_op::softrelu, mshadow_op::softrelu_grad, DType>(ctx,
inputs[0], req[0], outputs[0]);
});
ActivationForward<gpu, mshadow_op::softrelu, mshadow_op::softrelu_grad>(ctx,
inputs[0], req[0], outputs[0]);
} else if (param.act_type == activation::kSoftSign) {
ActivationForward<gpu, mshadow_op::softsign, mshadow_op::softsign_grad>(ctx,
inputs[0], req[0], outputs[0]);
} else {
MSHADOW_REAL_TYPE_SWITCH(inputs[0].type_flag_, DType, {
get_cudnn_op<DType>(param).Forward(ctx, inputs[0], req[0], outputs[0]);
@@ -70,24 +71,28 @@

template<>
void ActivationGradCompute<gpu>(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<TBlob>& inputs,
const std::vector<OpReqType>& req,
const std::vector<TBlob>& outputs) {
CHECK_EQ(inputs.size(), 3U);
const ActivationParam& param = nnvm::get<ActivationParam>(attrs.parsed);
bool relu = param.act_type == activation::kReLU;
CHECK_EQ(inputs.size(), relu ? 2U : 3U);
CHECK_EQ(outputs.size(), 1U);
CHECK_EQ(req.size(), 1U);
const ActivationParam& param = nnvm::get<ActivationParam>(attrs.parsed);

// SoftReLU not supported by CUDNN yet
// SoftReLU and SoftSign are not supported by CUDNN yet
if (param.act_type == activation::kSoftReLU) {
MSHADOW_REAL_TYPE_SWITCH(inputs[0].type_flag_, DType, {
ActivationBackward<gpu, mshadow_op::softrelu, mshadow_op::softrelu_grad, DType>(
ctx, inputs[0], inputs[1], req[0], outputs[0]);
});
ActivationBackward<gpu, mshadow_op::softrelu, mshadow_op::softrelu_grad>(
ctx, inputs[0], inputs[1], req[0], outputs[0]);
} else if (param.act_type == activation::kSoftSign) {
ActivationBackward<gpu, mshadow_op::softsign, mshadow_op::softsign_grad>(
ctx, inputs[0], inputs[1], req[0], outputs[0]);
} else {
MSHADOW_REAL_TYPE_SWITCH(inputs[0].type_flag_, DType, {
get_cudnn_op<DType>(param).Backward(ctx, inputs[0], inputs[2], inputs[1], req[0], outputs[0]);
// XXX: for y = relu(x), y is passed as "in_data" to Backward()
get_cudnn_op<DType>(param).Backward(ctx, inputs[0], relu ? inputs[1] : inputs[2],
inputs[1], req[0], outputs[0]);
});
}
}
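
Because ReLU's backward input list is one entry shorter, the position of "in_data" shifts, which is why the GPU path reads inputs[relu ? 1 : 2] and simply hands the output to cuDNN in place of the input. A small sketch of that layout (the struct is illustrative; indices mirror the diff):

#include <cstddef>

// Backward input layout under CUDNN/MKLDNN builds:
//   ReLU:      { out_grad, out_data }
//   otherwise: { out_grad, out_data, in_data }
struct BackwardIndices {
  std::size_t out_grad = 0;
  std::size_t out_data = 1;
  std::size_t in_data = 2;  // for ReLU, out_data stands in for in_data
};

inline BackwardIndices MakeBackwardIndices(bool relu) {
  BackwardIndices idx;
  if (relu) idx.in_data = idx.out_data;  // the output doubles as the input
  return idx;
}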
3 changes: 3 additions & 0 deletions src/operator/nn/cudnn/cudnn_activation-inl.h
@@ -130,6 +130,9 @@ class CuDNNActivationOp {
#endif
}

// backward computation for cudnn activation operator. Note that for relu
// it's okay to pass "out_data" as "in_data", since it doesn't make any
// difference in terms of computing the gradient of relu.
void Backward(const OpContext &ctx, const TBlob &out_grad,
const TBlob &in_data, const TBlob &out_data,
const OpReqType &req, const TBlob &in_grad) {
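
The comment's claim can be stated precisely: for y = max(x, 0) the gradient mask is the same whether it is evaluated on the input or on the output, so passing y where cuDNN expects x changes nothing for ReLU:

\frac{\partial y}{\partial x} = \mathbf{1}[x > 0] = \mathbf{1}[\max(x, 0) > 0] = \mathbf{1}[y > 0],
\qquad
\frac{\partial L}{\partial x} = \frac{\partial L}{\partial y} \cdot \mathbf{1}[y > 0].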
2 changes: 2 additions & 0 deletions src/operator/nn/mkldnn/mkldnn_act.cc
@@ -165,6 +165,8 @@ void MKLDNNActivationForward(const nnvm::NodeAttrs& attrs, const OpContext &ctx,
stream->Submit();
}

// For backward relu activation, it's okay to pass "out_data" as "in_data" to this
// function, since the computation only involves non-zeros.
void MKLDNNActivationBackward(const nnvm::NodeAttrs& attrs, const OpContext &ctx,
const NDArray &out_grad, const NDArray &in_data,
const OpReqType &req, const NDArray &in_grad) {
2 changes: 1 addition & 1 deletion src/operator/tensor/elemwise_unary_op_basic.cc
@@ -86,7 +86,7 @@ The storage type of ``relu`` output depends upon the input storage type:
.set_attr<FInferStorageType>("FInferStorageType", ElemwiseStorageType<1, 1, false, true, false>)
.set_attr<FCompute>("FCompute<cpu>", UnaryOp::Compute<cpu, mshadow_op::relu>)
.set_attr<FComputeEx>("FComputeEx<cpu>", UnaryOp::ComputeEx<cpu, mshadow_op::relu>)
.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{"_backward_relu"});
.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseOut{"_backward_relu"});

MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_relu,
unary_bwd<mshadow_op::relu_grad>);
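
The same reasoning drives the one-line change to the standalone relu operator: switching its FGradient from ElemwiseGradUseIn to ElemwiseGradUseOut makes the backward node depend on the output rather than the input, so the input no longer has to be kept alive. A schematic contrast (descriptive helper names, not the actual nnvm structs):

// grad-uses-input : dx = relu'(x) * dy  -> x must survive until backward
// grad-uses-output: dx = relu'(y) * dy  -> only y is needed; for ReLU the two
//                                          masks coincide, so x may be freed
//                                          or overwritten in place
inline float ReluGradFromInput(float x, float dy)  { return (x > 0.f ? 1.f : 0.f) * dy; }
inline float ReluGradFromOutput(float y, float dy) { return (y > 0.f ? 1.f : 0.f) * dy; }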