[Relay] crossentropy_with_logits and its gradient #4075

Merged (2 commits, Oct 25, 2019)
1 change: 1 addition & 0 deletions python/tvm/relay/op/_reduce.py
@@ -37,3 +37,4 @@ def _schedule_reduce(_, outs, target):
_reg.register_schedule("mean", _schedule_reduce)
_reg.register_schedule("variance", _schedule_reduce)
_reg.register_schedule("nn.cross_entropy", _schedule_reduce)
_reg.register_schedule("nn.cross_entropy_with_logits", _schedule_reduce)
9 changes: 9 additions & 0 deletions python/tvm/relay/op/_tensor_grad.py
@@ -383,3 +383,12 @@ def cross_entropy_grad(orig, grad):
    batch_size = take(shape, const(0, dtype='int32'), axis=0)
    grad = grad / batch_size.astype('float32')
    return [-grad * y / x, -grad * log(x)]


@register_gradient("nn.cross_entropy_with_logits")
def cross_entropy_with_logits_grad(orig, grad):
    x, y = orig.args
    shape = shape_of(x)
    batch_size = take(shape, const(0, dtype='int32'), axis=0)
    grad = grad / batch_size.astype('float32')
    return [-grad * y, -grad * x]
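
For reference (an illustrative sketch, not part of this diff): the forward op computes -sum(x * y) / batch_size (see the compute registered in _nn.py below), so the partial derivatives are -y / batch_size with respect to x and -x / batch_size with respect to y, which is exactly what the rule above returns after the incoming grad is scaled. A quick numpy check of one entry against a central difference:

import numpy as np

def ce_with_logits(x, y):
    # Mirrors the Relay compute: -sum(x * y) / batch_size
    return -np.sum(x * y) / x.shape[0]

x = np.random.rand(2, 5)
y = np.random.rand(2, 5)
eps = 1e-5

# Central difference with respect to x[0, 0]
xp, xm = x.copy(), x.copy()
xp[0, 0] += eps
xm[0, 0] -= eps
numeric = (ce_with_logits(xp, y) - ce_with_logits(xm, y)) / (2 * eps)

# Analytic gradient from the rule above (incoming grad taken as 1): -y / batch_size
analytic = -y[0, 0] / x.shape[0]
np.testing.assert_allclose(numeric, analytic, rtol=1e-4)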
9 changes: 9 additions & 0 deletions python/tvm/relay/op/nn/_nn.py
@@ -754,3 +754,12 @@ def schedule_bitserial_dense(attrs, outputs, target):
def compute_cross_entropy(attrs, inputs, out_dtype, target):
    x, y = inputs
    return [-topi.sum(topi.log(x) * y) / x.shape[0]]


reg.register_pattern("nn.cross_entropy_with_logits", OpPattern.OPAQUE)


@reg.register_compute("nn.cross_entropy_with_logits")
def compute_cross_entropy_with_logits(attrs, inputs, out_dtype, target):
    x, y = inputs
    return [-topi.sum(x * y) / x.shape[0]]
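
For reference (an illustrative numpy sketch, not part of this diff): nn.cross_entropy applies the log itself and expects probabilities, while the new nn.cross_entropy_with_logits expects log-probabilities and skips the log, so feeding softmax outputs to the former and log-softmax outputs to the latter gives the same value:

import numpy as np

logits = np.random.randn(2, 5).astype("float32")
targets = np.random.rand(2, 5).astype("float32")
batch = logits.shape[0]

probs = np.exp(logits) / np.exp(logits).sum(axis=1, keepdims=True)  # softmax
log_probs = np.log(probs)                                           # log-softmax

ce = -np.sum(np.log(probs) * targets) / batch      # mirrors -topi.sum(topi.log(x) * y) / x.shape[0]
ce_logits = -np.sum(log_probs * targets) / batch   # mirrors -topi.sum(x * y) / x.shape[0]

np.testing.assert_allclose(ce, ce_logits, rtol=1e-6)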
19 changes: 19 additions & 0 deletions python/tvm/relay/op/nn/nn.py
@@ -1777,3 +1777,22 @@ def cross_entropy(predictions, targets):
        The computed result.
    """
    return _make.cross_entropy(predictions, targets)


def cross_entropy_with_logits(predictions, targets):
Contributor

Can we name this cross_entropy_from_logits everywhere instead?

Contributor

Actually, on second thought, can we just add a from_logits kwarg to cross_entropy?

Contributor Author

It is the name used by TensorFlow; is there a reason to change it?
I also do not think it should be a kwarg - it looks like adding two functionalities to one function, which is a common software antipattern.

Contributor

I don't feel too strongly about it either way; I've been using the Keras interface a lot, so I was thinking of Keras' declaration:

keras.losses.categorical_crossentropy(y_true, y_pred, from_logits=False, label_smoothing=0)

    """CrossEntropy with logits.

    Parameters
    ----------
    predictions : tvm.relay.Expr
        The predictions.

    targets : tvm.relay.Expr
        The targets.

    Returns
    -------
    result : tvm.relay.Expr
        The computed result.
    """
    return _make.cross_entropy_with_logits(predictions, targets)
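
For reference, a minimal usage sketch of the new Python API (illustrative only, not part of this diff; it assumes a TVM build that includes this change and uses the debug interpreter for evaluation):

import numpy as np
import tvm
from tvm import relay

# Build a Relay function that applies the new op.
x = relay.var("x", shape=(2, 5))
y = relay.var("y", shape=(2, 5))
func = relay.Function([x, y], relay.op.nn.cross_entropy_with_logits(x, y))

# Evaluate it on random data with the debug interpreter.
x_np = np.random.uniform(size=(2, 5)).astype("float32")
y_np = np.random.uniform(size=(2, 5)).astype("float32")
intrp = relay.create_executor("debug", ctx=tvm.cpu(0), target="llvm")
result = intrp.evaluate(func)(x_np, y_np)

# The op computes -sum(x * y) / batch_size, matching the compute registered in _nn.py.
np.testing.assert_allclose(result.asnumpy(),
                           -np.sum(x_np * y_np) / x_np.shape[0],
                           rtol=1e-5)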
25 changes: 24 additions & 1 deletion src/relay/op/nn/nn.cc
@@ -843,7 +843,7 @@ bool CrossEntropyRel(const Array<Type>& types,
return true;
}

// Positional relay function to create batch_matmul operator used by frontend FFI.
// Positional relay function to create cross_entropy operator used by frontend FFI.
Expr MakeCrossEntropy(Expr predictions, Expr targets) {
  static const Op& op = Op::Get("nn.cross_entropy");
  return CallNode::make(op, {predictions, targets}, Attrs(), {});
@@ -866,5 +866,28 @@ Do log on the data - do not accept logits.
.add_type_rel("CrossEntropy", CrossEntropyRel);


// Positional relay function to create cross_entropy_with_logits operator used by frontend FFI.
Expr MakeCrossEntropyWithLogits(Expr predictions, Expr targets) {
  static const Op& op = Op::Get("nn.cross_entropy_with_logits");
  return CallNode::make(op, {predictions, targets}, Attrs(), {});
}


TVM_REGISTER_API("relay.op.nn._make.cross_entropy_with_logits")
.set_body_typed(MakeCrossEntropyWithLogits);


RELAY_REGISTER_OP("nn.cross_entropy_with_logits")
.describe(R"code(
Computes cross entropy given predictions and targets.
Accept logits.
)code" TVM_ADD_FILELINE)
.set_num_inputs(2)
.add_argument("x", "1D Tensor", "Predictions.")
.add_argument("y", "1D Tensor", "Targets.")
.set_support_level(10)
.add_type_rel("CrossEntropy", CrossEntropyRel);


} // namespace relay
} // namespace tvm
14 changes: 11 additions & 3 deletions tests/python/relay/test_op_grad_level10.py
@@ -14,15 +14,23 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import pytest

from tvm import relay
from tvm.relay.testing import check_grad


def test_cross_entropy_grad():
    x = relay.var("x", shape=(1, 5))
    y = relay.var("y", shape=(1, 5))
    x = relay.var("x", shape=(2, 5))
    y = relay.var("y", shape=(2, 5))
    check_grad(relay.Function([x, y], relay.op.nn.cross_entropy(x, y)), eps=0.01, scale=0.1, mean=1)


def test_cross_entropy_with_logits_grad():
    x = relay.var("x", shape=(2, 5))
    y = relay.var("y", shape=(2, 5))
    check_grad(relay.Function([x, y], relay.op.nn.cross_entropy_with_logits(x, y)), eps=0.01, scale=0.1, mean=1)


if __name__ == "__main__":
    test_cross_entropy_grad()
    pytest.main([__file__])