Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit

Permalink
[MXNET-1215] Allow dynamic shape exists in imperative mode (#13283)
Browse files Browse the repository at this point in the history
* Add NDArray with lazy shape, Add boolean mask operator

* Fix lints

* Address comments and refactor forward pass

* Add backward pass for boolean index

* Fix tests....

* Add unittests

* Make lint happy

* Address comments

* Address comments, and complete docstring

* Update docstring
  • Loading branch information
junrushao authored and szha committed Nov 20, 2018
1 parent cf6ae59 commit 779bdc5
Show file tree
Hide file tree
Showing 6 changed files with 320 additions and 11 deletions.
36 changes: 33 additions & 3 deletions include/mxnet/ndarray.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,18 @@ class NDArray {
bool delay_alloc = true, int dtype = mshadow::default_type_flag,
std::vector<int> aux_types = {}, std::vector<TShape> aux_shapes = {},
TShape storage_shape = TShape(mshadow::Shape1(0)));

/*!
* \brief constructs a new dynamic NDArray whose shape is unknown,
* hence the NDArray is inherently lazily created
* \param ctx context of NDArray
* \param dtype data type of this ndarray
*/
explicit NDArray(Context ctx, int dtype = mshadow::default_type_flag) {
ptr_ = std::make_shared<Chunk>(TShape(mshadow::Shape1(0)), ctx, true, dtype);
dtype_ = dtype;
storage_type_ = kDefaultStorage;
entry_ = {nullptr, 0, 0};
}
/*!
* \brief constructing a static NDArray that shares data with TBlob
* Use with caution: allocate ONLY ONE NDArray for each TBlob,
Expand Down Expand Up @@ -157,7 +168,20 @@ class NDArray {
: ptr_(std::make_shared<Chunk>(stype, data, aux_data, dev_id)), shape_(shape),
dtype_(data.type_flag_), storage_type_(stype), entry_({nullptr, 0, 0}) {
}

/*!
* \brief initialize the NDArray, assuming it is not assigned a meaningful shape before
* \param shape the shape of the NDArray
*/
void Init(const TShape &shape) {
ptr_->Init(shape, this->dtype_);
this->shape_ = shape;
}
/*!
* \brief set the correct shape of NDArray directly from the storage_shape of its own chunk.
*/
void SetShapeFromChunk() {
shape_ = ptr_->storage_shape;
}
/*
* This indicates whether an array is a view of another array (created by
* reshape or slice). If an array is a view and the the data is stored in
Expand Down Expand Up @@ -960,7 +984,13 @@ class NDArray {
#endif
}
}

/*! \brief initialize the shape and dtype, assuming it is not initialized before. */
void Init(const TShape &shape, int dtype) {
auto size = shape.Size();
storage_shape = shape;
shandle.size = size * mshadow::mshadow_sizeof(dtype);
this->CheckAndAlloc();
}
inline void CheckAndAlloc(const TShape &shape, const std::vector<TShape> &aux_shapes,
int dtype) {
// calculate size, perform allocation
Expand Down
12 changes: 10 additions & 2 deletions src/imperative/imperative.cc
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,16 @@ OpStatePtr Imperative::Invoke(
SetShapeType(ctx, attrs, inputs, outputs, &dispatch_mode);
std::vector<OpReqType> req;
SetWriteInplaceReq(inputs, outputs, &req);

return InvokeOp(ctx, attrs, inputs, outputs, req, dispatch_mode);
OpStatePtr ret = InvokeOp(ctx, attrs, inputs, outputs, req, dispatch_mode);
// the followinng loop is used for finding out the correct shape when some shapes are dynamic
for (size_t i = 0; i < outputs.size(); i++) {
if (outputs[i]->shape().ndim() == 0) {
// the WaitToRead overhead here does not seem to be avoidable
outputs[i]->WaitToRead();
outputs[i]->SetShapeFromChunk();
}
}
return ret;
}

void Imperative::MarkVariables(
Expand Down
17 changes: 11 additions & 6 deletions src/imperative/imperative_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,11 +117,13 @@ inline void SetShapeType(const Context& ctx,
for (auto& i : outputs) {
out_shapes.push_back(i->shape());
}
CHECK(infershape.count(attrs.op))
<< "Operator " << attrs.op->name << " is missing FInferShape attribute";
CHECK(infershape[attrs.op](attrs, &in_shapes, &out_shapes));
CHECK_EQ(out_shapes.size(), outputs.size());

bool is_dynamic_shape_existing = false;
if (!infershape.count(attrs.op)) {
is_dynamic_shape_existing = true;
} else {
CHECK(infershape[attrs.op](attrs, &in_shapes, &out_shapes));
CHECK_EQ(out_shapes.size(), outputs.size());
}
// infer type
std::vector<int>& in_types = ret->arg_types;
in_types.clear();
Expand Down Expand Up @@ -178,7 +180,10 @@ inline void SetShapeType(const Context& ctx,
for (size_t i = 0; i < outputs.size(); ++i) {
NDArrayStorageType storage_type = static_cast<NDArrayStorageType>(out_storage_types[i]);
if (outputs[i]->is_none()) {
if (storage_type == kDefaultStorage) {
if (is_dynamic_shape_existing) {
// once there is dynamic shape somewhere, we could not pre-determine the shape.
*outputs[i] = NDArray(ctx, out_types[i]);
} else if (storage_type == kDefaultStorage) {
*outputs[i] = NDArray(out_shapes[i], ctx, true, out_types[i]);
} else {
*outputs[i] = NDArray(storage_type, out_shapes[i], ctx, true, out_types[i]);
Expand Down
134 changes: 134 additions & 0 deletions src/operator/contrib/boolean_mask-inl.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
* Copyright (c) 2018 by Contributors
* \file boolean_mask-inl.h
*/

#ifndef MXNET_OPERATOR_CONTRIB_BOOLEAN_MASK_INL_H_
#define MXNET_OPERATOR_CONTRIB_BOOLEAN_MASK_INL_H_

#include <dmlc/logging.h>
#include <dmlc/parameter.h>
#include <mxnet/operator.h>
#include <mxnet/ndarray.h>
#include <map>
#include <vector>
#include <string>
#include <utility>
#include <algorithm>
#include "../operator_common.h"
#include "../mxnet_op.h"
#include "../tensor/init_op.h"
#include "../mshadow_op.h"
#include "../elemwise_op_common.h"

namespace mxnet {
namespace op {

struct BooleanMaskParam : public dmlc::Parameter<BooleanMaskParam> {
int axis;
DMLC_DECLARE_PARAMETER(BooleanMaskParam) {
DMLC_DECLARE_FIELD(axis).set_default(0)
.describe("An integer that represents the axis in NDArray to mask from.");
}
};

template<typename xpu>
inline void BooleanMaskForward(const nnvm::NodeAttrs& attrs,
const OpContext &ctx,
const std::vector<NDArray> &inputs,
const std::vector<OpReqType> &req,
const std::vector<NDArray> &outputs) {
// TODO(@junrushao1994): This implementation is a proof-of-concept,
// hence very slow actually. Performance should be improved in the future.
CHECK_EQ(inputs.size(), 2U);
CHECK_EQ(outputs.size(), 1U);
const BooleanMaskParam& param = nnvm::get<BooleanMaskParam>(attrs.parsed);
const int axis = param.axis;
const NDArray &data = inputs[0];
const NDArray &idx = inputs[1];
const NDArray &out = outputs[0];
CHECK_EQ(axis, 0) << "Not supported yet";
CHECK_EQ(data.shape()[axis], idx.shape()[0]);
CHECK_EQ(idx.shape().ndim(), 1U);
// count the number of 1s in `idx`, so that we could know the output dimension
size_t valid_num = 0;
MSHADOW_TYPE_SWITCH(idx.dtype(), DType, {
DType* idx_dptr = idx.data().dptr<DType>();
int length = idx.shape()[0];
for (int i = 0; i < length; i++) {
if (idx_dptr[i]) {
++valid_num;
}
}
});
// set the output shape forcefully
TShape s = data.shape();
s[axis] = valid_num;
const_cast<NDArray &>(out).Init(s);
// do the copy
MSHADOW_TYPE_SWITCH(idx.dtype(), DType, {
DType* idx_dptr = idx.data().dptr<DType>();
int length = idx.shape()[0];
mshadow::Stream<xpu> *stream = ctx.get_stream<xpu>();
for (int i = 0, j = 0; i < length; ++i) {
if (idx_dptr[i]) {
NDArray src = data.At(i);
NDArray dst = out.At(j++);
CHECK(src.shape() == dst.shape());
mxnet_op::copy(stream, dst.data(), src.data());
}
}
});
}

template<typename xpu>
inline void BooleanMaskBackward(const nnvm::NodeAttrs& attrs,
const OpContext &ctx,
const std::vector<NDArray> &inputs,
const std::vector<OpReqType> &req,
const std::vector<NDArray> &outputs) {
CHECK_EQ(inputs.size(), 3U);
CHECK_EQ(outputs.size(), 2U);
// inputs: {ograd, data, idx}
// outputs: {igrad_data, igrad_idx}
const NDArray& ograd = inputs[0];
const NDArray& idx = inputs[2];
const NDArray& igrad_data = outputs[0];
MSHADOW_TYPE_SWITCH(idx.dtype(), DType, {
DType* idx_dptr = idx.data().dptr<DType>();
int length = idx.shape()[0];
mshadow::Stream<xpu> *stream = ctx.get_stream<xpu>();
Fill<false>(stream, igrad_data.data(), req[0], 0);
for (int i = 0, j = 0; i < length; ++i) {
if (idx_dptr[i]) {
NDArray src = ograd.At(j++);
NDArray dst = igrad_data.At(i);
CHECK(src.shape() == dst.shape());
mxnet_op::copy(stream, dst.data(), src.data());
}
}
});
}

} // namespace op
} // namespace mxnet

#endif // MXNET_OPERATOR_CONTRIB_BOOLEAN_MASK_INL_H_
114 changes: 114 additions & 0 deletions src/operator/contrib/boolean_mask.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
* Copyright (c) 2018 by Contributors
* \file boolean_mask.cc
*/

#include "./boolean_mask-inl.h"

namespace mxnet {
namespace op {

DMLC_REGISTER_PARAMETER(BooleanMaskParam);


bool BooleanMaskType(const nnvm::NodeAttrs& attrs,
std::vector<int> *in_attrs,
std::vector<int> *out_attrs) {
CHECK_EQ(in_attrs->size(), 2);
CHECK_EQ(out_attrs->size(), 1);
TYPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0));
TYPE_ASSIGN_CHECK(*in_attrs, 0, out_attrs->at(0));
return out_attrs->at(0) != -1;
}

bool BooleanMaskStorageType(const nnvm::NodeAttrs& attrs,
const int dev_mask,
DispatchMode* dispatch_mode,
std::vector<int> *in_attrs,
std::vector<int> *out_attrs) {
CHECK_EQ(in_attrs->size(), 2);
CHECK_EQ(out_attrs->size(), 1);
for (int &attr : *in_attrs) {
CHECK_EQ(attr, kDefaultStorage) << "Only default storage is supported";
}
for (int &attr : *out_attrs) {
attr = kDefaultStorage;
}
*dispatch_mode = DispatchMode::kFComputeEx;
return true;
}

bool BooleanMaskBackStorageType(const nnvm::NodeAttrs& attrs,
const int dev_mask,
DispatchMode* dispatch_mode,
std::vector<int> *in_attrs,
std::vector<int> *out_attrs) {
CHECK_EQ(in_attrs->size(), 3);
CHECK_EQ(out_attrs->size(), 2);
for (int &attr : *in_attrs) {
CHECK_EQ(attr, kDefaultStorage) << "Only default storage is supported";
}
for (int &attr : *out_attrs) {
attr = kDefaultStorage;
}
for (size_t i = 0; i < out_attrs->size(); i++)
out_attrs->at(i) = kDefaultStorage;
*dispatch_mode = DispatchMode::kFComputeEx;
return true;
}

NNVM_REGISTER_OP(_contrib_boolean_mask)
.describe(R"code(
Experimental CPU-only support for boolean masking.
Given an n-d NDArray data, and a 1-d NDArray index,
the operator produces an un-predeterminable shaped n-d NDArray out,
which stands for the rows in x where the corresonding element in index is non-zero.
>>> data = mx.nd.array([[1, 2, 3],[4, 5, 6],[7, 8, 9]])
>>> index = mx.nd.array([0, 1, 0])
>>> out = mx.nd.contrib.boolean_mask(data, index)
>>> out
[[4. 5. 6.]]
<NDArray 1x3 @cpu(0)>
)code" ADD_FILELINE)
.set_attr_parser(ParamParser<BooleanMaskParam>)
.set_num_inputs(2)
.set_num_outputs(1)
.set_attr<nnvm::FInferType>("FInferType", BooleanMaskType)
.set_attr<FComputeEx>("FComputeEx<cpu>", BooleanMaskForward<cpu>)
.set_attr<FInferStorageType>("FInferStorageType", BooleanMaskStorageType)
.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{"_backward_contrib_boolean_mask"})
.add_argument("data", "NDArray-or-Symbol", "Data")
.add_argument("index", "NDArray-or-Symbol", "Mask")
.add_arguments(BooleanMaskParam::__FIELDS__());

NNVM_REGISTER_OP(_backward_contrib_boolean_mask)
.set_num_inputs(3)
.set_num_outputs(2)
.set_attr<nnvm::TIsBackward>("TIsBackward", true)
.set_attr<FInferStorageType>("FInferStorageType", BooleanMaskBackStorageType)
.set_attr<FComputeEx>("FComputeEx<cpu>", BooleanMaskBackward<cpu>)
.add_arguments(BooleanMaskParam::__FIELDS__());

} // namespace op
} // namespace mxnet
18 changes: 18 additions & 0 deletions tests/python/unittest/test_operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -4793,6 +4793,24 @@ def test_index_copy():
assert same(x.grad.asnumpy(), x_grad.asnumpy())
assert same(t.grad.asnumpy(), t_grad.asnumpy())


@with_seed()
def test_boolean_mask():
if default_context().device_type != 'cpu':
return
data = mx.nd.array([[1, 2, 3],[4, 5, 6],[7, 8, 9]])
index = mx.nd.array([0, 1, 0])
data.attach_grad()
with mx.autograd.record():
out = mx.nd.contrib.boolean_mask(data, index)
out.backward()
data.grad.wait_to_read()
expected = np.array([[4, 5, 6]])
expected_grad = np.array([[0, 0, 0], [1, 1, 1], [0, 0, 0]])
assert same(out.asnumpy(), expected)
assert same(data.grad.asnumpy(), expected_grad)


@with_seed()
def test_div_sqrt_dim():
data_tmp = np.random.normal(0, 1, (5, 10, 8))
Expand Down

0 comments on commit 779bdc5

Please sign in to comment.