diff --git a/3rdparty/mshadow/mshadow/extension/slice.h b/3rdparty/mshadow/mshadow/extension/slice.h index cb2eff4548aa..d0c266284639 100644 --- a/3rdparty/mshadow/mshadow/extension/slice.h +++ b/3rdparty/mshadow/mshadow/extension/slice.h @@ -33,8 +33,8 @@ struct SliceExp : public TRValue::Check(src_); ch_old_ = shape_[dimslice]; - CHECK(begin < shape_[dimslice] && end <= shape_[dimslice]) - << "The slice went out of range"; + CHECK(begin <= shape_[dimslice] && end <= shape_[dimslice]) + << "The slice went out of range. "; shape_[dimslice] = end - begin; } template diff --git a/python/mxnet/ndarray/ndarray.py b/python/mxnet/ndarray/ndarray.py index 612017cdaff7..6c2bb8078922 100644 --- a/python/mxnet/ndarray/ndarray.py +++ b/python/mxnet/ndarray/ndarray.py @@ -51,7 +51,8 @@ "logical_xor", "maximum", "minimum", "moveaxis", "modulo", "multiply", "not_equal", "onehot_encode", "power", "subtract", "true_divide", "waitall", "_new_empty_handle", "histogram", "split_v2", "to_dlpack_for_read", "to_dlpack_for_write", "from_dlpack", - "from_numpy"] + "from_numpy", "zeros", "indexing_key_expand_implicit_axes", "get_indexing_dispatch_code", + "get_oshape_of_gather_nd_op"] _STORAGE_TYPE_UNDEFINED = -1 _STORAGE_TYPE_DEFAULT = 0 @@ -480,40 +481,61 @@ def __setitem__(self, key, value): array([[ 6., 5., 5.], [ 6., 0., 4.]], dtype=float32) """ - if self.ndim == 0 and key == (): - _internal._full(shape=self.shape, value=float(value), ctx=self.context, - dtype=self.dtype, out=self) - return - key = _indexing_key_expand_implicit_axes(key, self.shape) - slc_key = tuple(idx for idx in key if idx is not None) + if self.ndim == 0: + if not isinstance(key, (tuple, py_slice)): + raise IndexError('scalar tensor can only accept `()` and `:` as index') + if isinstance(key, tuple) and len(key) != 0: + raise IndexError('scalar tensor can only accept `()` and `:` as index') + if isinstance(value, numeric_types): + self._full(value) + elif isinstance(value, NDArray) and value.size == 1: + if value.shape != self.shape: + value = value.reshape(self.shape) + value.copyto(self) + elif isinstance(value, (np.ndarray, np.generic)) and value.size == 1: + if isinstance(value, np.generic) or value.shape != self.shape: + value = value.reshape(self.shape) + self._sync_copyfrom(value) + else: + raise ValueError('setting an array element with a sequence.') - if len(slc_key) < self.ndim: - raise RuntimeError( - 'too few indices after normalization: expected `ndim` ({}) ' - 'but got {}. This is a bug, please report it!' - ''.format(self.ndim, len(slc_key)) - ) - if len(slc_key) > self.ndim: - raise IndexError( - 'too many indices ({}) for array with {} dimensions' - ''.format(len(slc_key), self.ndim) - ) + elif self.size == 0: + return - indexing_dispatch_code = _get_indexing_dispatch_code(slc_key) - if indexing_dispatch_code == _NDARRAY_BASIC_INDEXING: - self._set_nd_basic_indexing(slc_key, value) - elif indexing_dispatch_code == _NDARRAY_ADVANCED_INDEXING: - self._set_nd_advanced_indexing(slc_key, value) else: - raise ValueError( - 'Indexing NDArray with index {} of type {} is not supported' - ''.format(key, type(key)) - ) + key = indexing_key_expand_implicit_axes(key, self.shape) + slc_key = tuple(idx for idx in key if idx is not None) + + if len(slc_key) < self.ndim: + raise RuntimeError( + 'too few indices after normalization: expected `ndim` ({}) ' + 'but got {}. This is a bug, please report it!' 
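For reference, a minimal sketch of the new 0-d assignment branches added above: a numeric scalar goes through `self._full`, a size-1 `NDArray` is reshaped and copied with `copyto`, and a size-1 numpy value is reshaped and handed to `_sync_copyfrom`. This is illustrative only and assumes a build with this patch; 0-d arrays also need numpy shape semantics, hence the `mx.np_shape()` scope:

    import numpy as np
    import mxnet as mx

    with mx.np_shape():                 # 0-d arrays need numpy shape semantics
        x = mx.nd.zeros(())
        x[()] = 3                       # numeric scalar      -> self._full(3)
        x[:] = np.float32(7)            # size-1 numpy scalar -> reshape to () + _sync_copyfrom
        print(x.asnumpy())              # 7.0

A size-1 `NDArray` value takes the remaining branch (reshape to `()` followed by `copyto`); anything larger raises the "setting an array element with a sequence" error shown above.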
+ ''.format(self.ndim, len(slc_key)) + ) + if len(slc_key) > self.ndim: + raise IndexError( + 'too many indices ({}) for array with {} dimensions' + ''.format(len(slc_key), self.ndim) + ) - def __getitem__(self, key): + indexing_dispatch_code = get_indexing_dispatch_code(slc_key) + if indexing_dispatch_code == _NDARRAY_BASIC_INDEXING: + self._set_nd_basic_indexing(key, value) + elif indexing_dispatch_code == _NDARRAY_ADVANCED_INDEXING: + self._set_nd_advanced_indexing(key, value) + else: + raise ValueError( + 'Indexing NDArray with index {} of type {} is not supported' + ''.format(key, type(key)) + ) + + def __getitem__(self, key): # pylint: disable=too-many-return-statements """x.__getitem__(i) <=> x[i] - Returns the subarray ``self[key]``. + Returns a sliced view of this array if the elements fetched are contiguous in memory; + otherwise, returns a newly created NDArray. + This functions supports advanced indexing defined in the following reference with + some restrictions. For basic indexing, i.e., if ``key`` consists only of integers, ``slice``, ``Ellipsis`` (``...``) and ``None``, a mutable view is @@ -644,13 +666,41 @@ def __getitem__(self, key): array([[[4., 5.], [6., 7.]]], dtype=float32) """ - if self.ndim == 0 and key == (): + ndim = self.ndim + shape = self.shape + + if ndim == 0 and (key == () or key == slice(None, None, None)): + return self + + # Handle simple cases for higher speed + if isinstance(key, tuple) and len(key) == 0: return self - key = _indexing_key_expand_implicit_axes(key, self.shape) + if isinstance(key, tuple) and len(key) == ndim\ + and all(isinstance(idx, integer_types) for idx in key): + out = self + for idx in key: + out = out[idx] + return out + if isinstance(key, integer_types): + if key > shape[0] - 1: + raise IndexError( + 'index {} is out of bounds for axis 0 with size {}'.format( + key, shape[0])) + return self._at(key) + elif isinstance(key, py_slice): + if (key.step is None or key.step == 1): + if key.start is not None or key.stop is not None: + return self._slice(key.start, key.stop) + else: + return self + elif key.step == 0: + raise ValueError("slice step cannot be zero") + + key = indexing_key_expand_implicit_axes(key, self.shape) if len(key) == 0: raise ValueError('indexing key cannot be an empty tuple') - indexing_dispatch_code = _get_indexing_dispatch_code(key) + indexing_dispatch_code = get_indexing_dispatch_code(key) if indexing_dispatch_code == _NDARRAY_BASIC_INDEXING: return self._get_nd_basic_indexing(key) elif indexing_dispatch_code == _NDARRAY_ADVANCED_INDEXING: @@ -658,18 +708,17 @@ def __getitem__(self, key): else: raise RuntimeError - def _prepare_value_nd(self, value, new_axes, bcast_shape): + def _prepare_value_nd(self, value, bcast_shape, squeeze_axes=None): """Return a broadcast `NDArray` with same context and dtype as ``self``. - - Before broadcasting, ``new_axes`` of length 1 will be added to - ``value``. This is done in contrast to blindly reshaping based on - ``bcast_shape``, since the latter would silently ignore wrongly shaped - ``value`` arrays, e.g. ``nd.zeros((2, 3))[:, :1] = nd.ones(2)``. + For setting item, The returned `ndarray` is squeezed according to squeeze_axes since the + value_nd is assigned to not yet expanded space in original array. + `value`: numeric types or array like. + `bcast_shape`: a shape tuple. + `squeeze_axes`: a sequence of axes to squeeze in the value array. 
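The fast paths added to `__getitem__` above route plain integer and step-1 slice keys straight to `_at` and `_slice` without going through key expansion, and a bare full slice returns `self`. A quick sketch of what they return (assumes a build with this patch; all three results share memory with the original array):

    import mxnet as mx

    x = mx.nd.arange(12).reshape(3, 4)
    row = x[1]            # integer key      -> self._at(1), shape (4,)
    block = x[0:2]        # step-1 slice key -> self._slice(0, 2), shape (2, 4)
    ident = x[:]          # full slice       -> returns self unchanged
    assert ident is x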
""" if isinstance(value, numeric_types): value_nd = full(bcast_shape, value, ctx=self.context, dtype=self.dtype) - new_axes = [] # ignore for scalar - elif isinstance(value, NDArray): + elif type(value) == self.__class__: # pylint: disable=unidiomatic-typecheck value_nd = value.as_in_context(self.context) if value_nd.dtype != self.dtype: value_nd = value_nd.astype(self.dtype) @@ -677,18 +726,20 @@ def _prepare_value_nd(self, value, new_axes, bcast_shape): try: value_nd = array(value, ctx=self.context, dtype=self.dtype) except: - raise TypeError('NDArray does not support assignment with non-array-like ' - 'object {} of type {}'.format(value, type(value))) + raise TypeError('{} does not support assignment with non-array-like ' + 'object {} of type {}'.format(self.__class__, value, type(value))) - # First reshape `value_nd` to a new shape that incorporates existing - # axes, new axes and broadcasting axes in the right way. - tmp_shape = _shape_for_bcast( - value_nd.shape, target_ndim=len(bcast_shape), new_axes=new_axes - ) - value_nd = value_nd.reshape(tmp_shape) + # For setitem, if there is None in indices, we need to squeeze the assigned value_nd + # since None is also ignored in slicing the original array. + if squeeze_axes and value_nd.ndim > len(bcast_shape): + squeeze_axes = tuple([ax for ax in squeeze_axes if ax < len(value_nd.shape)]) + value_nd = value_nd.squeeze(axis=tuple(squeeze_axes)) if value_nd.shape != bcast_shape: - value_nd = value_nd.broadcast_to(bcast_shape) + if value_nd.size == 0: + value_nd = value_nd.reshape(bcast_shape) + else: + value_nd = value_nd.broadcast_to(bcast_shape) return value_nd # pylint: disable=invalid-name @@ -723,24 +774,46 @@ def _basic_indexing_key_int_to_slice(idcs): # pylint: enable=invalid-name @staticmethod - def _new_axes_after_basic_indexing(axes, key_nd): - """Return indices of ``axes`` after slicing with ``key_nd``. + def _new_axes_after_basic_indexing(axes, key): + """Return indices of ``axes`` after slicing with ``key``. This function is used to calculate the positions where new axes should end up after indexing, taking into account the removal of axes by integer indexing. - The ``key_nd`` sequence should contain slices and integers only, no - ``None`` entries. + The ``key`` sequence should be the exapanded key including slices, integer types + and ``None``. """ - steps = [0] + [0 if isinstance(idx, integer_types) else 1 - for idx in key_nd] + steps = [0] + [0 if isinstance(idx, integer_types) else 1 for idx in key] cum_steps = np.cumsum(steps) - axes_in_bounds = [ax for ax in axes if ax < len(cum_steps)] - axes_out_of_bounds = [ax for ax in axes if ax >= len(cum_steps)] - axes_after = tuple(cum_steps[axes_in_bounds]) - oob_offsets = [ax - len(key_nd) for ax in axes_out_of_bounds] - axes_after += tuple(cum_steps[-1] + offset for offset in oob_offsets) + axes_after = tuple(cum_steps[axes]) + return axes_after + + @staticmethod + def _new_axes_after_advanced_indexing(key, adv_axs, bcast_adv_ndim, adv_are_adjacent): # pylint: disable=invalid-name + """ + Return indices of ``axes`` after slicing with ``key_nd``. + + This function is used to calculate the positions where new axes should + end up after indexing, taking into account the removal of axes by + integer indexing. + + The ``key`` sequence should be the exapanded key including slices, array like objects, + integer types and ``None``. + ``adv_axes`` is the sequence of indices of advanced axes. + ``bcast_adv_ndim`` is the number of dimensions of advanced indexing subspace. 
+ ``adv_are_adjacent`` is a boolean value. Value being True means all advanced indicies are adjacent. + + Note: integer indices are also considered advanced indices here. + """ + new_axes = [ax for ax in range(len(key)) if key[ax] is None] + adv_axs_set = set(adv_axs) + if not adv_are_adjacent: + steps = [bcast_adv_ndim] + [0 if ax in adv_axs_set else 1 for ax in range(len(key))] + else: + steps = [0] + [0 if ax in adv_axs_set else 1 for ax in range(len(key))] + cum_steps = np.cumsum(steps) + axes_after = tuple(cum_steps[new_axes]) return axes_after # pylint: disable=invalid-name @@ -807,15 +880,24 @@ def _basic_indexing_contiguous_flat_begin_end(slc_key, shape): def _set_nd_basic_indexing(self, key, value): """This function indexes ``self`` with a tuple of ``slice`` objects only.""" for idx in key: - if not isinstance(idx, (py_slice, integer_types)): + if idx is not None and not isinstance(idx, (py_slice, integer_types)): raise RuntimeError( '`key` may only contain `slice` or integer objects in the ' 'basic implementation, got object of type {}. ' 'This is a bug, please report it!' ''.format(type(idx))) + key_nd = tuple(idx for idx in key if idx is not None) int_axes = [ - ax for ax in range(len(key)) if isinstance(key[ax], integer_types) + ax for ax in range(len(key_nd)) if isinstance(key_nd[ax], integer_types) ] + + # Check bounds for integer axes + for ax in int_axes: # pylint: disable=invalid-name + if not -self.shape[ax] <= key_nd[ax] < self.shape[ax]: + raise IndexError( + 'index {} is out of bounds for axis {} with size {}' + ''.format(key_nd[ax], ax, self.shape[ax])) + begin, end, step = self._basic_indexing_key_to_begin_end_step( key, self.shape, keep_none=False ) @@ -828,10 +910,12 @@ def _set_nd_basic_indexing(self, key, value): begin, end, step = self._basic_indexing_key_to_begin_end_step( key, self.shape, keep_none=True ) + none_axes = [ax for ax in range(len(key)) if key[ax] is None] + new_axes = self._new_axes_after_basic_indexing(none_axes, key) if can_assign_directly: # Easy case, overwrite whole array. - if isinstance(value, NDArray): + if type(value) == self.__class__: # pylint: disable=unidiomatic-typecheck if value.handle is not self.handle: # Need to do this before `broadcast_to`. 
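To make the `cum_steps` bookkeeping in `_new_axes_after_basic_indexing` concrete, here is a self-contained sketch of the same computation (pure Python/NumPy mirroring the method body; not part of the diff). For the expanded key `(slice(None), None, 2, None)` the integer entry collapses an axis, so the two `None` entries land at output positions 1 and 2, matching NumPy:

    import numpy as np

    key = (slice(None), None, 2, None)                                 # expanded indexing key
    new_axes = [ax for ax in range(len(key)) if key[ax] is None]       # [1, 3]
    steps = [0] + [0 if isinstance(idx, int) else 1 for idx in key]    # ints collapse an axis
    cum_steps = np.cumsum(steps)                                       # [0, 1, 2, 2, 3]
    axes_after = tuple(cum_steps[new_axes])                            # (1, 2)

    x = np.arange(12).reshape(3, 4)
    assert x[:, None, 2, None].shape == (3, 1, 1)                      # new axes at positions 1 and 2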
tmp_shape = _shape_for_bcast( @@ -844,17 +928,13 @@ def _set_nd_basic_indexing(self, key, value): value.copyto(self) elif isinstance(value, numeric_types): - _internal._full( - shape=self.shape, value=float(value), ctx=self.context, - dtype=self.dtype, out=self - ) + self._full(value) elif isinstance(value, (np.ndarray, np.generic)): tmp_shape = _shape_for_bcast( value.shape, target_ndim=self.ndim, new_axes=int_axes ) value = value.reshape(tmp_shape) - if isinstance(value, np.generic) or value.shape != self.shape: value = np.broadcast_to(value, self.shape) self._sync_copyfrom(value) @@ -862,20 +942,27 @@ def _set_nd_basic_indexing(self, key, value): else: # Other array-like value_nd = self._prepare_value_nd( - value, new_axes=int_axes, bcast_shape=self.shape + value, bcast_shape=self.shape ) value_nd.copyto(self) elif isinstance(value, numeric_types): - _internal._slice_assign_scalar( - self, float(value), begin, end, step, out=self - ) + self.slice_assign_scalar(float(value), begin, end, step) else: + # drop the axis of indexed_shape corresponding to int axes + bcast_shape = [] + for i, size in enumerate(indexed_shape): + if i not in int_axes: + bcast_shape.append(size) + if bcast_shape == []: + bcast_shape = [1] + bcast_shape = tuple(bcast_shape) value_nd = self._prepare_value_nd( - value, new_axes=int_axes, bcast_shape=indexed_shape + value, bcast_shape=bcast_shape, squeeze_axes=new_axes ) - _internal._slice_assign(self, value_nd, begin, end, step, out=self) + value_nd = value_nd.reshape(indexed_shape) + self.slice_assign(value_nd, begin, end, step) def _get_nd_basic_indexing(self, key): """This function indexes ``self`` with a tuple of `slice` objects only.""" @@ -891,10 +978,12 @@ def _get_nd_basic_indexing(self, key): 'too many indices ({}) for array with {} dimensions' ''.format(len(key_nd), self.ndim) ) - - none_axes = [ax for ax in range(len(key)) if key[ax] is None] # pylint: disable=invalid-name slc_key, int_axes = self._basic_indexing_key_int_to_slice(key_nd) - new_axes = self._new_axes_after_basic_indexing(none_axes, key_nd) + none_axes = [ax for ax in range(len(key)) if key[ax] is None] + if none_axes: + new_axes = self._new_axes_after_basic_indexing(none_axes, key) + else: + new_axes = [] # Check bounds for integer axes for ax in int_axes: # pylint: disable=invalid-name @@ -903,27 +992,11 @@ def _get_nd_basic_indexing(self, key): 'index {} is out of bounds for axis {} with size {}' ''.format(key_nd[ax], ax, self.shape[ax])) - # Make sure we don't accidentally have advanced indexing or - # unsupported entries. - for idx in slc_key: - if not isinstance(idx, py_slice): - raise RuntimeError( - 'found object of type {} instead of `slice`. ' - 'This is a bug, please report it!' 
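The setitem branch above drops the integer axes from `indexed_shape` before broadcasting the value, then reshapes back and hands off to `slice_assign`, so assignment through a mixed integer/slice key follows NumPy broadcasting. A small parity sketch (assumes a build with this patch; illustrative only):

    import numpy as np
    import mxnet as mx

    a = np.arange(24).reshape(2, 3, 4)
    x = mx.nd.array(a)

    # key (1, :) -> indexed shape (1, 3, 4); the integer axis is dropped, so the
    # value only has to broadcast against (3, 4)
    x[1, :] = mx.nd.arange(4)
    a[1, :] = np.arange(4)
    assert (x.asnumpy() == a).all()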
- ''.format(type(idx))) - # Convert to begin, end and step, and return immediately if the slice # is empty begin, end, step = self._basic_indexing_key_to_begin_end_step( slc_key, self.shape, keep_none=False ) - # Pylint is wrong about this - # pylint: disable=bad-continuation - if any( - b >= e and s > 0 or b <= e and s < 0 for b, e, s in zip(begin, end, step) - ): - return array([], self.context, self.dtype) - # pylint: enable=bad-continuation if self._basic_indexing_slice_is_contiguous(slc_key, self.shape): # Create a shared-memory view by using low-level flat slicing @@ -967,7 +1040,6 @@ def _get_nd_basic_indexing(self, key): if final_shape == []: # Override for single element indexing final_shape = [1] - return sliced.reshape(final_shape) @staticmethod @@ -1056,7 +1128,10 @@ def _drop_slice_none_at_end(key): return tuple(key) def _get_index_nd(self, key): - """Return an index array for use in `scatter_nd` and `gather_nd`.""" + """ + Return an index array for use in `scatter_nd` and `gather_nd`, + and a list of positions of new_axes in ouptut shape. + """ key_nd = tuple(idx for idx in key if idx is not None) if len(key_nd) < self.ndim: raise RuntimeError( @@ -1125,6 +1200,14 @@ def _get_index_nd(self, key): bcast_idcs_permut_short = self._broadcast_advanced_indices( converted_idcs_short, block_axes=block_axs_nd ) + + # Get the ndim of advanced indexing subspace + converted_advanced_idcs = [ + self._advanced_index_to_array(idx, ax_len, self.context) + for idx, ax_len in zip(adv_idcs_nd, [self.shape[ax] for ax in adv_axs_nd]) + ] + bcast_advanced_shape = _broadcast_shapes(converted_advanced_idcs) + # Undo the permutation to restore the original order bcast_idcs_short = [ bcast_idcs_permut_short[ax] @@ -1132,21 +1215,38 @@ def _get_index_nd(self, key): if axs_nd_permut[ax] not in dropped_axs ] - return op.stack(*bcast_idcs_short) + # Calculate where the newaxes are inserted after advanced indexing + new_axes_positions = self._new_axes_after_advanced_indexing(key, adv_axs,\ + len(bcast_advanced_shape), adv_idcs_are_adjacent) + + # if any array is numpy.ndarray, stack in numpy ndarray class. + for idcs in bcast_idcs_short: + if type(idcs) != NDArray: # pylint: disable=unidiomatic-typecheck + return bcast_idcs_short, new_axes_positions + + return op.stack(*bcast_idcs_short), new_axes_positions def _set_nd_advanced_indexing(self, key, value): """This function is called by __setitem__ when key is an advanced index.""" - indices = self._get_index_nd(key) - vshape = _get_oshape_of_gather_nd_op(self.shape, indices.shape) - value_nd = self._prepare_value_nd(value, new_axes=[], bcast_shape=vshape) - _internal._scatter_set_nd( - lhs=self, rhs=value_nd, indices=indices, shape=self.shape, out=self - ) + indices, new_axes = self._get_index_nd(key) + vshape = get_oshape_of_gather_nd_op(self.shape, indices.shape) + value_nd = self._prepare_value_nd(value, bcast_shape=vshape, squeeze_axes=new_axes) + self._scatter_set_nd(value_nd, indices) def _get_nd_advanced_indexing(self, key): """Get item when key is a tuple of any objects of the following types: NDArray, np.ndarray, list, tuple, slice, and integer.""" - return op.gather_nd(self, self._get_index_nd(key)) + slc_key, new_axes = self._get_index_nd(key) + sliced = op.gather_nd(self, slc_key) + + # Reshape due to `None` entries in `key`. 
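The `new_axes` positions returned by `_get_index_nd` are what let `None` entries survive advanced indexing, which the previous implementation could not handle (see the re-enabled test cases near the bottom of this patch). A small NumPy-parity sketch (assumes a build with this patch; illustrative only):

    import numpy as np
    import mxnet as mx

    a = np.arange(24).reshape(2, 3, 4)
    x = mx.nd.array(a)

    # fancy indices combined with None: the gather_nd result is reshaped so the
    # inserted axes appear where NumPy puts them
    key = ([0, 1], None, [1, 2])
    assert x[key].shape == a[key].shape
    assert (x[key].asnumpy() == a[key]).all()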
+ if new_axes: + final_shape = [sliced.shape[i] for i in range(sliced.ndim)] + for ax in new_axes: # pylint: disable=invalid-name + final_shape.insert(ax, 1) + return sliced.reshape(final_shape) + else: + return sliced def _sync_copyfrom(self, source_array): """Performs a synchronized copy from the `source_array` to the current array. @@ -2522,6 +2622,88 @@ def copy(self): """ return self.copyto(self.context) + def slice_assign_scalar(self, value, begin, end, step): + """ + Assign the scalar to a cropped subset of this NDArray. Value will broadcast to the shape of the cropped shape + and will be cast to the same dtype of the NDArray. + + Parameters + ---------- + value: numeric value + Value and this NDArray should be of the same data type. + The shape of rhs should be the same as the cropped shape of this NDArray. + begin: tuple of begin indices + end: tuple of end indices + step: tuple of step lenghths + + Returns + ------- + This NDArray. + + Examples + -------- + >>> from mxnet import nd + >>> x = nd.ones((2, 2, 2)) + >>> y = x.slice_assign_scalar(0, (0, 0, None), (1, 1, None), (None, None, None)) + >>> y + [[[0. 0.] + [1. 1.]] + + [[1. 1.] + [1. 1.]]] + + >>> x + [[[0. 0.] + [1. 1.]] + + [[1. 1.] + [1. 1.]]] + + + """ + return _internal._slice_assign_scalar(self, value, begin=begin, end=end, step=step, out=self) + + def slice_assign(self, rhs, begin, end, step): + """ + Assign the rhs to a cropped subset of this NDarray in place. + Returns the view of this NDArray. + + Parameters + ---------- + rhs: NDArray. + rhs and this NDArray should be of the same data type, and on the same device. + The shape of rhs should be the same as the cropped shape of this NDArray. + begin: tuple of begin indices + end: tuple of end indices + step: tuple of step lenghths + + Returns + ------- + This NDArray. + + Examples + -------- + >>> x = nd.ones((2, 2, 2)) + >>> assigned = nd.zeros((1, 1, 2)) + >>> y = x.slice_assign(assigned, (0, 0, None), (1, 1, None), (None, None, None)) + >>> y + [[[0. 0.] + [1. 1.]] + + [[1. 1.] + [1. 1.]]] + + >>> x + [[[0. 0.] + [1. 1.]] + + [[1. 1.] + [1. 1.]]] + + """ + return _internal._slice_assign(self, rhs, begin=begin, end=end, step=step, out=self) + + def as_in_context(self, context): """Returns an array on the target device with the same value as this array. @@ -2692,20 +2874,32 @@ def to_dlpack_for_write(self): """ return to_dlpack_for_write(self) + def _full(self, value): + """ + This is added as an NDArray class method in order to support polymorphism in NDArray and numpy.ndarray indexing + """ + return _internal._full(self.shape, value=value, ctx=self.context, dtype=self.dtype, out=self) + + def _scatter_set_nd(self, value_nd, indices): + """ + This is added as an NDArray class method in order to support polymorphism in NDArray and numpy.ndarray indexing + """ + return _internal._scatter_set_nd( + lhs=self, rhs=value_nd, indices=indices, shape=self.shape, out=self + ) -def _indexing_key_expand_implicit_axes(key, shape): +def indexing_key_expand_implicit_axes(key, shape): """Make implicit axes explicit by adding ``slice(None)``. 
- Examples -------- >>> shape = (3, 4, 5) - >>> _indexing_key_expand_implicit_axes(np.s_[2, 1, 1], shape) + >>> indexing_key_expand_implicit_axes(np.s_[2, 1, 1], shape) (2, 1, 1) - >>> _indexing_key_expand_implicit_axes(np.s_[0], shape) + >>> indexing_key_expand_implicit_axes(np.s_[0], shape) (0, slice(None, None, None), slice(None, None, None)) - >>> _indexing_key_expand_implicit_axes(np.s_[0, ...], shape) # equivalent + >>> indexing_key_expand_implicit_axes(np.s_[0, ...], shape) # equivalent (0, slice(None, None, None), slice(None, None, None)) - >>> _indexing_key_expand_implicit_axes(np.s_[:2, None, 0, ...], shape) + >>> indexing_key_expand_implicit_axes(np.s_[:2, None, 0, ...], shape) (slice(None, 2, None), None, 0, slice(None, None, None)) """ if not isinstance(key, tuple): @@ -2794,7 +2988,7 @@ def _is_advanced_index(idx): raise RuntimeError('illegal index type {}'.format(type(idx))) -def _get_indexing_dispatch_code(key): +def get_indexing_dispatch_code(key): """Returns a dispatch code for calling basic or advanced indexing functions.""" assert isinstance(key, tuple) @@ -2854,7 +3048,7 @@ def _get_index_range(start, stop, length, step=1): return start, stop, step -def _get_oshape_of_gather_nd_op(dshape, ishape): +def get_oshape_of_gather_nd_op(dshape, ishape): """Given data and index shapes, get the output `NDArray` shape. This basically implements the infer shape logic of op gather_nd.""" assert len(dshape) > 0 and len(ishape) > 0 @@ -2865,8 +3059,9 @@ def _get_oshape_of_gather_nd_op(dshape, ishape): def _get_dim_size(start, stop, step): - """Given start, stop, and stop, calculate the number of elements - of this slice.""" + """Given start, stop, and step, calculate the number of elements + of this slice. + """ assert step != 0 if stop == start: return 0 diff --git a/python/mxnet/ndarray/numpy/_op.py b/python/mxnet/ndarray/numpy/_op.py index 04b3b19bcf2e..f0785a76818e 100644 --- a/python/mxnet/ndarray/numpy/_op.py +++ b/python/mxnet/ndarray/numpy/_op.py @@ -27,7 +27,7 @@ from . import _internal as _npi from ..ndarray import NDArray -__all__ = ['zeros', 'ones', 'add', 'subtract', 'multiply', 'divide', 'mod', 'power', 'sin', +__all__ = ['zeros', 'ones', 'full', 'add', 'subtract', 'multiply', 'divide', 'mod', 'power', 'sin', 'cos', 'tan', 'sinh', 'cosh', 'tanh', 'log10', 'sqrt', 'cbrt', 'abs', 'absolute', 'exp', 'expm1', 'arcsin', 'arccos', 'arctan', 'sign', 'log', 'degrees', 'log2', 'log1p', 'rint', 'radians', 'reciprocal', 'square', 'negative', 'fix', 'ceil', 'floor', @@ -103,6 +103,61 @@ def ones(shape, dtype=_np.float32, order='C', ctx=None): return _npi.ones(shape=shape, ctx=ctx, dtype=dtype) +@set_module('mxnet.ndarray.numpy') +def full(shape, fill_value, dtype=None, order='C', ctx=None, out=None): # pylint: disable=too-many-arguments + """ + Return a new array of given shape and type, filled with `fill_value`. + Parameters + ---------- + shape : int or sequence of ints + Shape of the new array, e.g., ``(2, 3)`` or ``2``. + fill_value : scalar + Fill value. + dtype : data-type, optional + The desired data-type for the array. The default, `None`, means + `np.array(fill_value).dtype`. + order : {'C'}, optional + Whether to store multidimensional data in C- or Fortran-contiguous + (row- or column-wise) order in memory. Currently only supports C order. + ctx: to specify the device, e.g. the i-th GPU. + out : ndarray or None, optional + A location into which the result is stored. + If provided, it must have the same shape and dtype as input ndarray. 
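For readers following the `get_oshape_of_gather_nd_op` helper that the advanced-indexing setitem path uses to size the value array: the rule it implements is the standard `gather_nd` one, i.e. with data shape `dshape` and indices shape `(M,) + Y` the output shape is `Y + dshape[M:]`. A tiny sketch of that arithmetic (illustrative; it mirrors the helper rather than introducing new API):

    dshape = (3, 4, 5)        # data shape
    ishape = (2, 7)           # indices: 2 index rows, 7 gathered positions
    oshape = tuple(ishape[1:]) + tuple(dshape[ishape[0]:])
    assert oshape == (7, 5)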
+ If not provided or `None`, a freshly-allocated array is returned. + Returns + ------- + out : ndarray + Array of `fill_value` with the given shape, dtype, and order. + Notes + ----- + This function differs from the original `numpy.full + https://docs.scipy.org/doc/numpy/reference/generated/numpy.full.html`_ in + the following way(s): + - Have an additional `ctx` argument to specify the device + - Have an additional `out` argument + - Currently does not support `order` selection + See Also + -------- + empty : Return a new uninitialized array. + ones : Return a new array setting values to one. + zeros : Return a new array setting values to zero. + Examples + -------- + >>> np.full((2, 2), 10) + array([[10., 10.], + [10., 10.]]) + >>> np.full((2, 2), 2, dtype=np.int32, ctx=mx.cpu(0)) + array([[2, 2], + [2, 2]], dtype=int32) + """ + if order != 'C': + raise NotImplementedError + if ctx is None: + ctx = current_context() + dtype = _np.float32 if dtype is None else dtype + return _npi.full(shape=shape, value=fill_value, ctx=ctx, dtype=dtype, out=out) + + @set_module('mxnet.ndarray.numpy') def arange(start, stop=None, step=1, dtype=None, ctx=None): """Return evenly spaced values within a given interval. diff --git a/python/mxnet/numpy/multiarray.py b/python/mxnet/numpy/multiarray.py index a47a9c01b7c4..5e7129226e34 100644 --- a/python/mxnet/numpy/multiarray.py +++ b/python/mxnet/numpy/multiarray.py @@ -34,16 +34,19 @@ import warnings import numpy as _np from ..ndarray import NDArray, _DTYPE_NP_TO_MX, _GRAD_REQ_MAP +from ..ndarray import indexing_key_expand_implicit_axes, get_indexing_dispatch_code,\ + get_oshape_of_gather_nd_op from ..ndarray._internal import _set_np_ndarray_class from . import _op as _mx_np_op from ..base import check_call, _LIB, NDArrayHandle from ..base import mx_real_t, c_array_buf, mx_uint, numeric_types, integer_types +from ..context import Context from ..util import _sanity_check_params, set_module from ..context import current_context from ..ndarray import numpy as _mx_nd_np from ..ndarray.numpy import _internal as _npi -__all__ = ['ndarray', 'empty', 'array', 'zeros', 'ones', 'add', 'subtract', 'multiply', 'divide', +__all__ = ['ndarray', 'empty', 'array', 'zeros', 'ones', 'full', 'add', 'subtract', 'multiply', 'divide', 'mod', 'power', 'sin', 'cos', 'tan', 'sinh', 'cosh', 'tanh', 'log10', 'sqrt', 'cbrt', 'abs', 'absolute', 'exp', 'expm1', 'arcsin', 'arccos', 'arctan', 'sign', 'log', 'degrees', 'log2', 'log1p', 'rint', 'radians', 'reciprocal', 'square', 'negative', @@ -51,6 +54,10 @@ 'tensordot', 'linspace', 'expand_dims', 'tile', 'arange', 'split', 'concatenate', 'stack'] +# Return code for dispatching indexing function call +_NDARRAY_UNSUPPORTED_INDEXING = -1 +_NDARRAY_BASIC_INDEXING = 0 +_NDARRAY_ADVANCED_INDEXING = 1 # This function is copied from ndarray.py since pylint # keeps giving false alarm error of undefined-all-variable @@ -101,80 +108,234 @@ def _get_index(idx): @set_module('mxnet.numpy') # pylint: disable=invalid-name class ndarray(NDArray): - """An array object represents a multidimensional, homogeneous array of fixed-size items. + """ + An array object represents a multidimensional, homogeneous array of fixed-size items. An associated data-type object describes the format of each element in the array (its byte-order, how many bytes it occupies in memory, whether it is an integer, a floating point number, or something else, etc.). Arrays should be constructed using - `array`, `zeros` or `empty`. 
Currently, only c-contiguous arrays are supported.""" + `array`, `zeros` or `empty`. Currently, only c-contiguous arrays are supported. + """ + def _get_np_basic_indexing(self, key): + """ + This function indexes ``self`` with a tuple of `slice` objects only. + """ + key_nd = tuple(idx for idx in key if idx is not None) + if len(key_nd) < self.ndim: + raise RuntimeError( + 'too few indices after normalization: expected `ndim` ({}) ' + 'but got {}. This is a bug, please report it!' + ''.format(self.ndim, len(key_nd)) + ) + if len(key_nd) > self.ndim: + raise IndexError( + 'too many indices ({}) for array with {} dimensions' + ''.format(len(key_nd), self.ndim) + ) + + none_axes = [ax for ax in range(len(key)) if key[ax] is None] # pylint: disable=invalid-name + slc_key, int_axes = self._basic_indexing_key_int_to_slice(key_nd) + new_axes = self._new_axes_after_basic_indexing(none_axes, key) + + # Check bounds for integer axes + for ax in int_axes: # pylint: disable=invalid-name + if not -self.shape[ax] <= key_nd[ax] < self.shape[ax]: + raise IndexError( + 'index {} is out of bounds for axis {} with size {}' + ''.format(key_nd[ax], ax, self.shape[ax])) + + if self._basic_indexing_slice_is_contiguous(slc_key, self.shape): + # Create a shared-memory view by using low-level flat slicing + flat_begin, flat_end = self._basic_indexing_contiguous_flat_begin_end( + slc_key, self.shape + ) + handle = NDArrayHandle() + flat_self = self.reshape_view(-1) + check_call( + _LIB.MXNDArraySlice( + flat_self.handle, + mx_uint(flat_begin), + mx_uint(flat_end), + ctypes.byref(handle), + ) + ) + sliced_shape = self._basic_indexing_sliced_shape(slc_key, self.shape) + sliced = self.__class__(handle=handle, writable=self.writable) + if 0 in sliced_shape: + sliced = sliced.reshape(sliced_shape) + else: + sliced = sliced.reshape_view(sliced_shape) + + else: + begin, end, step = self._basic_indexing_key_to_begin_end_step( + slc_key, self.shape, keep_none=True + ) + sliced = _npi.slice(self, begin, end, step) + + # Reshape to final shape due to integer and `None` entries in `key`. + final_shape = [sliced.shape[i] for i in range(sliced.ndim) if i not in int_axes] + for ax in new_axes: # pylint: disable=invalid-name + final_shape.insert(ax, 1) + + if sliced.size == 0: + return sliced.reshape(tuple(final_shape)) + else: + return sliced.reshape_view(tuple(final_shape)) + + def _get_np_advanced_indexing(self, key): + idcs, new_axes = self._get_index_nd(key) + if type(idcs) == NDArray: # pylint: disable=unidiomatic-typecheck + idcs = idcs.as_np_ndarray() + else: + idcs = _npi.stack(*[i if isinstance(i, self.__class__) else i.as_np_ndarray() for i in idcs]) + sliced = _npi.gather_nd(self, idcs) + # Reshape due to `None` entries in `key`. 
+ if new_axes: + final_shape = [sliced.shape[i] for i in range(sliced.ndim)] + for ax in new_axes: # pylint: disable=invalid-name + final_shape.insert(ax, 1) + return sliced.reshape(tuple(final_shape)) + else: + return sliced + + def _set_np_advanced_indexing(self, key, value): + """This function is called by __setitem__ when key is an advanced index.""" + idcs, new_axes = self._get_index_nd(key) + if type(idcs) == NDArray: # pylint: disable=unidiomatic-typecheck + idcs = idcs.as_np_ndarray() + else: + idcs = _npi.stack(*[i if isinstance(i, self.__class__) else i.as_np_ndarray() for i in idcs]) + vshape = get_oshape_of_gather_nd_op(self.shape, idcs.shape) + value_nd = self._prepare_value_nd(value, bcast_shape=vshape, squeeze_axes=new_axes) + self._scatter_set_nd(value_nd, idcs) # pylint: disable=too-many-return-statements def __getitem__(self, key): - # TODO(junwu): calling base class __getitem__ is a temp solution + """ + Overriding the method in NDArray class in a numpy fashion. + Calling numpy ndarray's _get_np_basic_indexing(key) and _get_np_advanced_indexing(key). + """ ndim = self.ndim shape = self.shape if ndim == 0: if key != (): raise IndexError('scalar tensor can only accept `()` as index') + # Handle simple cases for higher speed if isinstance(key, tuple) and len(key) == 0: return self - elif isinstance(key, tuple) and len(key) == ndim\ + if isinstance(key, tuple) and len(key) == ndim\ and all(isinstance(idx, integer_types) for idx in key): out = self for idx in key: out = out[idx] return out - elif isinstance(key, integer_types): + if isinstance(key, integer_types): if key > shape[0] - 1: raise IndexError( 'index {} is out of bounds for axis 0 with size {}'.format( key, shape[0])) return self._at(key) elif isinstance(key, py_slice): - if key.step is not None and key.step != 1: - if key.step == 0: - raise ValueError("slice step cannot be zero") - return self.as_nd_ndarray().__getitem__(key).as_np_ndarray() - elif key.start is not None or key.stop is not None: - return self._slice(key.start, key.stop) - else: - return self - - if isinstance(key, ndarray): - key = key.as_nd_ndarray() - elif isinstance(key, tuple): - key = [_get_index(idx) for idx in key] - key = tuple(key) - elif isinstance(key, list): - key = [_get_index(idx) for idx in key] - elif sys.version_info[0] > 2 and isinstance(key, range): - key = _get_index(key) - return self.as_nd_ndarray().__getitem__(key).as_np_ndarray() - # pylint: enable=too-many-return-statements + if (key.step is None or key.step == 1): + if key.start is not None or key.stop is not None: + return self._slice(key.start, key.stop) + else: + return self + elif key.step == 0: + raise ValueError("slice step cannot be zero") + + key = indexing_key_expand_implicit_axes(key, self.shape) + indexing_dispatch_code = get_indexing_dispatch_code(key) + if indexing_dispatch_code == _NDARRAY_BASIC_INDEXING: + return self._get_np_basic_indexing(key) + elif indexing_dispatch_code == _NDARRAY_ADVANCED_INDEXING: + return self._get_np_advanced_indexing(key) + else: + raise RuntimeError def __setitem__(self, key, value): - # TODO(junwu): calling base class __setitem__ is a temp solution + """ + x.__setitem__(i, y) <=> x[i]=y + Sets ``self[key]`` to ``value``. + + Overriding the method in NDArray class in a numpy fashion. 
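As `_get_np_basic_indexing` above shows, contiguous basic indexing produces a shared-memory view (via `MXNDArraySlice` plus `reshape_view`), while `_get_np_advanced_indexing` builds its result with `_npi.gather_nd` and returns a new array. A sketch of the observable difference (assumes a build with this patch and the numpy frontend active as in the tests):

    from mxnet import numpy as np

    x = np.arange(12).reshape(3, 4)
    v = x[1]                             # contiguous basic indexing -> view
    v[:] = 0                             # writes through to x
    assert (x.asnumpy()[1] == 0).all()

    c = x[[0, 2]]                        # advanced indexing -> gather_nd copy
    c[:] = -1                            # does not touch x
    assert (x.asnumpy() != -1).all()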
+ """ if isinstance(value, NDArray) and not isinstance(value, ndarray): raise TypeError('Cannot assign mx.nd.NDArray to mxnet.numpy.ndarray') if self.ndim == 0: if not isinstance(key, tuple) or len(key) != 0: raise IndexError('scalar tensor can only accept `()` as index') - if isinstance(value, ndarray): - value = value.as_nd_ndarray() - # TODO(junwu): Better handling of this situation - if isinstance(key, tuple) and len(key) == 0: - self.as_nd_ndarray().__setitem__(key, value) - return - - if isinstance(key, ndarray): - key = key.as_nd_ndarray() - elif isinstance(key, tuple): - key = [_get_index(idx) for idx in key] - key = tuple(key) - elif isinstance(key, list): - key = [_get_index(idx) for idx in key] - elif sys.version_info[0] > 2 and isinstance(key, range): - key = _get_index(key) - self.as_nd_ndarray().__setitem__(key, value) + if isinstance(value, numeric_types): + self.full(value) + elif isinstance(value, ndarray) and value.size == 1: + if value.shape != self.shape: + value = value.reshape(self.shape) + value.copyto(self) + elif isinstance(value, (_np.ndarray, _np.generic)) and value.size == 1: + if isinstance(value, _np.generic) or value.shape != self.shape: + value = value.reshape(self.shape) + self._sync_copyfrom(value) + else: + raise ValueError('setting an array element with a sequence.') + else: + key = indexing_key_expand_implicit_axes(key, self.shape) + slc_key = tuple(idx for idx in key if idx is not None) + if len(slc_key) < self.ndim: + raise RuntimeError( + 'too few indices after normalization: expected `ndim` ({}) ' + 'but got {}. This is a bug, please report it!' + ''.format(self.ndim, len(slc_key)) + ) + if len(slc_key) > self.ndim and self.ndim != 0: + raise IndexError( + 'too many indices ({}) for array with {} dimensions' + ''.format(len(slc_key), self.ndim) + ) + indexing_dispatch_code = get_indexing_dispatch_code(slc_key) + if indexing_dispatch_code == _NDARRAY_BASIC_INDEXING: + self._set_nd_basic_indexing(key, value) # function is inheritated from NDArray class + elif indexing_dispatch_code == _NDARRAY_ADVANCED_INDEXING: + self._set_np_advanced_indexing(key, value) + else: + raise ValueError( + 'Indexing NDArray with index {} of type {} is not supported' + ''.format(key, type(key)) + ) + + def _prepare_value_nd(self, value, bcast_shape, squeeze_axes=None): + """Return a broadcast `ndarray` with same context and dtype as ``self``. + For setting item, The returned `ndarray` is squeezed according to squeeze_axes since the + value_nd is assigned to not yet expanded space in original array. + `value`: numeric types or array like. + `bcast_shape`: a shape tuple. + `squeeze_axes`: a sequence of axes to squeeze in the value array. + Note: mxnet.numpy.ndarray not support NDArray as assigned value. + """ + if isinstance(value, numeric_types): + value_nd = full(bcast_shape, value, ctx=self.context, dtype=self.dtype) + elif isinstance(value, self.__class__): + value_nd = value.as_in_context(self.context) + if value_nd.dtype != self.dtype: + value_nd = value_nd.astype(self.dtype) + else: + try: + value_nd = array(value, ctx=self.context, dtype=self.dtype) + except: + raise TypeError('mxnet.np.ndarray does not support assignment with non-array-like ' + 'object {} of type {}'.format(value, type(value))) + + # For advanced indexing setitem, if there is None in indices, we need to squeeze the + # assigned value_nd since None is also ignored in slicing the original array. 
+ if squeeze_axes and value_nd.ndim > len(bcast_shape): + squeeze_axes = tuple([ax for ax in squeeze_axes if ax < len(value_nd.shape)]) + value_nd = value_nd.squeeze(axis=tuple(squeeze_axes)) + + if value_nd.shape != bcast_shape: + if value_nd.size == 0: + value_nd = value_nd.reshape(bcast_shape) + else: + value_nd = value_nd.broadcast_to(bcast_shape) + return value_nd + def __add__(self, other): """x.__add__(y) <=> x + y""" @@ -550,7 +711,7 @@ def copyto(self, other): ``self.shape`` should be the same. This function copies the value from ``self`` to ``other``. - If ``other`` is a context, a new ``NDArray`` will be first created on + If ``other`` is a context, a new ``np.ndarray`` will be first created on the target context, and the value of ``self`` is copied. Parameters @@ -560,14 +721,14 @@ def copyto(self, other): Returns ------- - ndarray + out: ndarray The copied array. If ``other`` is an ``ndarray``, then the return value and ``other`` will point to the same ``ndarray``. Examples -------- >>> x = np.ones((2,3)) - >>> y = np.zeros((2,3), mx.gpu(0)) + >>> y = np.zeros((2,3), ctx=mx.gpu(0)) >>> z = x.copyto(y) >>> z is y True @@ -576,8 +737,15 @@ def copyto(self, other): [ 1., 1., 1.]], dtype=float32) """ if isinstance(other, ndarray): - other = other.as_nd_ndarray() - return self.as_nd_ndarray().copyto(other).as_np_ndarray() + if other.handle is self.handle: + warnings.warn('You are attempting to copy an array to itself', RuntimeWarning) + return False + return _npi.copyto(self, out=other) + elif isinstance(other, Context): + hret = ndarray(_new_alloc_handle(self.shape, other, True, self.dtype)) + return _npi.copyto(self, out=hret) + else: + raise TypeError('copyto does not support type ' + str(type(other))) def asscalar(self): raise AttributeError('mxnet.numpy.ndarray object has no attribute asscalar') @@ -650,6 +818,12 @@ def reshape_like(self, *args, **kwargs): """ raise AttributeError('mxnet.numpy.ndarray object has no attribute reshape_like') + def reshape_view(self, *shape, **kwargs): + """Returns a **view** of this array with a new shape without altering any data. + Inheritated from NDArray.reshape. + """ + return super(ndarray, self).reshape(*shape, **kwargs) + def zeros_like(self, *args, **kwargs): """Convenience fluent method for :py:func:`zeros_like`. @@ -732,6 +906,82 @@ def slice_like(self, *args, **kwargs): """ raise AttributeError('mxnet.numpy.ndarray object has no attribute slice_like') + def slice_assign_scalar(self, value, begin, end, step): + """ + Assign the scalar to a cropped subset of this ndarray. Value will broadcast to the shape of the cropped shape + and will be cast to the same dtype of the ndarray. + + Parameters + ---------- + value: numeric value + Value and this ndarray should be of the same data type. + The shape of rhs should be the same as the cropped shape of this ndarray. + begin: tuple of begin indices + end: tuple of end indices + step: tuple of step lenghths + + Returns + ------- + This ndarray. + + Examples + -------- + >>> x = np.ones((2, 2, 2)) + >>> y = x.slice_assign_scalar(0, (0, 0, None), (1, 1, None), (None, None, None)) + >>> y + array([[[0., 0.], + [1., 1.]], + + [[1., 1.], + [1., 1.]]]) + >>> x + array([[[0., 0.], + [1., 1.]], + + [[1., 1.], + [1., 1.]]]) + """ + return _npi.slice_assign_scalar(self, value, begin=begin, end=end, step=step, out=self) + + def slice_assign(self, rhs, begin, end, step): + """ + Assign the rhs to a cropped subset of this ndarray in place. + Returns the view of this ndarray. 
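With the `copyto` hunk above, `mxnet.numpy.ndarray.copyto` no longer round-trips through `as_nd_ndarray`; it calls the `_npi.copyto` operator directly (the `_npi_copyto` alias for `_copyto` is registered later in this patch, in ndarray.cc). A short usage sketch (assumes a build with this patch):

    import mxnet as mx
    from mxnet import numpy as np

    x = np.ones((2, 3))
    y = np.zeros((2, 3))
    z = x.copyto(y)              # copies the values of x into y and returns y
    assert (y.asnumpy() == 1).all()

    w = x.copyto(mx.cpu(0))      # copying to a Context allocates a new ndarray there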
+ + Parameters + ---------- + rhs: ndarray. + rhs and this NDArray should be of the same data type, and on the same device. + The shape of rhs should be the same as the cropped shape of this ndarray. + begin: tuple of begin indices + end: tuple of end indices + step: tuple of step lenghths + + Returns + ------- + out : ndarray + This ndarray. + + Examples + -------- + >>> x = np.ones((2, 2, 2)) + >>> assigned = np.zeros((1, 1, 2)) + >>> y = x.slice_assign(assigned, (0, 0, None), (1, 1, None), (None, None, None)) + >>> y + array([[[0., 0.], + [1., 1.]], + + [[1., 1.], + [1., 1.]]]) + >>> x + array([[[0., 0.], + [1., 1.]], + + [[1., 1.], + [1., 1.]]]) + """ + return _npi.slice_assign(self, rhs, begin=begin, end=end, step=step, out=self) + def take(self, *args, **kwargs): """Convenience fluent method for :py:func:`take`. @@ -816,7 +1066,7 @@ def sign(self, *args, **kwargs): The arguments are the same as for :py:func:`sign`, with this array as data. """ - raise AttributeError('mxnet.numpy.ndarray object has no attribute abs') + raise AttributeError('mxnet.numpy.ndarray object has no attribute sign') def flatten(self, order='C'): # pylint: disable=arguments-differ """Return a copy of the array collapsed into one dimension.""" @@ -1252,11 +1502,26 @@ def squeeze(self, axis=None): # pylint: disable=arguments-differ return _mx_np_op.squeeze(self, axis=axis) def broadcast_to(self, shape): - raise AttributeError('mxnet.numpy.ndarray object has no attribute broadcast_to') + return _mx_np_op.broadcast_to(self, shape) def broadcast_like(self, other): raise AttributeError('mxnet.numpy.ndarray object has no attribute broadcast_like') + def _full(self, value): + """ + Currently for internal use only. Implemented for __setitem__. + Assign to self an array of self's same shape and type, filled with value. + """ + return _mx_nd_np.full(self.shape, value, ctx=self.context, dtype=self.dtype, out=self) + + def _scatter_set_nd(self, value_nd, indices): + """ + This is added as an ndarray class method in order to support polymorphism in NDArray and numpy.ndarray indexing + """ + return _npi.scatter_set_nd( + lhs=self, rhs=value_nd, indices=indices, shape=self.shape, out=self + ) + @property def shape(self): return super(ndarray, self).shape @@ -1408,6 +1673,62 @@ def ones(shape, dtype=_np.float32, order='C', ctx=None): return _mx_nd_np.ones(shape, dtype, order, ctx) +@set_module('mxnet.numpy') +def full(shape, fill_value, dtype=None, order='C', ctx=None, out=None): # pylint: disable=too-many-arguments + """ + Return a new array of given shape and type, filled with `fill_value`. + + Parameters + ---------- + shape : int or sequence of ints + Shape of the new array, e.g., ``(2, 3)`` or ``2``. + fill_value : scalar + Fill value. + dtype : data-type, optional + The desired data-type for the array. The default, `None`, means + `np.array(fill_value).dtype`. + order : {'C'}, optional + Whether to store multidimensional data in C- or Fortran-contiguous + (row- or column-wise) order in memory. Currently only supports C order. + ctx: to specify the device, e.g. the i-th GPU. + out : ndarray or None, optional + A location into which the result is stored. + If provided, it must have the same shape and dtype as input ndarray. + If not provided or `None`, a freshly-allocated array is returned. + + Returns + ------- + out : ndarray + Array of `fill_value` with the given shape, dtype, and order. 
+ + Notes + ----- + This function differs from the original `numpy.full + https://docs.scipy.org/doc/numpy/reference/generated/numpy.full.html`_ in + the following way(s): + + - Has an additional `ctx` argument to specify the device + - Has an additional `out` argument + - Currently does not support `order` selection + + See Also + -------- + empty : Return a new uninitialized array. + ones : Return a new array setting values to one. + zeros : Return a new array setting values to zero. + + Examples + -------- + >>> np.full((2, 2), 10) + array([[10., 10.], + [10., 10.]]) + >>> np.full((2, 2), 2, dtype=np.int32, ctx=mx.cpu(0)) + array([[2, 2], + [2, 2]], dtype=int32) + """ + return _mx_nd_np.full(shape, fill_value, order=order, ctx=ctx, dtype=dtype, out=out) + + @set_module('mxnet.numpy') def add(x1, x2, out=None): """Add arguments element-wise. diff --git a/python/mxnet/symbol/numpy/_symbol.py b/python/mxnet/symbol/numpy/_symbol.py index 251a8a1b8e56..ff20cabdb748 100644 --- a/python/mxnet/symbol/numpy/_symbol.py +++ b/python/mxnet/symbol/numpy/_symbol.py @@ -931,6 +931,61 @@ def ones(shape, dtype=_np.float32, order='C', ctx=None): return _npi.ones(shape=shape, ctx=ctx, dtype=dtype) +@set_module('mxnet.symbol.numpy') +def full(shape, fill_value, dtype=None, order='C', ctx=None, out=None): # pylint: disable=too-many-arguments + """ + Return a new array of given shape and type, filled with `fill_value`. + Parameters + ---------- + shape : int or sequence of ints + Shape of the new array, e.g., ``(2, 3)`` or ``2``. + fill_value : scalar + Fill value. + dtype : data-type, optional + The desired data-type for the array. The default, `None`, means + `np.array(fill_value).dtype`. + order : {'C'}, optional + Whether to store multidimensional data in C- or Fortran-contiguous + (row- or column-wise) order in memory. Currently only supports C order. + ctx: to specify the device, e.g. the i-th GPU. + out : ndarray or None, optional + A location into which the result is stored. + If provided, it must have the same shape and dtype as input ndarray. + If not provided or `None`, a freshly-allocated array is returned. + Returns + ------- + out : ndarray + Array of `fill_value` with the given shape, dtype, and order. + Notes + ----- + This function differs from the original `numpy.full + https://docs.scipy.org/doc/numpy/reference/generated/numpy.full.html`_ in + the following way(s): + - Have an additional `ctx` argument to specify the device + - Have an additional `out` argument + - Currently does not support `order` selection + See Also + -------- + empty : Return a new uninitialized array. + ones : Return a new array setting values to one. + zeros : Return a new array setting values to zero. + Examples + -------- + >>> np.full((2, 2), 10) + array([[10., 10.], + [10., 10.]]) + >>> np.full((2, 2), 2, dtype=np.int32, ctx=mx.cpu(0)) + array([[2, 2], + [2, 2]], dtype=int32) + """ + if order != 'C': + raise NotImplementedError + if ctx is None: + ctx = current_context() + dtype = _np.float32 if dtype is None else dtype + return _npi.full(shape=shape, value=fill_value, ctx=ctx, dtype=dtype, out=out) + + #pylint: disable= too-many-arguments, no-member, protected-access def _ufunc_helper(lhs, rhs, fn_array, fn_scalar, lfn_scalar, rfn_scalar=None, out=None): """ Helper function for element-wise operation. 
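The ndarray, numpy, and symbol frontends above now share one `full` implementation backed by the `_full` operator (its `_npi_full` alias is added in init_op.cc later in this patch), and the optional `out=` argument is what lets `__setitem__` fill an existing array in place through `self._full(value)`. A brief sketch (assumes a build with this patch and the numpy frontend active):

    from mxnet import numpy as np

    y = np.full((2, 2), 7.0)                                       # new frontend `full`
    x = np.zeros((2, 3))
    np.full(x.shape, 5.0, ctx=x.context, dtype=x.dtype, out=x)     # in-place fill via out=
    x[:] = 3.0                                                     # __setitem__ reuses the same path
    assert (x.asnumpy() == 3.0).all()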
diff --git a/python/mxnet/test_utils.py b/python/mxnet/test_utils.py index bb730fd3a007..4533729e2c15 100644 --- a/python/mxnet/test_utils.py +++ b/python/mxnet/test_utils.py @@ -1090,7 +1090,6 @@ def check_symbolic_forward(sym, location, expected, rtol=1E-4, atol=None, executor = sym.bind(ctx=ctx, args=location, args_grad=args_grad_data, aux_states=aux_states) for g in executor.grad_arrays: - print(g.shape) if g.ndim == 0: g[()] = 0 else: diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index f6b67d3cb437..71f459d49185 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -543,8 +543,6 @@ MXNET_DLL int MXNDArrayReshape64(NDArrayHandle handle, API_BEGIN(); NDArray *arr = static_cast(handle); mxnet::Tuple shape(dims, dims+ndim); - CHECK_GT(arr->shape().Size(), 0) << "Source ndarray's shape is undefined. Input shape: " - << arr->shape(); mxnet::TShape new_shape = mxnet::op::InferReshapeShape(shape, arr->shape(), reverse); *ptr = arr->ReshapeWithRecord(new_shape); *out = ptr; diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc index 7fca6aa3f733..cc21dd242a2d 100644 --- a/src/ndarray/ndarray.cc +++ b/src/ndarray/ndarray.cc @@ -2110,6 +2110,7 @@ void CopyFromToSimple( // copy function is special // that we need to remove kAcceptEmptyMutateTarget from it NNVM_REGISTER_OP(_copyto) +.add_alias("_npi_copyto") .set_num_inputs(1) .set_num_outputs(1) .set_attr("FInferShape", op::ElemwiseShape<1, 1>) diff --git a/src/operator/tensor/indexing_op.cc b/src/operator/tensor/indexing_op.cc index ad4e54db54f1..21aefc5b2fd4 100644 --- a/src/operator/tensor/indexing_op.cc +++ b/src/operator/tensor/indexing_op.cc @@ -815,6 +815,7 @@ Examples:: NNVM_REGISTER_OP(gather_nd) +.add_alias("_npi_gather_nd") .describe(R"code(Gather elements or slices from `data` and store to a tensor whose shape is defined by `indices`. @@ -1008,6 +1009,7 @@ Examples:: .add_arguments(ScatterNDParam::__FIELDS__()); NNVM_REGISTER_OP(_scatter_set_nd) +.add_alias("_npi_scatter_set_nd") .describe(R"code(This operator has the same functionality as scatter_nd except that it does not reset the elements not indexed by the input index `NDArray` in the input data `NDArray`. output should be explicitly diff --git a/src/operator/tensor/init_op.cc b/src/operator/tensor/init_op.cc index 4e8900be24ca..7f60bf4e2246 100644 --- a/src/operator/tensor/init_op.cc +++ b/src/operator/tensor/init_op.cc @@ -82,6 +82,7 @@ NNVM_REGISTER_OP(_ones) .add_arguments(InitOpParam::__FIELDS__()); NNVM_REGISTER_OP(_full) +.add_alias("_npi_full") .describe("fill target with a scalar value") .set_num_inputs(0) .set_num_outputs(1) diff --git a/src/operator/tensor/matrix_op-inl.h b/src/operator/tensor/matrix_op-inl.h index 58a535353e10..0d66907ad6cd 100644 --- a/src/operator/tensor/matrix_op-inl.h +++ b/src/operator/tensor/matrix_op-inl.h @@ -667,13 +667,15 @@ void SliceEx(const nnvm::NodeAttrs& attrs, } template -inline void GetIndexRange(const mxnet::TShape& dshape, +inline bool GetIndexRange(const mxnet::TShape& dshape, const mxnet::Tuple>& param_begin, const mxnet::Tuple>& param_end, const mxnet::Tuple>& param_step, common::StaticArray* begin, common::StaticArray* end, common::StaticArray* step) { + // Function returns false if output is zero-sized, true otherwise. 
+ bool zero_size_shape = false; CHECK_NE(dshape.ndim(), 0U); CHECK_LE(param_begin.ndim(), dshape.ndim()) << "Slicing axis exceeds data dimensions"; @@ -722,6 +724,10 @@ inline void GetIndexRange(const mxnet::TShape& dshape, (*begin)[i] = b; (*end)[i] = e; (*step)[i] = s; + // checking begin==end + if (b == e) { + zero_size_shape = true; + } } for (int i = param_begin.ndim(); i < dshape.ndim(); ++i) { @@ -729,6 +735,8 @@ inline void GetIndexRange(const mxnet::TShape& dshape, (*end)[i] = dshape[i]; (*step)[i] = 1; } + + return zero_size_shape; } inline void SetSliceOpOutputDimSize(const mxnet::TShape& dshape, @@ -973,7 +981,7 @@ inline bool SliceAssignOpShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(in_attrs->size(), 2U); CHECK_EQ(out_attrs->size(), 1U); const mxnet::TShape& dshape = (*in_attrs)[0]; - if (dshape.ndim() == 0U || dshape.Size() == 0U) return false; + if (!mxnet::ndim_is_known(dshape)) return false; mxnet::TShape vshape = dshape; // vshape is the value shape on the right hand side const SliceParam& param = nnvm::get(attrs.parsed); MXNET_NDIM_SWITCH(dshape.ndim(), ndim, { @@ -1016,7 +1024,11 @@ void SliceAssignOpForward(const nnvm::NodeAttrs& attrs, const SliceParam& param = nnvm::get(attrs.parsed); MXNET_NDIM_SWITCH(data.ndim(), ndim, { common::StaticArray begin, end, step; - GetIndexRange(data.shape_, param.begin, param.end, param.step, &begin, &end, &step); + bool zero_size_shape = GetIndexRange(data.shape_, param.begin, param.end, param.step, + &begin, &end, &step); + if (zero_size_shape) { + return; // slice_assign of zero-sized subspace needs no operation. + } MSHADOW_TYPE_SWITCH(out.type_flag_, DType, { MXNET_ASSIGN_REQ_SWITCH(req[0], Req, { int num_threads = val.shape_.FlatTo2D()[0]; @@ -1117,7 +1129,11 @@ void SliceAssignScalarOpForward(const nnvm::NodeAttrs& attrs, const SliceAssignScalarParam& param = nnvm::get(attrs.parsed); MXNET_NDIM_SWITCH(data.ndim(), ndim, { common::StaticArray begin, end, step; - GetIndexRange(data.shape_, param.begin, param.end, param.step, &begin, &end, &step); + bool zero_size_shape = GetIndexRange(data.shape_, param.begin, param.end, param.step, + &begin, &end, &step); + if (zero_size_shape) { + return; // slice_assign of zero-sized subspaced needs no operation. 
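The `GetIndexRange` change above lets `_slice_assign` and `_slice_assign_scalar` detect a zero-sized range and return before launching a kernel, which, together with the relaxed mshadow bound check at the top of this patch, makes empty slices legal end to end. A sketch of the resulting Python-side behavior (assumes a build with this patch; zero-size shapes require numpy shape semantics, hence `mx.np_shape()`):

    import mxnet as mx

    with mx.np_shape():
        x = mx.nd.arange(6).reshape(2, 3)
        x[0:0, :] = 1.0                     # zero-sized range: _slice_assign_scalar returns early
        x[0:0, :] = mx.nd.zeros((0, 3))     # likewise for _slice_assign
        print(x.asnumpy())                  # unchanged: [[0. 1. 2.] [3. 4. 5.]]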
+ } for (index_t i = 0; i < param.begin.ndim(); ++i) { const int b = begin[i], e = end[i], s = step[i]; SetSliceOpOutputDimSize(data.shape_, i, b, e, s, &vshape); @@ -1250,6 +1266,9 @@ void SliceAxisGrad_(const nnvm::NodeAttrs& attrs, const std::vector& inputs, const std::vector& req, const std::vector& outputs) { + if (outputs[0].shape_.Size() == 0) { + return; + } const SliceAxisParam& param = nnvm::get(attrs.parsed); using namespace mshadow::op; using namespace mshadow::expr; @@ -1258,7 +1277,6 @@ void SliceAxisGrad_(const nnvm::NodeAttrs& attrs, index_t begin, end; GetSliceAxisParams(param, outputs[0].shape_, &axis, &begin, &end); int ndim = outputs[0].shape_.ndim(); - if (axis + 1 == ndim) { MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, { mshadow::Tensor ograd = diff --git a/src/operator/tensor/matrix_op.cc b/src/operator/tensor/matrix_op.cc index f02a38ac07c4..d35c364fba47 100644 --- a/src/operator/tensor/matrix_op.cc +++ b/src/operator/tensor/matrix_op.cc @@ -507,6 +507,7 @@ Example:: [1., 3.]] )code" ADD_FILELINE) .add_alias("_npx_slice") +.add_alias("_npi_slice") .set_attr_parser(ParamParser) .set_attr("FInferShape", SliceOpShape) .set_attr("FInferType", ElemwiseType<1, 1>) @@ -531,6 +532,7 @@ NNVM_REGISTER_OP(_backward_slice) NNVM_REGISTER_OP(_slice_assign) .add_alias("_crop_assign") +.add_alias("_npi_slice_assign") .MXNET_DESCRIBE("Assign the rhs to a cropped subset of lhs.\n\n" "Requirements\n" "------------\n" @@ -556,6 +558,7 @@ NNVM_REGISTER_OP(_slice_assign) NNVM_REGISTER_OP(_slice_assign_scalar) .add_alias("_crop_assign_scalar") +.add_alias("_npi_slice_assign_scalar") .MXNET_DESCRIBE("(Assign the scalar to a cropped subset of the input.\n\n" "Requirements\n" "------------\n" diff --git a/tests/python/unittest/test_ndarray.py b/tests/python/unittest/test_ndarray.py index 87d3a41fd5bf..e15240c30dd3 100644 --- a/tests/python/unittest/test_ndarray.py +++ b/tests/python/unittest/test_ndarray.py @@ -141,12 +141,6 @@ def test_ndarray_setitem(): x_np[:, -3:-1, -2:-1] = 1 assert same(x.asnumpy(), x_np) - # Scalar array, no assignment allowed - with mx.np_shape(): - x = mx.nd.zeros(()) - with assert_raises(IndexError): - x[:] = 1 - # Assignments for empty axes for trivial_shape in [(1,), (1, 1), (1, 1, 1)]: x = mx.nd.zeros(trivial_shape) @@ -1387,7 +1381,6 @@ def assert_same(np_array, np_index, mx_array, mx_index, mx_value, np_value=None) except Exception as e: print('Failed with index = {}, value.shape = {}'.format(mx_index, mx_value.shape)) raise e - assert same(np_array, mx_array.asnumpy()) np_index = index @@ -1476,9 +1469,12 @@ def convert(num): np_array = np.arange(np.prod(shape), dtype='int32').reshape(shape) # index_list is a list of tuples. The tuple's first element is the index, the second one is a boolean value # indicating whether we should expect the result as a scalar compared to numpy. 
diff --git a/tests/python/unittest/test_ndarray.py b/tests/python/unittest/test_ndarray.py
index 87d3a41fd5bf..e15240c30dd3 100644
--- a/tests/python/unittest/test_ndarray.py
+++ b/tests/python/unittest/test_ndarray.py
@@ -141,12 +141,6 @@ def test_ndarray_setitem():
     x_np[:, -3:-1, -2:-1] = 1
     assert same(x.asnumpy(), x_np)
 
-    # Scalar array, no assignment allowed
-    with mx.np_shape():
-        x = mx.nd.zeros(())
-        with assert_raises(IndexError):
-            x[:] = 1
-
     # Assignments for empty axes
     for trivial_shape in [(1,), (1, 1), (1, 1, 1)]:
         x = mx.nd.zeros(trivial_shape)
@@ -1387,7 +1381,6 @@ def assert_same(np_array, np_index, mx_array, mx_index, mx_value, np_value=None)
             except Exception as e:
                 print('Failed with index = {}, value.shape = {}'.format(mx_index, mx_value.shape))
                 raise e
-
             assert same(np_array, mx_array.asnumpy())
 
         np_index = index
@@ -1476,9 +1469,12 @@ def convert(num):
     np_array = np.arange(np.prod(shape), dtype='int32').reshape(shape)
     # index_list is a list of tuples. The tuple's first element is the index, the second one is a boolean value
     # indicating whether we should expect the result as a scalar compared to numpy.
-    index_list = [(0, False), (np.int32(0), False), (np.int64(0), False),
+    index_list = [# Basic indexing
+                  # Single int as index
+                  (0, False), (np.int32(0), False), (np.int64(0), False),
                   (5, False), (np.int32(5), False), (np.int64(5), False),
                   (-1, False), (np.int32(-1), False), (np.int64(-1), False),
+                  # Slicing as index
                   (slice(5), False), (np_int(slice(5), np.int32), False), (np_int(slice(5), np.int64), False),
                   (slice(1, 5), False), (np_int(slice(1, 5), np.int32), False), (np_int(slice(1, 5), np.int64), False),
                   (slice(1, 5, 2), False), (np_int(slice(1, 5, 2), np.int32), False),
@@ -1499,6 +1495,7 @@ def convert(num):
                   (np_int(slice(None, None, -1)), False), (np_int(slice(None, None, -1), np.int64), False),
                   (slice(None, None, -2), False), (np_int(slice(None, None, -2), np.int32), False),
                   (np_int(slice(None, None, -2), np.int64), False),
+                  # slice(None) as indices
                   ((slice(None), slice(None), 1, 8), False),
                   ((slice(None), slice(None), -1, 8), False),
                   ((slice(None), slice(None), 1, -8), False),
@@ -1511,6 +1508,7 @@ def convert(num):
                   ((slice(None), 2, slice(1, 5), 1), False),
                   (np_int((slice(None), 2, slice(1, 5), 1)), False),
                   (np_int((slice(None), 2, slice(1, 5), 1), np.int64), False),
+                  # Multiple ints as indices
                   ((1, 2, 3), False),
                   (np_int((1, 2, 3)), False),
                   (np_int((1, 2, 3), np.int64), False),
@@ -1535,6 +1533,19 @@ def convert(num):
                   ((slice(1, 8, 2), 1, slice(3, 8), 2), False),
                   (np_int((slice(1, 8, 2), 1, slice(3, 8), 2)), False),
                   (np_int((slice(1, 8, 2), 1, slice(3, 8), 2), np.int64), False),
+                  # Test Ellipsis ('...')
+                  ((1, Ellipsis, -1), False),
+                  ((slice(2), Ellipsis, None, 0), False),
+                  # Test basic indexing with newaxis
+                  (None, False),
+                  ((1, None, -2, 3, -4), False),
+                  ((1, slice(2, 5), None), False),
+                  ((slice(None), slice(1, 4), None, slice(2, 3)), False),
+                  ((slice(1, 3), slice(1, 3), slice(1, 3), slice(1, 3), None), False),
+                  ((slice(1, 3), slice(1, 3), None, slice(1, 3), slice(1, 3)), False),
+                  ((None, slice(1, 2), 3, None), False),
+                  ((1, None, 2, 3, None, None, 4), False),
+                  # Advanced indexing
                   ([1], False), ([1, 2], False), ([2, 1, 3], False), ([7, 5, 0, 3, 6, 2, 1], False),
                   (np.array([6, 3], dtype=np.int32), False),
                   (np.array([[3, 4], [0, 6]], dtype=np.int32), False),
@@ -1571,16 +1582,15 @@ def convert(num):
                   (([[[[1]]]], [[2], [12]], slice(0, 3), slice(None)), False),
                   (([1, 2], slice(3, 5), [2, 3], [3, 4]), False),
                   (([1, 2], slice(3, 5), (2, 3), [3, 4]), False),
-                  ((1, Ellipsis, -1), False),
-                  ((slice(2), Ellipsis, None, 0), False),
-                  (None, False),
-                  ((1, None, -2, 3, -4), False),
-                  # TODO(zoeygxy): Support None in advanced indexing
-                  # (([1, 2], slice(3, 5), None, None, [3, 4]), False),
-                  # ((slice(None), slice(3, 5), None, None, [2, 3], [3, 4]), False),
-                  # ((slice(None), slice(3, 5), None, [2, 3], None, [3, 4]), False),
-                  # ((None, slice(None), slice(3, 5), [2, 3], None, [3, 4]), False),
+                  # Advanced indexing with None
+                  (([1, 2], slice(3, 5), None, None, [3, 4]), False),
+                  ((slice(None), slice(3, 5), None, None, [2, 3], [3, 4]), False),
+                  ((slice(None), slice(3, 5), None, [2, 3], None, [3, 4]), False),
+                  ((None, slice(None), slice(3, 5), [2, 3], None, [3, 4]), False),
+                  ((None, slice(None), None, slice(3, 5), [2, 3], None, [3, 4]), False),
+                  (([2, 3, 4], None, [3, 4, 6], None, slice(1, 2), None, [1, 2, 3]), False),
                   ]
+
     for index in index_list:
         test_getitem(np_array, index[0], index[1])
         test_setitem(np_array, index[0], index[1])
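The new test entries above cover `Ellipsis` and `None` (newaxis) in basic indexing and, for the first time, `None` interleaved with advanced indices, which the removed TODO had deferred. A short illustration of the NumPy equivalence the test asserts (shape chosen to match the test array; not part of the patch):

    import mxnet as mx

    x = mx.nd.arange(8 * 16 * 9 * 9).reshape((8, 16, 9, 9))
    ref = x.asnumpy()

    for idx in [(1, Ellipsis, -1),                           # Ellipsis in basic indexing
                (1, None, -2, 3, -4),                        # newaxis in basic indexing
                ([1, 2], slice(3, 5), None, None, [3, 4])]:  # None inside advanced indexing
        assert (x[idx].asnumpy() == ref[idx]).all()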
diff --git a/tests/python/unittest/test_numpy_ndarray.py b/tests/python/unittest/test_numpy_ndarray.py
index 7fa05206ac09..6dd7b43cd82c 100644
--- a/tests/python/unittest/test_numpy_ndarray.py
+++ b/tests/python/unittest/test_numpy_ndarray.py
@@ -377,26 +377,54 @@ def test_np_ndarray_copy():
 @with_seed()
 @use_np
 def test_np_ndarray_indexing():
+    """
+    Test all indexing.
+    """
+    def np_int(index, int_type=np.int32):
+        """
+        Helper function for testing indexing that converts slices to slices of ints or None, and tuples to
+        tuples of ints or None.
+        """
+        def convert(num):
+            if num is None:
+                return num
+            else:
+                return int_type(num)
+
+        if isinstance(index, slice):
+            return slice(convert(index.start), convert(index.stop), convert(index.step))
+        elif isinstance(index, tuple):  # tuple of slices and integers
+            ret = []
+            for elem in index:
+                if isinstance(elem, slice):
+                    ret.append(slice(convert(elem.start), convert(elem.stop), convert(elem.step)))
+                else:
+                    ret.append(convert(elem))
+            return tuple(ret)
+        else:
+            assert False
+
+    # Copied from test_ndarray.py. Under construction.
     def test_getitem(np_array, index):
-        """`is_scalar` indicates whether we should expect a scalar for the result.
-        If so, the indexed array of NDArray should call asscalar to compare
-        with numpy's indexed array."""
         np_index = index
+        if type(index) == mx.nd.NDArray:  # use of NDArray is prohibited
+            assert False
         if isinstance(index, np.ndarray):
             np_index = index.asnumpy()
         if isinstance(index, tuple):
-            np_index = []
-            for idx in index:
-                if isinstance(idx, np.ndarray):
-                    np_index.append(idx.asnumpy())
-                else:
-                    np_index.append(idx)
-            np_index = tuple(np_index)
-
+            np_index = tuple([
+                idx.asnumpy() if isinstance(idx, mx.nd.NDArray) else idx
+                for idx in index]
+            )
         np_indexed_array = np_array[np_index]
-        mx_array = np.array(np_array, dtype=np_array.dtype)
-        mx_indexed_array = mx_array[index].asnumpy()
-        assert same(np_indexed_array, mx_indexed_array), 'Failed with index=%s' % str(index)
+        mx_np_array = np.array(np_array, dtype=np_array.dtype)
+        try:
+            mx_indexed_array = mx_np_array[index]
+        except Exception as e:
+            print('Failed with index = {}'.format(index))
+            raise e
+        mx_indexed_array = mx_indexed_array.asnumpy()
+        assert same(np_indexed_array, mx_indexed_array), 'Failed with index = {}'.format(index)
 
     def test_setitem(np_array, index):
         def assert_same(np_array, np_index, mx_array, mx_index, mx_value, np_value=None):
@@ -406,10 +434,15 @@ def assert_same(np_array, np_index, mx_array, mx_index, mx_value, np_value=None)
                 np_array[np_index] = mx_value.asnumpy()
             else:
                 np_array[np_index] = mx_value
-            mx_array[mx_index] = mx_value
+            try:
+                mx_array[mx_index] = mx_value
+            except Exception as e:
+                print('Failed with index = {}, value.shape = {}'.format(mx_index, mx_value.shape))
+                raise e
+            assert same(np_array, mx_array.asnumpy())
 
-        np_index = index
+        np_index = index  # keep this native numpy type
         if isinstance(index, np.ndarray):
             np_index = index.asnumpy()
         if isinstance(index, tuple):
@@ -421,29 +454,33 @@ def assert_same(np_array, np_index, mx_array, mx_index, mx_value, np_value=None)
                    np_index.append(idx)
            np_index = tuple(np_index)
 
-        mx_array = np.array(np_array, dtype=np_array.dtype)
-        np_array = mx_array.asnumpy()
+        mx_array = np.array(np_array, dtype=np_array.dtype)  # mxnet.np.ndarray
+        np_array = mx_array.asnumpy()  # native numpy array
         indexed_array_shape = np_array[np_index].shape
         np_indexed_array = _np.random.randint(low=-10000, high=0, size=indexed_array_shape)
-        # test value is a numpy array without broadcast
+        # test value is a native numpy array without broadcast
         assert_same(np_array, np_index, mx_array, index, np_indexed_array)
+        # test value is a mxnet numpy array without broadcast
+        assert_same(np_array, np_index, mx_array, index, np.array(np_indexed_array))
         # test value is an numeric_type
        assert_same(np_array, np_index, mx_array, index, _np.random.randint(low=-10000, high=0))
        if len(indexed_array_shape) > 1:
-            # test ndarray with broadcast
-            assert_same(np_array, np_index, mx_array, index,
-                        _np.random.uniform(low=-10000, high=0, size=(indexed_array_shape[-1],)))
-            # test numpy array with broadcast
-            assert_same(np_array, np_index, mx_array, index,
-                        _np.random.randint(low=-10000, high=0, size=(indexed_array_shape[-1],)))
+            np_value = _np.random.randint(low=-10000, high=0, size=(indexed_array_shape[-1],))
+            # test mxnet ndarray with broadcast
+            assert_same(np_array, np_index, mx_array, index, np.array(np_value))
+            # test native numpy array with broadcast
+            assert_same(np_array, np_index, mx_array, index, np_value)
            # test list with broadcast
            assert_same(np_array, np_index, mx_array, index,
                        [_np.random.randint(low=-10000, high=0)] * indexed_array_shape[-1])
 
     def test_getitem_autograd(np_array, index):
+        """
+        np_array: native numpy array.
+        """
         x = np.array(np_array, dtype=np_array.dtype)
         x.attach_grad()
-        with autograd.record():
+        with mx.autograd.record():
             y = x[index]
         y.backward()
         value = np.ones_like(y)
@@ -452,177 +489,203 @@
         assert same(x_grad.asnumpy(), x.grad.asnumpy())
 
     def test_setitem_autograd(np_array, index):
+        """
+        np_array: native numpy array.
+        """
         x = np.array(np_array, dtype=np_array.dtype)
         out_shape = x[index].shape
         y = np.array(_np.random.uniform(size=out_shape))
         y.attach_grad()
         try:
-            with autograd.record():
+            with mx.autograd.record():
                 x[index] = y
-                assert False  # should not reach here
+                x.backward()
+                y_grad = np.ones_like(y)
+                assert same(y_grad.asnumpy(), y.grad.asnumpy())
         except mx.base.MXNetError as err:
             assert str(err).find('Inplace operations (+=, -=, x[:]=, etc) are not supported when recording with') != -1
 
-    def np_int(index, int_type=_np.int32):
-        def convert(num):
-            if num is None:
-                return num
-            else:
-                return int_type(num)
-
-        if isinstance(index, slice):
-            return slice(convert(index.start), convert(index.stop), convert(index.step))
-        elif isinstance(index, tuple):  # tuple of slices and integers
-            ret = []
-            for elem in index:
-                if isinstance(elem, slice):
-                    ret.append(slice(convert(elem.start), convert(elem.stop), convert(elem.step)))
-                else:
-                    ret.append(convert(elem))
-            return tuple(ret)
-        else:
-            assert False
-
     shape = (8, 16, 9, 9)
-    np_array = _np.arange(_np.prod(shape), dtype='int32').reshape(shape)
+    np_array = _np.arange(_np.prod(_np.array(shape)), dtype='int32').reshape(shape)  # native np array
+
+    # Test sliced output being ndarray:
     index_list = [
-        (),
-        0,
-        _np.int32(0),
-        _np.int64(0),
-        5,
-        _np.int32(5),
-        _np.int64(5),
-        -1,
-        _np.int32(-1),
-        _np.int64(-1),
-        slice(5),
-        np_int(slice(5), _np.int32),
-        np_int(slice(5), _np.int64),
-        slice(1, 5),
-        np_int(slice(1, 5), _np.int32),
-        np_int(slice(1, 5), _np.int64),
-        slice(1, 5, 2),
-        np_int(slice(1, 5, 2), _np.int32),
-        np_int(slice(1, 5, 2), _np.int64),
-        slice(7, 0, -1),
-        np_int(slice(7, 0, -1)),
-        np_int(slice(7, 0, -1), _np.int64),
-        slice(None, 6),
-        np_int(slice(None, 6)),
-        np_int(slice(None, 6), _np.int64),
-        slice(None, 6, 3),
-        np_int(slice(None, 6, 3)),
-        np_int(slice(None, 6, 3), _np.int64),
-        slice(1, None),
-        np_int(slice(1, None)),
-        np_int(slice(1, None), _np.int64),
-        slice(1, None, 3),
-        np_int(slice(1, None, 3)),
-        np_int(slice(1, None, 3), _np.int64),
-        slice(None, None, 2),
-        np_int(slice(None, None, 2)),
-        np_int(slice(None, None, 2), _np.int64),
-        slice(None, None, -1),
-        np_int(slice(None, None, -1)),
-        np_int(slice(None, None, -1), _np.int64),
-        slice(None, None, -2),
-        np_int(slice(None, None, -2), _np.int32),
-        np_int(slice(None, None, -2), _np.int64),
-        (slice(None), slice(None), 1, 8),
-        (slice(None), slice(None), -1, 8),
-        (slice(None), slice(None), 1, -8),
-        (slice(None), slice(None), -1, -8),
-        np_int((slice(None), slice(None), 1, 8)),
-        np_int((slice(None), slice(None), 1, 8), _np.int64),
-        (slice(None), slice(None), 1, 8),
-        np_int((slice(None), slice(None), -1, -8)),
-        np_int((slice(None), slice(None), -1, -8), _np.int64),
-        (slice(None), 2, slice(1, 5), 1),
-        np_int((slice(None), 2, slice(1, 5), 1)),
-        np_int((slice(None), 2, slice(1, 5), 1), _np.int64),
-        (1, 2, 3),
-        np_int((1, 2, 3)),
-        np_int((1, 2, 3), _np.int64),
-        (-1, -2, -3),
-        np_int((-1, -2, -3)),
-        np_int((-1, -2, -3), _np.int64),
-        (1, 2, 3, 4),
-        np_int((1, 2, 3, 4)),
-        np_int((1, 2, 3, 4), _np.int64),
-        (-4, -3, -2, -1),
-        np_int((-4, -3, -2, -1)),
-        np_int((-4, -3, -2, -1), _np.int64),
-        (slice(None, None, -1), 2, slice(1, 5), 1),
-        np_int((slice(None, None, -1), 2, slice(1, 5), 1)),
-        np_int((slice(None, None, -1), 2, slice(1, 5), 1), _np.int64),
-        (slice(None, None, -1), 2, slice(1, 7, 2), 1),
-        np_int((slice(None, None, -1), 2, slice(1, 7, 2), 1)),
-        np_int((slice(None, None, -1), 2, slice(1, 7, 2), 1), _np.int64),
-        (slice(1, 8, 2), slice(14, 2, -2), slice(3, 8), slice(0, 7, 3)),
-        np_int((slice(1, 8, 2), slice(14, 2, -2), slice(3, 8), slice(0, 7, 3))),
-        np_int((slice(1, 8, 2), slice(14, 2, -2), slice(3, 8), slice(0, 7, 3)), _np.int64),
-        (slice(1, 8, 2), 1, slice(3, 8), 2),
-        np_int((slice(1, 8, 2), 1, slice(3, 8), 2)),
-        np_int((slice(1, 8, 2), 1, slice(3, 8), 2), _np.int64),
-        [1],
-        [1, 2],
-        [2, 1, 3],
-        [7, 5, 0, 3, 6, 2, 1],
-        _np.array([6, 3], dtype=_np.int32),
-        _np.array([[3, 4], [0, 6]], dtype=_np.int32),
-        _np.array([[7, 3], [2, 6], [0, 5], [4, 1]], dtype=_np.int32),
-        _np.array([[7, 3], [2, 6], [0, 5], [4, 1]], dtype=_np.int64),
-        _np.array([[2], [0], [1]], dtype=_np.int32),
-        _np.array([[2], [0], [1]], dtype=_np.int64),
-        np.array([4, 7], dtype=_np.int32),
-        np.array([4, 7], dtype=_np.int64),
-        np.array([[3, 6], [2, 1]], dtype=_np.int32),
-        np.array([[3, 6], [2, 1]], dtype=_np.int64),
-        np.array([[7, 3], [2, 6], [0, 5], [4, 1]], dtype=_np.int32),
-        np.array([[7, 3], [2, 6], [0, 5], [4, 1]], dtype=_np.int64),
-        (1, [2, 3]),
-        (1, [2, 3], _np.array([[3], [0]], dtype=_np.int32)),
-        (1, [2, 3]),
-        (1, [2, 3], _np.array([[3], [0]], dtype=_np.int64)),
-        (1, [2], _np.array([[5], [3]], dtype=_np.int32), slice(None)),
-        (1, [2], _np.array([[5], [3]], dtype=_np.int64), slice(None)),
-        (1, [2, 3], _np.array([[6], [0]], dtype=_np.int32), slice(2, 5)),
-        (1, [2, 3], _np.array([[6], [0]], dtype=_np.int64), slice(2, 5)),
-        (1, [2, 3], _np.array([[4], [7]], dtype=_np.int32), slice(2, 5, 2)),
-        (1, [2, 3], _np.array([[4], [7]], dtype=_np.int64), slice(2, 5, 2)),
-        (1, [2], _np.array([[3]], dtype=_np.int32), slice(None, None, -1)),
-        (1, [2], _np.array([[3]], dtype=_np.int64), slice(None, None, -1)),
-        (1, [2], _np.array([[3]], dtype=_np.int32), np.array([[5, 7], [2, 4]], dtype=_np.int64)),
-        (1, [2], np.array([[4]], dtype=_np.int32), np.array([[1, 3], [5, 7]], dtype='int64')),
-        [0],
-        [0, 1],
-        [1, 2, 3],
-        [2, 0, 5, 6],
-        ([1, 1], [2, 3]),
-        ([1], [4], [5]),
-        ([1], [4], [5], [6]),
-        ([[1]], [[2]]),
-        ([[1]], [[2]], [[3]], [[4]]),
-        (slice(0, 2), [[1], [6]], slice(0, 2), slice(0, 5, 2)),
-        ([[[[1]]]], [[1]], slice(0, 3), [1, 5]),
-        ([[[[1]]]], 3, slice(0, 3), [1, 3]),
-        ([[[[1]]]], 3, slice(0, 3), 0),
-        ([[[[1]]]], [[2], [12]], slice(0, 3), slice(None)),
-        ([1, 2], slice(3, 5), [2, 3], [3, 4]),
-        ([1, 2], slice(3, 5), (2, 3), [3, 4]),
-        range(4),
-        range(3, 0, -1),
-        (range(4,), [1]),
-        # slice(0, 0) does not support output zero-size tensor yet
+        # Basic indexing
+        # Single int as index
+        0,
+        np.int32(0),
+        np.int64(0),
+        5,
+        np.int32(5),
+        np.int64(5),
+        -1,
+        np.int32(-1),
+        np.int64(-1),
+        # Slicing as index
+        slice(5),
+        np_int(slice(5), np.int32),
+        np_int(slice(5), np.int64),
+        slice(1, 5),
+        np_int(slice(1, 5), np.int32),
+        np_int(slice(1, 5), np.int64),
+        slice(1, 5, 2),
+        np_int(slice(1, 5, 2), np.int32),
+        np_int(slice(1, 5, 2), np.int64),
+        slice(7, 0, -1),
+        np_int(slice(7, 0, -1)),
+        np_int(slice(7, 0, -1), np.int64),
+        slice(None, 6),
+        np_int(slice(None, 6)),
+        np_int(slice(None, 6), np.int64),
+        slice(None, 6, 3),
+        np_int(slice(None, 6, 3)),
+        np_int(slice(None, 6, 3), np.int64),
+        slice(1, None),
+        np_int(slice(1, None)),
+        np_int(slice(1, None), np.int64),
+        slice(1, None, 3),
+        np_int(slice(1, None, 3)),
+        np_int(slice(1, None, 3), np.int64),
+        slice(None, None, 2),
+        np_int(slice(None, None, 2)),
+        np_int(slice(None, None, 2), np.int64),
+        slice(None, None, -1),
+        np_int(slice(None, None, -1)),
+        np_int(slice(None, None, -1), np.int64),
+        slice(None, None, -2),
+        np_int(slice(None, None, -2), np.int32),
+        np_int(slice(None, None, -2), np.int64),
+        # Multiple ints as indices
+        (1, 2, 3),
+        np_int((1, 2, 3)),
+        np_int((1, 2, 3), np.int64),
+        (-1, -2, -3),
+        np_int((-1, -2, -3)),
+        np_int((-1, -2, -3), np.int64),
+        (1, 2, 3, 4),
+        np_int((1, 2, 3, 4)),
+        np_int((1, 2, 3, 4), np.int64),
+        (-4, -3, -2, -1),
+        np_int((-4, -3, -2, -1)),
+        np_int((-4, -3, -2, -1), np.int64),
+        # slice(None) as indices
+        (slice(None), slice(None), 1, 8),
+        (slice(None), slice(None), -1, 8),
+        (slice(None), slice(None), 1, -8),
+        (slice(None), slice(None), -1, -8),
+        np_int((slice(None), slice(None), 1, 8)),
+        np_int((slice(None), slice(None), 1, 8), np.int64),
+        (slice(None), slice(None), 1, 8),
+        np_int((slice(None), slice(None), -1, -8)),
+        np_int((slice(None), slice(None), -1, -8), np.int64),
+        (slice(None), 2, slice(1, 5), 1),
+        np_int((slice(None), 2, slice(1, 5), 1)),
+        np_int((slice(None), 2, slice(1, 5), 1), np.int64),
+        # Mixture of ints and slices as indices
+        (slice(None, None, -1), 2, slice(1, 5), 1),
+        np_int((slice(None, None, -1), 2, slice(1, 5), 1)),
+        np_int((slice(None, None, -1), 2, slice(1, 5), 1), np.int64),
+        (slice(None, None, -1), 2, slice(1, 7, 2), 1),
+        np_int((slice(None, None, -1), 2, slice(1, 7, 2), 1)),
+        np_int((slice(None, None, -1), 2, slice(1, 7, 2), 1), np.int64),
+        (slice(1, 8, 2), slice(14, 2, -2), slice(3, 8), slice(0, 7, 3)),
+        np_int((slice(1, 8, 2), slice(14, 2, -2), slice(3, 8), slice(0, 7, 3))),
+        np_int((slice(1, 8, 2), slice(14, 2, -2), slice(3, 8), slice(0, 7, 3)), np.int64),
+        (slice(1, 8, 2), 1, slice(3, 8), 2),
+        np_int((slice(1, 8, 2), 1, slice(3, 8), 2)),
+        np_int((slice(1, 8, 2), 1, slice(3, 8), 2), np.int64),
+        # Test Ellipsis ('...')
+        (1, Ellipsis, -1),
+        (slice(2), Ellipsis, None, 0),
+        # Test newaxis
+        None,
+        (1, None, -2, 3, -4),
+        (1, slice(2, 5), None),
+        (slice(None), slice(1, 4), None, slice(2, 3)),
+        (slice(1, 3), slice(1, 3), slice(1, 3), slice(1, 3), None),
+        (slice(1, 3), slice(1, 3), None, slice(1, 3), slice(1, 3)),
+        (None, slice(1, 2), 3, None),
+        (1, None, 2, 3, None, None, 4),
+        # Advanced indexing
+        ([1, 2], slice(3, 5), None, None, [3, 4]),
+        (slice(None), slice(3, 5), None, None, [2, 3], [3, 4]),
+        (slice(None), slice(3, 5), None, [2, 3], None, [3, 4]),
+        (None, slice(None), slice(3, 5), [2, 3], None, [3, 4]),
+        [1],
+        [1, 2],
+        [2, 1, 3],
+        [7, 5, 0, 3, 6, 2, 1],
+        np.array([6, 3], dtype=np.int32),
+        np.array([[3, 4], [0, 6]], dtype=np.int32),
+        np.array([[7, 3], [2, 6], [0, 5], [4, 1]], dtype=np.int32),
+        np.array([[7, 3], [2, 6], [0, 5], [4, 1]], dtype=np.int64),
+        np.array([[2], [0], [1]], dtype=np.int32),
+        np.array([[2], [0], [1]], dtype=np.int64),
+        np.array([4, 7], dtype=np.int32),
+        np.array([4, 7], dtype=np.int64),
+        np.array([[3, 6], [2, 1]], dtype=np.int32),
+        np.array([[3, 6], [2, 1]], dtype=np.int64),
+        np.array([[7, 3], [2, 6], [0, 5], [4, 1]], dtype=np.int32),
+        np.array([[7, 3], [2, 6], [0, 5], [4, 1]], dtype=np.int64),
+        (1, [2, 3]),
+        (1, [2, 3], np.array([[3], [0]], dtype=np.int32)),
+        (1, [2, 3]),
+        (1, [2, 3], np.array([[3], [0]], dtype=np.int64)),
+        (1, [2], np.array([[5], [3]], dtype=np.int32), slice(None)),
+        (1, [2], np.array([[5], [3]], dtype=np.int64), slice(None)),
+        (1, [2, 3], np.array([[6], [0]], dtype=np.int32), slice(2, 5)),
+        (1, [2, 3], np.array([[6], [0]], dtype=np.int64), slice(2, 5)),
+        (1, [2, 3], np.array([[4], [7]], dtype=np.int32), slice(2, 5, 2)),
+        (1, [2, 3], np.array([[4], [7]], dtype=np.int64), slice(2, 5, 2)),
+        (1, [2], np.array([[3]], dtype=np.int32), slice(None, None, -1)),
+        (1, [2], np.array([[3]], dtype=np.int64), slice(None, None, -1)),
+        (1, [2], np.array([[3]], dtype=np.int32), np.array([[5, 7], [2, 4]], dtype=np.int64)),
+        (1, [2], np.array([[4]], dtype=np.int32), np.array([[1, 3], [5, 7]], dtype='int64')),
+        [0],
+        [0, 1],
+        [1, 2, 3],
+        [2, 0, 5, 6],
+        ([1, 1], [2, 3]),
+        ([1], [4], [5]),
+        ([1], [4], [5], [6]),
+        ([[1]], [[2]]),
+        ([[1]], [[2]], [[3]], [[4]]),
+        (slice(0, 2), [[1], [6]], slice(0, 2), slice(0, 5, 2)),
+        ([[[[1]]]], [[1]], slice(0, 3), [1, 5]),
+        ([[[[1]]]], 3, slice(0, 3), [1, 3]),
+        ([[[[1]]]], 3, slice(0, 3), 0),
+        ([[[[1]]]], [[2], [12]], slice(0, 3), slice(None)),
+        ([1, 2], slice(3, 5), [2, 3], [3, 4]),
+        ([1, 2], slice(3, 5), (2, 3), [3, 4]),
     ]
     for index in index_list:
         test_getitem(np_array, index)
         test_setitem(np_array, index)
         test_getitem_autograd(np_array, index)
-        if not isinstance(index, tuple) or len(index) != 0:
-            # When index = (), this is same a[()] = b is equivalent to b.copyto(a)
-            # which should have no problem to do autograd
+        test_setitem_autograd(np_array, index)
+
+    # Test indexing to zero-size tensors
+    index_list = [
+        (slice(0, 0), slice(0, 0), 1, 2),
+        (slice(0, 0), slice(0, 0), slice(0, 0), slice(0, 0)),
+    ]
+    for index in index_list:
+        test_getitem(np_array, index)
+        test_setitem(np_array, index)
+        test_getitem_autograd(np_array, index)
+        test_setitem_autograd(np_array, index)
+
+    # test zero-size tensors get and setitem
+    shapes_indices = [
+        ((0), [slice(None, None, None)]),
+        ((3, 0), [2, (slice(None, None, None)), (slice(None, None, None), None)]),
+    ]
+    for shape, indices in shapes_indices:
+        for index in indices:
+            np_array = np.zeros(shape)
+            test_getitem(np_array, index)
+            test_setitem(np_array, index)
+            test_getitem_autograd(np_array, index)
             test_setitem_autograd(np_array, index)
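The closing blocks extend coverage to zero-size results (e.g. `slice(0, 0)` on some axes) and to arrays that are themselves zero-size. A brief sketch of the behaviour being locked in, assuming the mxnet.numpy frontend from this branch (the array below is illustrative, not part of the patch):

    from mxnet import np, npx
    npx.set_np()

    x = np.arange(8 * 16 * 9 * 9).reshape((8, 16, 9, 9))

    # A slice that selects nothing yields a zero-size result,
    # and writing through it is a no-op rather than an error.
    y = x[0:0, 0:0, 1, 2]
    assert y.shape == (0, 0)
    x[0:0, 0:0, 1, 2] = 1.0

    # Indexing an array that is itself zero-size also works.
    z = np.zeros((3, 0))
    assert z[2].shape == (0,)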