CLN/BUG: fix ndarray assignment may cause unexpected cast
sinhrks committed Oct 23, 2016
1 parent 233d51d commit 046dc29
Showing 10 changed files with 311 additions and 144 deletions.
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v0.19.1.txt
@@ -57,5 +57,8 @@ Bug Fixes
- Bug in ``DataFrame.to_json`` where ``lines=True`` and a value contained a ``}`` character (:issue:`14391`)
- Bug in ``df.groupby`` causing an ``AttributeError`` when grouping a single index frame by a column and the index level (:issue:`14327`)

- Bug in assignment against datetime-like data with ``int`` which may incorrectly be converted to datetime-like (:issue:`14145`)
- Bug in assignment against ``int64`` data with an ``np.ndarray`` of ``float64`` dtype which may incorrectly keep the ``int64`` dtype (:issue:`14001`)

- Bug in ``pd.pivot_table`` may raise ``TypeError`` or ``ValueError`` when ``index`` or ``columns``
is not scalar and ``values`` is not specified (:issue:`14380`)
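
As a rough illustration of the two assignment fixes above (a hypothetical session sketching the intended 0.19.1 behavior; outputs assumed, not verified against this exact build):

>>> import numpy as np
>>> import pandas as pd
>>> df = pd.DataFrame({'a': [1, 2, 3]})           # int64 column
>>> df.loc[:, 'a'] = np.array([0.5, 1.5, 2.5])    # float64 ndarray
>>> df['a'].dtype                                 # previously stayed int64 (GH 14001)
dtype('float64')
>>> s = pd.Series(pd.to_datetime(['2016-01-01', '2016-01-02']))
>>> s[1] = 1                                      # previously coerced to a datetime (GH 14145)
>>> s.dtype                                       # now upcasts to object instead
dtype('O')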
22 changes: 8 additions & 14 deletions pandas/core/frame.py
@@ -24,7 +24,7 @@
import numpy.ma as ma

from pandas.types.cast import (_maybe_upcast,
_infer_dtype_from_scalar,
_cast_scalar_to_array,
_possibly_cast_to_datetime,
_possibly_infer_to_datetimelike,
_possibly_convert_platform,
@@ -332,15 +332,10 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
raise_with_traceback(exc)

if arr.ndim == 0 and index is not None and columns is not None:
if isinstance(data, compat.string_types) and dtype is None:
dtype = np.object_
if dtype is None:
dtype, data = _infer_dtype_from_scalar(data)

values = np.empty((len(index), len(columns)), dtype=dtype)
values.fill(data)
mgr = self._init_ndarray(values, index, columns, dtype=dtype,
copy=False)
values = _cast_scalar_to_array((len(index), len(columns)),
data, dtype=dtype)
mgr = self._init_ndarray(values, index, columns,
dtype=values.dtype, copy=False)
else:
raise PandasError('DataFrame constructor not properly called!')
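
The removed inline logic is presumably what the new ``_cast_scalar_to_array`` helper centralizes; a minimal sketch, assuming it mirrors the code it replaces:

def _cast_scalar_to_array(shape, value, dtype=None):
    # infer the dtype from the scalar when none is given,
    # then allocate an array of the requested shape and fill it
    if dtype is None:
        dtype, value = _infer_dtype_from_scalar(value)
    values = np.empty(shape, dtype=dtype)
    values.fill(value)
    return values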

@@ -454,7 +449,7 @@ def _get_axes(N, K, index=index, columns=columns):
values = _prep_ndarray(values, copy=copy)

if dtype is not None:
if values.dtype != dtype:
if not is_dtype_equal(values.dtype, dtype):
try:
values = values.astype(dtype)
except Exception as orig:
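
``is_dtype_equal`` is the safer comparison here because the requested dtype may be a pandas extension dtype (e.g. tz-aware datetimes) rather than a plain numpy dtype; an illustrative session, assuming the 0.19-era import path:

>>> from pandas.types.common import is_dtype_equal
>>> is_dtype_equal(np.dtype('int64'), 'int64')
True
>>> is_dtype_equal(np.dtype('datetime64[ns]'), 'datetime64[ns, US/Eastern]')
False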
@@ -2656,9 +2651,8 @@ def reindexer(value):

else:
# upcast the scalar
dtype, value = _infer_dtype_from_scalar(value)
value = np.repeat(value, len(self.index)).astype(dtype)
value = _possibly_cast_to_datetime(value, dtype)
value = _cast_scalar_to_array(len(self.index), value)
value = _possibly_cast_to_datetime(value, value.dtype)

# return internal types directly
if is_extension_type(value):
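The net effect in ``reindexer`` is that a scalar assigned to a new column is broadcast via ``_cast_scalar_to_array`` and then run through datetime coercion; roughly (a sketch):

>>> df = pd.DataFrame(index=range(3))
>>> df['ts'] = pd.Timestamp('2016-01-01')   # scalar broadcast over the index
>>> df['ts'].dtype                          # then cast to datetime64[ns]
dtype('<M8[ns]')
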
176 changes: 94 additions & 82 deletions pandas/core/internals.py
@@ -42,7 +42,7 @@
is_null_datelike_scalar)
import pandas.types.concat as _concat

from pandas.types.generic import ABCSeries
from pandas.types.generic import ABCSeries, ABCDatetimeIndex
from pandas.core.common import is_null_slice
import pandas.core.algorithms as algos

@@ -378,7 +378,8 @@ def fillna(self, value, limit=None, inplace=False, downcast=None,

# fillna, but if we cannot coerce, then try again as an ObjectBlock
try:
values, _, value, _ = self._try_coerce_args(self.values, value)
values, _, _, _ = self._try_coerce_args(self.values, value)
# value may be converted to internal, thus drop
blocks = self.putmask(mask, value, inplace=inplace)
blocks = [b.make_block(values=self._try_coerce_result(b.values))
for b in blocks]
@@ -666,8 +667,43 @@ def setitem(self, indexer, value, mgr=None):
if self.is_numeric:
value = np.nan

# coerce args
values, _, value, _ = self._try_coerce_args(self.values, value)
# coerce if block dtype can store value
values = self.values
try:
values, _, value, _ = self._try_coerce_args(values, value)
# can keep its own dtype
if hasattr(value, 'dtype') and is_dtype_equal(values.dtype,
value.dtype):
dtype = self.dtype
else:
dtype = 'infer'

except (TypeError, ValueError):
# current dtype cannot store value, coerce to common dtype
find_dtype = False

if hasattr(value, 'dtype'):
dtype = value.dtype
find_dtype = True

elif is_scalar(value):
if isnull(value):
# NaN promotion is handled in latter path
dtype = False
else:
dtype, _ = _infer_dtype_from_scalar(value,
pandas_dtype=True)
find_dtype = True
else:
dtype = 'infer'

if find_dtype:
dtype = _find_common_type([values.dtype, dtype])
if not is_dtype_equal(self.dtype, dtype):
b = self.astype(dtype)
return b.setitem(indexer, value, mgr=mgr)

# value must be storeable at this moment
arr_value = np.array(value)

# cast the values to a type that can hold nan (if necessary)
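
The rewritten path above first tries to coerce in place and, when the block cannot hold the value, computes a common dtype and re-dispatches ``setitem`` on an upcast block. numpy's promotion rules give the flavor of what ``_find_common_type`` resolves (an analogy, not the pandas helper itself):

>>> np.find_common_type([np.dtype('int64'), np.dtype('float64')], [])
dtype('float64')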
@@ -697,87 +733,52 @@
raise ValueError("cannot set using a slice indexer with a "
"different length than the value")

try:

    def _is_scalar_indexer(indexer):
        # return True if we are all scalar indexers

        if arr_value.ndim == 1:
            if not isinstance(indexer, tuple):
                indexer = tuple([indexer])
            return all([is_scalar(idx) for idx in indexer])
        return False

    def _is_empty_indexer(indexer):
        # return a boolean if we have an empty indexer

        if arr_value.ndim == 1:
            if not isinstance(indexer, tuple):
                indexer = tuple([indexer])
            return any(isinstance(idx, np.ndarray) and len(idx) == 0
                       for idx in indexer)
        return False

    # empty indexers
    # 8669 (empty)
    if _is_empty_indexer(indexer):
        pass

    # setting a single element for each dim and with a rhs that could
    # be say a list
    # GH 6043
    elif _is_scalar_indexer(indexer):
        values[indexer] = value

    # if we are an exact match (ex-broadcasting),
    # then use the resultant dtype
    elif (len(arr_value.shape) and
          arr_value.shape[0] == values.shape[0] and
          np.prod(arr_value.shape) == np.prod(values.shape)):
        values[indexer] = value
        values = values.astype(arr_value.dtype)

    # set
    else:
        values[indexer] = value

    # coerce and try to infer the dtypes of the result
    if hasattr(value, 'dtype') and is_dtype_equal(values.dtype,
                                                  value.dtype):
        dtype = value.dtype
    elif is_scalar(value):
        dtype, _ = _infer_dtype_from_scalar(value)
    else:
        dtype = 'infer'
    values = self._try_coerce_and_cast_result(values, dtype)
    block = self.make_block(transf(values), fastpath=True)

    # may have to soft convert_objects here
    if block.is_object and not self.is_object:
        block = block.convert(numeric=False)

    return block
except ValueError:
    raise
except TypeError:

    # cast to the passed dtype if possible
    # otherwise raise the original error
    try:
        # e.g. we are uint32 and our value is uint64
        # this is for compat with older numpies
        block = self.make_block(transf(values.astype(value.dtype)))
        return block.setitem(indexer=indexer, value=value, mgr=mgr)

    except:
        pass

    raise

except Exception:
    pass

return [self]

def _is_scalar_indexer(indexer):
    # return True if we are all scalar indexers

    if arr_value.ndim == 1:
        if not isinstance(indexer, tuple):
            indexer = tuple([indexer])
        return all([is_scalar(idx) for idx in indexer])
    return False

def _is_empty_indexer(indexer):
    # return a boolean if we have an empty indexer

    if arr_value.ndim == 1:
        if not isinstance(indexer, tuple):
            indexer = tuple([indexer])
        return any(isinstance(idx, np.ndarray) and len(idx) == 0
                   for idx in indexer)
    return False

# empty indexers
# 8669 (empty)
if _is_empty_indexer(indexer):
    pass

# setting a single element for each dim and with a rhs that could
# be say a list
# GH 6043
elif _is_scalar_indexer(indexer):
    values[indexer] = value

# if we are an exact match (ex-broadcasting),
# then use the resultant dtype
elif (len(arr_value.shape) and
      arr_value.shape[0] == values.shape[0] and
      np.prod(arr_value.shape) == np.prod(values.shape)):
    values[indexer] = value
    values = values.astype(arr_value.dtype)

# set
else:
    values[indexer] = value

# coerce and try to infer the dtypes of the result
values = self._try_coerce_and_cast_result(values, dtype)
block = self.make_block(transf(values), fastpath=True)
return block
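
The ``_is_scalar_indexer`` branch exists so a list-like rhs can be stored as a single element (GH 6043); a sketch of the user-visible behavior, assuming an object-dtype container:

>>> s = pd.Series([1, 2, 3], dtype=object)
>>> s[0] = [10, 20]     # scalar indexer: the list itself is stored
>>> s[0]
[10, 20]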

def putmask(self, mask, new, align=True, inplace=False, axis=0,
transpose=False, mgr=None):
@@ -1241,6 +1242,7 @@ def func(cond, values, other):

values, values_mask, other, other_mask = self._try_coerce_args(
values, other)

try:
return self._try_coerce_result(expressions.where(
cond, values, other, raise_on_error=True))
@@ -1497,6 +1499,7 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0,
new = new[mask]

mask = _safe_reshape(mask, new_values.shape)

new_values[mask] = new
new_values = self._try_coerce_result(new_values)
return [self.make_block(values=new_values)]
@@ -1666,7 +1669,7 @@ def fillna(self, value, **kwargs):

# allow filling with integers to be
# interpreted as seconds
if not isinstance(value, np.timedelta64) and is_integer(value):
if not isinstance(value, np.timedelta64):
value = Timedelta(value, unit='s')
return super(TimeDeltaBlock, self).fillna(value, **kwargs)
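
With the ``is_integer`` guard dropped, any non-``timedelta64`` fill value is routed through ``Timedelta(value, unit='s')``; for example (a sketch, output format assumed):

>>> s = pd.Series(pd.to_timedelta(['1 day', np.nan]))
>>> s.fillna(1)          # the integer is read as seconds
0   1 days 00:00:00
1   0 days 00:00:01
dtype: timedelta64[ns]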

@@ -1898,6 +1901,15 @@ def _maybe_downcast(self, blocks, downcast=None):
def _can_hold_element(self, element):
return True

def _try_coerce_args(self, values, other):
""" provide coercion to our input arguments """

if isinstance(other, ABCDatetimeIndex):
# to store DatetimeTZBlock as object
other = other.asobject.values

return values, False, other, False
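
This unboxes a tz-aware ``DatetimeIndex`` into object-dtype ``Timestamp``s before storage; roughly what ``asobject`` yields (illustrative):

>>> idx = pd.DatetimeIndex(['2016-01-01'], tz='US/Eastern')
>>> idx.asobject.values
array([Timestamp('2016-01-01 00:00:00-0500', tz='US/Eastern')], dtype=object)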

def _try_cast(self, element):
return element

@@ -2234,8 +2246,6 @@ def _try_coerce_args(self, values, other):
"naive Block")
other_mask = isnull(other)
other = other.asm8.view('i8')
elif hasattr(other, 'dtype') and is_integer_dtype(other):
other = other.view('i8')
else:
try:
other = np.asarray(other)
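
Dropping the ``is_integer_dtype`` branch is the core of the GH 14145 fix: integer arrays are no longer silently viewed as ``i8`` nanoseconds, so coercion fails and the new ``setitem`` fallback upcasts instead; e.g. (a sketch):

>>> s = pd.Series(pd.to_datetime(['2016-01-01', '2016-01-02']))
>>> s[:] = np.array([0, 1])      # int64 ndarray no longer viewed as datetimes
>>> s.dtype                      # upcast to object instead
dtype('O')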
@@ -2411,6 +2421,8 @@ def _try_coerce_args(self, values, other):
raise ValueError("incompatible or non tz-aware value")
other_mask = isnull(other)
other = other.value
else:
raise TypeError

return values, values_mask, other, other_mask
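
Raising ``TypeError`` for incompatible values lets the new ``setitem`` fallback catch it and upcast the tz-aware block to a common dtype, mirroring the naive path; e.g. (a sketch):

>>> s = pd.Series(pd.date_range('2016-01-01', periods=2, tz='US/Eastern'))
>>> s[0] = 1             # incompatible value -> TypeError -> upcast
>>> s.dtype
dtype('O')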

13 changes: 5 additions & 8 deletions pandas/core/panel.py
@@ -9,6 +9,7 @@
import numpy as np

from pandas.types.cast import (_infer_dtype_from_scalar,
_cast_scalar_to_array,
_possibly_cast_item)
from pandas.types.common import (is_integer, is_list_like,
is_string_like, is_scalar)
@@ -166,11 +167,9 @@ def _init_data(self, data, copy, dtype, **kwargs):
copy = False
dtype = None
elif is_scalar(data) and all(x is not None for x in passed_axes):
if dtype is None:
dtype, data = _infer_dtype_from_scalar(data)
values = np.empty([len(x) for x in passed_axes], dtype=dtype)
values.fill(data)
mgr = self._init_matrix(values, passed_axes, dtype=dtype,
values = _cast_scalar_to_array([len(x) for x in passed_axes],
data, dtype=dtype)
mgr = self._init_matrix(values, passed_axes, dtype=values.dtype,
copy=False)
copy = False
else: # pragma: no cover
@@ -570,9 +569,7 @@ def __setitem__(self, key, value):
shape[1:], tuple(map(int, value.shape))))
mat = np.asarray(value)
elif is_scalar(value):
dtype, value = _infer_dtype_from_scalar(value)
mat = np.empty(shape[1:], dtype=dtype)
mat.fill(value)
mat = _cast_scalar_to_array(shape[1:], value)
else:
raise TypeError('Cannot set item of type: %s' % str(type(value)))
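
On the Panel side the same helper now drives both scalar construction and scalar ``__setitem__``; for instance (a sketch, Panel API as of 0.19):

>>> p = pd.Panel(5, items=['a'], major_axis=[0, 1], minor_axis=[0])
>>> p.values.dtype               # dtype inferred from the scalar
dtype('int64')
>>> p['b'] = 1.5                 # scalar broadcast to the item's shape
>>> p['b'].values.dtype
dtype('float64')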
