CLN/BUG: fix ndarray assignment may cause unexpected cast #14145

Closed · wants to merge 2 commits
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.20.0.txt
@@ -446,6 +446,8 @@ Bug Fixes
 - Bug in ``GroupBy.get_group()`` failing with a categorical grouper (:issue:`15155`)
 - Bug in ``pandas.tools.utils.cartesian_product()`` with large input can cause overflow on windows (:issue:`15265`)

+- Bug in assignment against datetime-like data with ``int`` may incorrectly be converted to datetime-like (:issue:`14145`)
+- Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may incorrectly keep ``int64`` dtype (:issue:`14001`)


 - Bug in ``.groupby(...).rolling(...)`` when ``on`` is specified and using a ``DatetimeIndex`` (:issue:`15130`)
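Note: to make the two entries above concrete, here is a minimal behavioral sketch, assuming 0.20-era pandas with this patch applied (output comments are expected results, not guaranteed on other versions):

```python
import numpy as np
import pandas as pd

# GH 14145: an int assigned into datetime-like data should no longer be
# silently reinterpreted as a nanosecond timestamp; the column is
# expected to fall back to object dtype and keep the plain int.
s = pd.Series([pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')])
s[1] = 1
print(s.dtype)        # expected: object

# GH 14001: a float64 ndarray assigned into int64 data should upcast the
# result to float64 instead of keeping int64 (and truncating values).
df = pd.DataFrame({'a': [1, 2, 3]})
df.loc[:, 'a'] = np.array([1.5, 2.5, 3.5])
print(df['a'].dtype)  # expected: float64
```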
23 changes: 9 additions & 14 deletions pandas/core/frame.py
@@ -23,7 +23,8 @@
 import numpy as np
 import numpy.ma as ma

-from pandas.types.cast import (_maybe_upcast, _infer_dtype_from_scalar,
+from pandas.types.cast import (_maybe_upcast,
+                               _cast_scalar_to_array,
                                _possibly_cast_to_datetime,
                                _possibly_infer_to_datetimelike,
                                _possibly_convert_platform,
@@ -333,15 +334,10 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
             raise_with_traceback(exc)

         if arr.ndim == 0 and index is not None and columns is not None:
-            if isinstance(data, compat.string_types) and dtype is None:
-                dtype = np.object_
-            if dtype is None:
-                dtype, data = _infer_dtype_from_scalar(data)
-
-            values = np.empty((len(index), len(columns)), dtype=dtype)
-            values.fill(data)
-            mgr = self._init_ndarray(values, index, columns, dtype=dtype,
-                                     copy=False)
+            values = _cast_scalar_to_array((len(index), len(columns)),
+                                           data, dtype=dtype)
+            mgr = self._init_ndarray(values, index, columns,
+                                     dtype=values.dtype, copy=False)
         else:
             raise PandasError('DataFrame constructor not properly called!')
@@ -455,7 +451,7 @@ def _get_axes(N, K, index=index, columns=columns):
         values = _prep_ndarray(values, copy=copy)

         if dtype is not None:
-            if values.dtype != dtype:
+            if not is_dtype_equal(values.dtype, dtype):
                 try:
                     values = values.astype(dtype)
                 except Exception as orig:
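Note: `is_dtype_equal` is safer than a raw `!=` because it normalizes string aliases and pandas extension dtypes before comparing. A small illustration (`pandas.types.common` is assumed as the 0.20-era import path):

```python
import numpy as np
from pandas.types.common import is_dtype_equal  # 0.20-era location

print(is_dtype_equal(np.dtype('int64'), 'int64'))   # True
print(is_dtype_equal('category', 'category'))       # True
# tz-aware and naive datetimes are different dtypes:
print(is_dtype_equal('datetime64[ns]',
                     'datetime64[ns, US/Eastern]'))  # False
```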
@@ -2641,9 +2637,8 @@ def reindexer(value):

         else:
             # upcast the scalar
-            dtype, value = _infer_dtype_from_scalar(value)
-            value = np.repeat(value, len(self.index)).astype(dtype)
-            value = _possibly_cast_to_datetime(value, dtype)
+            value = _cast_scalar_to_array(len(self.index), value)
+            value = _possibly_cast_to_datetime(value, value.dtype)

         # return internal types directly
         if is_extension_type(value):
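Note: this path runs when a scalar is inserted as a new column; broadcasting now goes through the same helper, followed by the datetime cast. A behavior sketch:

```python
import pandas as pd

df = pd.DataFrame({'a': [1, 2, 3]})
# The scalar is broadcast to len(df.index) and then passed through
# _possibly_cast_to_datetime, so a Timestamp lands as datetime64[ns].
df['ts'] = pd.Timestamp('2017-01-01')
print(df['ts'].dtype)  # expected: datetime64[ns]
```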
176 changes: 94 additions & 82 deletions pandas/core/internals.py
@@ -41,7 +41,7 @@
                                  is_null_datelike_scalar)
 import pandas.types.concat as _concat

-from pandas.types.generic import ABCSeries
+from pandas.types.generic import ABCSeries, ABCDatetimeIndex
 from pandas.core.common import is_null_slice
 import pandas.core.algorithms as algos
@@ -379,7 +379,8 @@ def fillna(self, value, limit=None, inplace=False, downcast=None,

         # fillna, but if we cannot coerce, then try again as an ObjectBlock
         try:
-            values, _, value, _ = self._try_coerce_args(self.values, value)
+            values, _, _, _ = self._try_coerce_args(self.values, value)
+            # value may be converted to internal, thus drop
             blocks = self.putmask(mask, value, inplace=inplace)
             blocks = [b.make_block(values=self._try_coerce_result(b.values))
                       for b in blocks]
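Note: the "value may be converted to internal, thus drop" comment refers to datetime-like blocks, whose `_try_coerce_args` returns values in the internal integer (i8) representation. A sketch of why forwarding the coerced value would be wrong:

```python
import pandas as pd

ts = pd.Timestamp('2011-01-01')
# Datetime-like blocks store values as int64 nanoseconds internally:
print(ts.value)   # 1293840000000000000
# If this i8 integer were forwarded to putmask, it would look like a
# plain int and could be coerced a second time (incorrectly);
# forwarding the original Timestamp keeps the type information intact.
```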
@@ -673,8 +674,43 @@ def setitem(self, indexer, value, mgr=None):
             if self.is_numeric:
                 value = np.nan

-        # coerce args
-        values, _, value, _ = self._try_coerce_args(self.values, value)
+        # coerce if block dtype can store value
+        values = self.values
+        try:
+            values, _, value, _ = self._try_coerce_args(values, value)
+            # can keep its own dtype
+            if hasattr(value, 'dtype') and is_dtype_equal(values.dtype,
+                                                          value.dtype):
+                dtype = self.dtype
+            else:
+                dtype = 'infer'
+
+        except (TypeError, ValueError):
+            # current dtype cannot store value, coerce to common dtype
+            find_dtype = False
+
+            if hasattr(value, 'dtype'):
+                dtype = value.dtype
+                find_dtype = True
+
+            elif is_scalar(value):
+                if isnull(value):
+                    # NaN promotion is handled in latter path
+                    dtype = False
+                else:
+                    dtype, _ = _infer_dtype_from_scalar(value,
+                                                        pandas_dtype=True)
+                    find_dtype = True
+            else:
+                dtype = 'infer'
+
+            if find_dtype:
+                dtype = _find_common_type([values.dtype, dtype])
+                if not is_dtype_equal(self.dtype, dtype):
+                    b = self.astype(dtype)
+                    return b.setitem(indexer, value, mgr=mgr)
+
+        # value must be storeable at this moment
         arr_value = np.array(value)

         # cast the values to a type that can hold nan (if necessary)
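Note: the net effect of the new try/except: when the block's dtype cannot store the value, the block is cast once to the common dtype and the assignment is retried, rather than failing or truncating. A behavioral sketch:

```python
import pandas as pd

s = pd.Series([1, 2, 3])   # int64 block
s[1] = 2.5                 # int64 cannot hold 2.5
# _try_coerce_args raises, the common dtype of int64 and float64 is
# found (float64), the block is astype'd and setitem is retried.
print(s.dtype)     # expected: float64
print(s.tolist())  # expected: [1.0, 2.5, 3.0]
```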
@@ -704,87 +740,52 @@
raise ValueError("cannot set using a slice indexer with a "
"different length than the value")

try:

def _is_scalar_indexer(indexer):
# return True if we are all scalar indexers

if arr_value.ndim == 1:
if not isinstance(indexer, tuple):
indexer = tuple([indexer])
return all([is_scalar(idx) for idx in indexer])
return False

def _is_empty_indexer(indexer):
# return a boolean if we have an empty indexer
def _is_scalar_indexer(indexer):
# return True if we are all scalar indexers

if arr_value.ndim == 1:
if not isinstance(indexer, tuple):
indexer = tuple([indexer])
return any(isinstance(idx, np.ndarray) and len(idx) == 0
for idx in indexer)
return False

# empty indexers
# 8669 (empty)
if _is_empty_indexer(indexer):
pass

# setting a single element for each dim and with a rhs that could
# be say a list
# GH 6043
elif _is_scalar_indexer(indexer):
values[indexer] = value

# if we are an exact match (ex-broadcasting),
# then use the resultant dtype
elif (len(arr_value.shape) and
arr_value.shape[0] == values.shape[0] and
np.prod(arr_value.shape) == np.prod(values.shape)):
values[indexer] = value
values = values.astype(arr_value.dtype)

# set
else:
values[indexer] = value

# coerce and try to infer the dtypes of the result
if hasattr(value, 'dtype') and is_dtype_equal(values.dtype,
value.dtype):
dtype = value.dtype
elif is_scalar(value):
dtype, _ = _infer_dtype_from_scalar(value)
else:
dtype = 'infer'
values = self._try_coerce_and_cast_result(values, dtype)
block = self.make_block(transf(values), fastpath=True)

# may have to soft convert_objects here
if block.is_object and not self.is_object:
block = block.convert(numeric=False)

return block
except ValueError:
raise
except TypeError:
if arr_value.ndim == 1:
if not isinstance(indexer, tuple):
indexer = tuple([indexer])
return all([is_scalar(idx) for idx in indexer])
return False

# cast to the passed dtype if possible
# otherwise raise the original error
try:
# e.g. we are uint32 and our value is uint64
# this is for compat with older numpies
block = self.make_block(transf(values.astype(value.dtype)))
return block.setitem(indexer=indexer, value=value, mgr=mgr)
def _is_empty_indexer(indexer):
# return a boolean if we have an empty indexer

except:
pass

raise
if arr_value.ndim == 1:
if not isinstance(indexer, tuple):
indexer = tuple([indexer])
return any(isinstance(idx, np.ndarray) and len(idx) == 0
for idx in indexer)
return False

except Exception:
# empty indexers
# 8669 (empty)
if _is_empty_indexer(indexer):
pass

return [self]
# setting a single element for each dim and with a rhs that could
# be say a list
# GH 6043
elif _is_scalar_indexer(indexer):
values[indexer] = value

# if we are an exact match (ex-broadcasting),
# then use the resultant dtype
elif (len(arr_value.shape) and
arr_value.shape[0] == values.shape[0] and
np.prod(arr_value.shape) == np.prod(values.shape)):
values[indexer] = value
values = values.astype(arr_value.dtype)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah this has probably got to the be most convoluted code!


# set
else:
values[indexer] = value

# coerce and try to infer the dtypes of the result
values = self._try_coerce_and_cast_result(values, dtype)
block = self.make_block(transf(values), fastpath=True)
return block

def putmask(self, mask, new, align=True, inplace=False, axis=0,
transpose=False, mgr=None):
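Note: for reference, what the two hoisted helpers classify, mirrored as standalone functions (a sketch for the 1-D case):

```python
import numpy as np

def is_empty_indexer(indexer):
    # mirrors _is_empty_indexer: nothing would actually be set
    if not isinstance(indexer, tuple):
        indexer = (indexer,)
    return any(isinstance(idx, np.ndarray) and len(idx) == 0
               for idx in indexer)

def is_scalar_indexer(indexer):
    # mirrors _is_scalar_indexer: a single element per dimension
    if not isinstance(indexer, tuple):
        indexer = (indexer,)
    return all(np.isscalar(idx) for idx in indexer)

print(is_empty_indexer(np.array([], dtype=np.intp)))  # True
print(is_scalar_indexer(2))                           # True
print(is_scalar_indexer(slice(None)))                 # False
```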
@@ -1255,6 +1256,7 @@ def func(cond, values, other):

         values, values_mask, other, other_mask = self._try_coerce_args(
             values, other)
+
         try:
             return self._try_coerce_result(expressions.where(
                 cond, values, other, raise_on_error=True))
@@ -1534,6 +1536,7 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0,
             new = new[mask]

         mask = _safe_reshape(mask, new_values.shape)
+
         new_values[mask] = new
         new_values = self._try_coerce_result(new_values)
         return [self.make_block(values=new_values)]
@@ -1703,7 +1706,7 @@ def fillna(self, value, **kwargs):

         # allow filling with integers to be
         # interpreted as seconds
-        if not isinstance(value, np.timedelta64) and is_integer(value):
+        if not isinstance(value, np.timedelta64):
             value = Timedelta(value, unit='s')
         return super(TimeDeltaBlock, self).fillna(value, **kwargs)
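Note: with the `is_integer` check removed, any non-``timedelta64`` fill value is routed through ``Timedelta(value, unit='s')``, so floats get the seconds interpretation too, not only ints. A sketch of the expected behavior:

```python
import numpy as np
import pandas as pd

s = pd.Series(pd.to_timedelta([1, np.nan, 3], unit='d'))
# The fill value 1 is interpreted as 1 second (not 1 day, not 1 ns).
print(s.fillna(1))
# expected:
# 0   1 days 00:00:00
# 1   0 days 00:00:01
# 2   3 days 00:00:00
```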

@@ -1937,6 +1940,15 @@ def _maybe_downcast(self, blocks, downcast=None):
     def _can_hold_element(self, element):
         return True

+    def _try_coerce_args(self, values, other):
+        """ provide coercion to our input arguments """
+
+        if isinstance(other, ABCDatetimeIndex):
+            # to store DatetimeTZBlock as object
+            other = other.asobject.values
+
+        return values, False, other, False
+
     def _try_cast(self, element):
         return element
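Note: the point of the new `ObjectBlock._try_coerce_args` is that a tz-aware `DatetimeIndex` stored into an object block becomes individual Timestamp objects rather than the index's internal representation (0.20-era `.asobject` API assumed):

```python
import pandas as pd

idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], tz='US/Eastern')
as_obj = idx.asobject.values
print(as_obj.dtype)  # object: an ndarray of tz-aware Timestamps,
                     # not the index's internal i8 values
```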

@@ -2276,8 +2288,6 @@ def _try_coerce_args(self, values, other):
"naive Block")
other_mask = isnull(other)
other = other.asm8.view('i8')
elif hasattr(other, 'dtype') and is_integer_dtype(other):
other = other.view('i8')
else:
try:
other = np.asarray(other)
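Note: deleting this `is_integer_dtype` branch is the heart of GH 14145: integer data is no longer blindly `.view('i8')`-ed into nanosecond timestamps. A before/after sketch of the expected behavior:

```python
import pandas as pd

s = pd.Series(pd.to_datetime(['2011-01-01', '2011-01-02']))
s[0] = 1
# before: 1 could be viewed as i8 nanoseconds and silently become
#         1970-01-01 00:00:00.000000001
# after:  coercion fails, setitem falls back to the common dtype, and
#         the column becomes object holding the plain int
print(s.dtype)  # expected: object
print(s[0])     # expected: 1
```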
@@ -2453,6 +2463,8 @@ def _try_coerce_args(self, values, other):
raise ValueError("incompatible or non tz-aware value")
other_mask = isnull(other)
other = other.value
else:
raise TypeError

return values, values_mask, other, other_mask

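Note: raising `TypeError` here hands control to the new `setitem` fallback: instead of mangling an incompatible value, the tz-aware block is cast to the common dtype and the set is retried. Sketch:

```python
import pandas as pd

s = pd.Series(pd.date_range('2011-01-01', periods=2, tz='US/Eastern'))
s[0] = 1   # incompatible with datetime64[ns, US/Eastern]
# _try_coerce_args raises TypeError -> setitem computes the common
# dtype (object), casts the block, and retries the assignment.
print(s.dtype)  # expected: object
```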
13 changes: 5 additions & 8 deletions pandas/core/panel.py
@@ -9,6 +9,7 @@
 import numpy as np

 from pandas.types.cast import (_infer_dtype_from_scalar,
+                               _cast_scalar_to_array,
                                _possibly_cast_item)
 from pandas.types.common import (is_integer, is_list_like,
                                  is_string_like, is_scalar)
@@ -166,11 +167,9 @@ def _init_data(self, data, copy, dtype, **kwargs):
             copy = False
             dtype = None
         elif is_scalar(data) and all(x is not None for x in passed_axes):
-            if dtype is None:
-                dtype, data = _infer_dtype_from_scalar(data)
-            values = np.empty([len(x) for x in passed_axes], dtype=dtype)
-            values.fill(data)
-            mgr = self._init_matrix(values, passed_axes, dtype=dtype,
+            values = _cast_scalar_to_array([len(x) for x in passed_axes],
+                                           data, dtype=dtype)
+            mgr = self._init_matrix(values, passed_axes, dtype=values.dtype,
                                      copy=False)
             copy = False
         else:  # pragma: no cover
@@ -570,9 +569,7 @@ def __setitem__(self, key, value):
                     shape[1:], tuple(map(int, value.shape))))
             mat = np.asarray(value)
         elif is_scalar(value):
-            dtype, value = _infer_dtype_from_scalar(value)
-            mat = np.empty(shape[1:], dtype=dtype)
-            mat.fill(value)
+            mat = _cast_scalar_to_array(shape[1:], value)
         else:
             raise TypeError('Cannot set item of type: %s' % str(type(value)))

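Note: the panel changes are the same consolidation as in `DataFrame.__init__`; for completeness, a 0.20-era sketch (``Panel`` was removed in later pandas, so this only runs on contemporaneous versions):

```python
import pandas as pd

# Scalar data with all axes supplied hits the _cast_scalar_to_array path.
p = pd.Panel(1.5, items=['a'], major_axis=[0, 1], minor_axis=['x', 'y'])
print(p['a'].dtypes)  # expected: float64 for every column
```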