diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 76c5da0bb855b..2aa0db7aa2f4e 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -54,6 +54,18 @@ Backwards incompatible API changes - :class:`pandas.HDFStore`'s string representation is now faster and less detailed. For the previous behavior, use ``pandas.HDFStore.info()``. (:issue:`16503`). - Compression defaults in HDF stores now follow pytable standards. Default is no compression and if ``complib`` is missing and ``complevel`` > 0 ``zlib`` is used (:issue:`15943`) +.. _whatsnew_0210.dtype_conversions: + +Dtype Conversions +^^^^^^^^^^^^^^^^^ + +Example about setitem / where with bools. + + + +- Inconsistent behavior in ``.where()`` with datetimelikes which would raise rather than coerce to ``object`` (:issue:`16402`) +- Bug in assignment against datetime-like data with ``int`` may incorrectly convert to datetime-like (:issue:`14145`) +- Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`) .. _whatsnew_0210.api: diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 5e92c506b5d0c..bf9f7b6528776 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -19,6 +19,7 @@ cimport tslib from hashtable cimport * from pandas._libs import tslib, algos, hashtable as _hash from pandas._libs.tslib import Timestamp, Timedelta +from datetime import datetime, timedelta from datetime cimport (get_datetime64_value, _pydatetime_to_dts, pandas_datetimestruct) @@ -507,24 +508,37 @@ cdef class TimedeltaEngine(DatetimeEngine): return 'm8[ns]' cpdef convert_scalar(ndarray arr, object value): + # we don't turn integers + # into datetimes/timedeltas + + # we don't turn bools into int/float/complex + if arr.descr.type_num == NPY_DATETIME: if isinstance(value, np.ndarray): pass - elif isinstance(value, Timestamp): - return value.value + elif isinstance(value, datetime): + return Timestamp(value).value elif value is None or value != value: return iNaT - else: + elif util.is_string_object(value): return Timestamp(value).value + raise ValueError("cannot set a Timestamp with a non-timestamp") + elif arr.descr.type_num == NPY_TIMEDELTA: if isinstance(value, np.ndarray): pass - elif isinstance(value, Timedelta): - return value.value + elif isinstance(value, timedelta): + return Timedelta(value).value elif value is None or value != value: return iNaT - else: + elif util.is_string_object(value): return Timedelta(value).value + raise ValueError("cannot set a Timedelta with a non-timedelta") + + if (issubclass(arr.dtype.type, (np.integer, np.floating, np.complex)) and not + issubclass(arr.dtype.type, np.bool_)): + if util.is_bool_object(value): + raise ValueError('Cannot assign bool to float/integer series') if issubclass(arr.dtype.type, (np.integer, np.bool_)): if util.is_float_object(value) and value != value: diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index d74c5e66ea1a9..6f0e7388a42de 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -149,6 +149,12 @@ def _reconstruct_data(values, dtype, original): pass elif is_datetime64tz_dtype(dtype) or is_period_dtype(dtype): values = Index(original)._shallow_copy(values, name=None) + elif is_bool_dtype(dtype): + values = values.astype(dtype) + + # we only support object dtypes bool Index + if isinstance(original, Index): + values = values.astype(object) elif dtype is not None: values = values.astype(dtype) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 9b5641b72efda..bc1925662b265 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -272,7 +272,7 @@ def maybe_promote(dtype, fill_value=np.nan): else: if issubclass(dtype.type, np.datetime64): try: - fill_value = Timestamp(fill_value).value + fill_value = tslib.Timestamp(fill_value).value except: # the proper thing to do here would probably be to upcast # to object (but numpy 1.6.1 doesn't do this properly) @@ -333,6 +333,23 @@ def maybe_promote(dtype, fill_value=np.nan): return dtype, fill_value +def infer_dtype_from(val, pandas_dtype=False): + """ + interpret the dtype from a scalar or array. This is a convenience + routines to infer dtype from a scalar or an array + + Parameters + ---------- + pandas_dtype : bool, default False + whether to infer dtype including pandas extension types. + If False, scalar/array belongs to pandas extension types is inferred as + object + """ + if is_scalar(val): + return infer_dtype_from_scalar(val, pandas_dtype=pandas_dtype) + return infer_dtype_from_array(val, pandas_dtype=pandas_dtype) + + def infer_dtype_from_scalar(val, pandas_dtype=False): """ interpret the dtype from a scalar @@ -408,23 +425,29 @@ def infer_dtype_from_scalar(val, pandas_dtype=False): return dtype, val -def infer_dtype_from_array(arr): +def infer_dtype_from_array(arr, pandas_dtype=False): """ infer the dtype from a scalar or array Parameters ---------- arr : scalar or array + pandas_dtype : bool, default False + whether to infer dtype including pandas extension types. + If False, array belongs to pandas extension types + is inferred as object Returns ------- - tuple (numpy-compat dtype, array) + tuple (numpy-compat/pandas-compat dtype, array) Notes ----- - These infer to numpy dtypes exactly - with the exception that mixed / object dtypes - are not coerced by stringifying or conversion + if pandas_dtype=False. these infer to numpy dtypes + exactly with the exception that mixed / object dtypes + + if pandas_dtype=True. datetime64tz-aware/categorical + types will retain there character. Examples -------- @@ -442,6 +465,13 @@ def infer_dtype_from_array(arr): if not is_list_like(arr): arr = [arr] + if pandas_dtype and (is_categorical_dtype(arr) or + is_datetime64tz_dtype(arr)): + return arr.dtype, arr + + elif isinstance(arr, ABCSeries): + return arr.dtype, np.asarray(arr) + # don't force numpy coerce with nan's inferred = lib.infer_dtype(arr) if inferred in ['string', 'bytes', 'unicode', @@ -552,7 +582,7 @@ def conv(r, dtype): if isnull(r): pass elif dtype == _NS_DTYPE: - r = Timestamp(r) + r = tslib.Timestamp(r) elif dtype == _TD_DTYPE: r = _coerce_scalar_to_timedelta_type(r) elif dtype == np.bool_: @@ -1028,13 +1058,25 @@ def find_common_type(types): return np.find_common_type(types, []) -def _cast_scalar_to_array(shape, value, dtype=None): +def cast_scalar_to_array(shape, value, dtype=None): """ create np.ndarray of specified shape and dtype, filled with values + + Parameters + ---------- + shape : tuple + value : scalar value + dtype : np.dtype, optional + dtype to coerce + + Returns + ------- + ndarray of shape, filled with value, of specified / inferred dtype + """ if dtype is None: - dtype, fill_value = _infer_dtype_from_scalar(value) + dtype, fill_value = infer_dtype_from_scalar(value) else: fill_value = value diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index bfec1ec3ebe8c..fbf23867a92c4 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -11,7 +11,8 @@ ExtensionDtype) from .generic import (ABCCategorical, ABCPeriodIndex, ABCDatetimeIndex, ABCSeries, - ABCSparseArray, ABCSparseSeries) + ABCSparseArray, ABCSparseSeries, + ABCIndexClass) from .inference import is_string_like from .inference import * # noqa @@ -1540,6 +1541,16 @@ def is_bool_dtype(arr_or_dtype): except ValueError: # this isn't even a dtype return False + + if isinstance(arr_or_dtype, ABCIndexClass): + + # TODO(jreback) + # we don't have a boolean Index class + # so its object, we need to infer to + # guess this + return (arr_or_dtype.is_object and + arr_or_dtype.inferred_type == 'boolean') + return issubclass(tipo, np.bool_) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a9c71c613256e..0d5855b21aef2 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -25,7 +25,8 @@ import numpy.ma as ma from pandas.core.dtypes.cast import ( - maybe_upcast, infer_dtype_from_scalar, + maybe_upcast, + cast_scalar_to_array, maybe_cast_to_datetime, maybe_infer_to_datetimelike, maybe_convert_platform, @@ -386,8 +387,8 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, raise_with_traceback(exc) if arr.ndim == 0 and index is not None and columns is not None: - values = _cast_scalar_to_array((len(index), len(columns)), - data, dtype=dtype) + values = cast_scalar_to_array((len(index), len(columns)), + data, dtype=dtype) mgr = self._init_ndarray(values, index, columns, dtype=values.dtype, copy=False) else: @@ -2679,8 +2680,8 @@ def reindexer(value): else: # upcast the scalar - value = _cast_scalar_to_array(len(self.index), value) - value = _possibly_cast_to_datetime(value, value.dtype) + value = cast_scalar_to_array(len(self.index), value) + value = maybe_cast_to_datetime(value, value.dtype) # return internal types directly if is_extension_type(value): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 7d1a8adf381fe..f62f2b5489d79 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -13,7 +13,6 @@ from pandas.core.dtypes.common import ( _ensure_int64, _ensure_object, - needs_i8_conversion, is_scalar, is_number, is_integer, is_bool, @@ -26,7 +25,8 @@ is_dict_like, is_re_compilable, pandas_dtype) -from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask +from pandas.core.dtypes.cast import ( + maybe_promote, maybe_upcast_putmask) from pandas.core.dtypes.missing import isnull, notnull from pandas.core.dtypes.generic import ABCSeries, ABCPanel @@ -5336,48 +5336,6 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, raise NotImplementedError("cannot align with a higher " "dimensional NDFrame") - elif is_list_like(other): - - if self.ndim == 1: - - # try to set the same dtype as ourselves - try: - new_other = np.array(other, dtype=self.dtype) - except ValueError: - new_other = np.array(other) - except TypeError: - new_other = other - - # we can end up comparing integers and m8[ns] - # which is a numpy no no - is_i8 = needs_i8_conversion(self.dtype) - if is_i8: - matches = False - else: - matches = (new_other == np.array(other)) - - if matches is False or not matches.all(): - - # coerce other to a common dtype if we can - if needs_i8_conversion(self.dtype): - try: - other = np.array(other, dtype=self.dtype) - except: - other = np.array(other) - else: - other = np.asarray(other) - other = np.asarray(other, - dtype=np.common_type(other, - new_other)) - - # we need to use the new dtype - try_quick = False - else: - other = new_other - else: - - other = np.array(other) - if isinstance(other, np.ndarray): if other.shape != self.shape: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 695f9f119baa2..7292efc5533f6 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -26,6 +26,7 @@ is_object_dtype, is_categorical_dtype, is_interval_dtype, + is_bool, is_bool_dtype, is_signed_integer_dtype, is_unsigned_integer_dtype, @@ -610,9 +611,18 @@ def repeat(self, repeats, *args, **kwargs): def where(self, cond, other=None): if other is None: other = self._na_value - values = np.where(cond, self.values, other) dtype = self.dtype + values = self.values + + if is_bool(other) or is_bool_dtype(other): + + # bools force casting + values = values.astype(object) + dtype = None + + values = np.where(cond, values, other) + if self._is_numeric_dtype and np.any(isnull(values)): # We can't coerce to the numeric dtype of "self" (unless # it's float) if there are NaN values in our output. diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 72d521cbe2d60..142e0f36c66ec 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -2,9 +2,14 @@ from pandas._libs import (index as libindex, algos as libalgos, join as libjoin) from pandas.core.dtypes.common import ( - is_dtype_equal, pandas_dtype, - is_float_dtype, is_object_dtype, - is_integer_dtype, is_scalar) + is_dtype_equal, + pandas_dtype, + is_float_dtype, + is_object_dtype, + is_integer_dtype, + is_bool, + is_bool_dtype, + is_scalar) from pandas.core.common import _asarray_tuplesafe, _values_from_object from pandas import compat @@ -56,6 +61,16 @@ def _maybe_cast_slice_bound(self, label, side, kind): # we will try to coerce to integers return self._maybe_cast_indexer(label) + def _convert_for_op(self, value): + """ Convert value to be insertable to ndarray """ + + if is_bool(value) or is_bool_dtype(value): + # force conversion to object + # so we don't lose the bools + raise TypeError + + return value + def _convert_tolerance(self, tolerance): try: return float(tolerance) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 106e00859105c..39b2f58055635 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1,4 +1,5 @@ import copy +from warnings import catch_warnings import itertools import re import operator @@ -22,6 +23,7 @@ is_categorical, is_categorical_dtype, is_integer_dtype, is_datetime64tz_dtype, + is_bool_dtype, is_object_dtype, is_datetimelike_v_numeric, is_float_dtype, is_numeric_dtype, @@ -33,21 +35,21 @@ _get_dtype) from pandas.core.dtypes.cast import ( maybe_downcast_to_dtype, - maybe_convert_string_to_object, maybe_upcast, - maybe_convert_scalar, maybe_promote, + maybe_promote, + infer_dtype_from, infer_dtype_from_scalar, soft_convert_objects, maybe_convert_objects, astype_nansafe, find_common_type) from pandas.core.dtypes.missing import ( - isnull, array_equivalent, + isnull, notnull, array_equivalent, _is_na_compat, is_null_datelike_scalar) import pandas.core.dtypes.concat as _concat -from pandas.core.dtypes.generic import ABCSeries +from pandas.core.dtypes.generic import ABCSeries, ABCDatetimeIndex from pandas.core.common import is_null_slice import pandas.core.algorithms as algos @@ -169,11 +171,6 @@ def get_values(self, dtype=None): def to_dense(self): return self.values.view() - def to_object_block(self, mgr): - """ return myself as an object block """ - values = self.get_values(dtype=object) - return self.make_block(values, klass=ObjectBlock) - @property def _na_value(self): return np.nan @@ -400,12 +397,12 @@ def fillna(self, value, limit=None, inplace=False, downcast=None, if not mask.any(): return self if inplace else self.copy() - # we cannot coerce the underlying object, so - # make an ObjectBlock - return self.to_object_block(mgr=mgr).fillna(original_value, - limit=limit, - inplace=inplace, - downcast=False) + # try again with a compatible block + block = self.coerce_to_target_dtype(original_value) + return block.fillna(original_value, + limit=limit, + inplace=inplace, + downcast=False) def _maybe_downcast(self, blocks, downcast=None): @@ -591,6 +588,14 @@ def _try_operate(self, values): def _try_coerce_args(self, values, other): """ provide coercion to our input arguments """ + + if np.any(notnull(other)) and not self._can_hold_element(other): + # coercion issues + # let higher levels handle + raise TypeError("cannot convert {} to an {}".format( + type(other).__name__, + type(self).__name__.lower().replace('Block', ''))) + return values, False, other, False def _try_coerce_result(self, result): @@ -662,7 +667,9 @@ def replace(self, to_replace, value, inplace=False, filter=None, if not mask.any(): return self if inplace else self.copy() - return self.to_object_block(mgr=mgr).replace( + # try again with a compatible block + block = self.coerce_to_target_dtype(value) + return block.replace( to_replace=original_to_replace, value=value, inplace=inplace, filter=filter, regex=regex, convert=convert) @@ -707,14 +714,14 @@ def setitem(self, indexer, value, mgr=None): # NaN promotion is handled in latter path dtype = False else: - dtype, _ = _infer_dtype_from_scalar(value, - pandas_dtype=True) + dtype, _ = infer_dtype_from_scalar(value, + pandas_dtype=True) find_dtype = True else: dtype = 'infer' if find_dtype: - dtype = _find_common_type([values.dtype, dtype]) + dtype = find_common_type([values.dtype, dtype]) if not is_dtype_equal(self.dtype, dtype): b = self.astype(dtype) return b.setitem(indexer, value, mgr=mgr) @@ -752,55 +759,12 @@ def setitem(self, indexer, value, mgr=None): def _is_scalar_indexer(indexer): # return True if we are all scalar indexers - if arr_value.ndim == 1: - if not isinstance(indexer, tuple): - indexer = tuple([indexer]) + if arr_value.ndim == 1: + if not isinstance(indexer, tuple): + indexer = tuple([indexer]) return any(isinstance(idx, np.ndarray) and len(idx) == 0 for idx in indexer) - return False - - # empty indexers - # 8669 (empty) - if _is_empty_indexer(indexer): - pass - - # setting a single element for each dim and with a rhs that could - # be say a list - # GH 6043 - elif _is_scalar_indexer(indexer): - values[indexer] = value - - # if we are an exact match (ex-broadcasting), - # then use the resultant dtype - elif (len(arr_value.shape) and - arr_value.shape[0] == values.shape[0] and - np.prod(arr_value.shape) == np.prod(values.shape)): - values[indexer] = value - values = values.astype(arr_value.dtype) - - # set - else: - values[indexer] = value - - # coerce and try to infer the dtypes of the result - if hasattr(value, 'dtype') and is_dtype_equal(values.dtype, - value.dtype): - dtype = value.dtype - elif is_scalar(value): - dtype, _ = infer_dtype_from_scalar(value) - else: - dtype = 'infer' - values = self._try_coerce_and_cast_result(values, dtype) - block = self.make_block(transf(values), fastpath=True) - - # may have to soft convert_objects here - if block.is_object and not self.is_object: - block = block.convert(numeric=False) - - return block - except ValueError: - raise - except TypeError: + return False def _is_empty_indexer(indexer): # return a boolean if we have an empty indexer @@ -829,7 +793,10 @@ def _is_empty_indexer(indexer): arr_value.shape[0] == values.shape[0] and np.prod(arr_value.shape) == np.prod(values.shape)): values[indexer] = value - values = values.astype(arr_value.dtype) + try: + values = values.astype(arr_value.dtype) + except ValueError: + pass # set else: @@ -950,6 +917,46 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, return [self.make_block(new_values, fastpath=True)] + def coerce_to_target_dtype(self, other): + """ + coerce the current block to a dtype compat for other + we will return a block, possibly object, and not raise + + we can also safely try to coerce to the same dtype + and will receive the same block + """ + + # if we cannot then coerce to object + dtype, _ = infer_dtype_from(other, pandas_dtype=True) + + if is_dtype_equal(self.dtype, dtype): + return self + + if self.is_bool or is_object_dtype(dtype) or is_bool_dtype(dtype): + # we don't upcast to bool + return self.astype(object) + + elif self.is_datelike: + + # we don't upcast i8 + if is_integer_dtype(dtype): + return self.astype(object) + + # don't upcast timezone with different timezone or no timezone + if self.is_datetime: + mytz = getattr(self.dtype, 'tz', None) + othertz = getattr(dtype, 'tz', None) + + if str(mytz) != str(othertz): + return self.astype(object) + + try: + return self.astype(dtype) + except (ValueError, TypeError): + pass + + return self.astype(object) + def interpolate(self, method='pad', axis=0, index=None, values=None, inplace=False, limit=None, limit_direction='forward', fill_value=None, coerce=False, downcast=None, mgr=None, @@ -1156,6 +1163,7 @@ def eval(self, func, other, raise_on_error=True, try_cast=False, mgr=None): ------- a new block, the result of the func """ + orig_other = other values = self.values if hasattr(other, 'reindex_axis'): @@ -1180,8 +1188,14 @@ def eval(self, func, other, raise_on_error=True, try_cast=False, mgr=None): transf = (lambda x: x.T) if is_transposed else (lambda x: x) # coerce/transpose the args if needed - values, values_mask, other, other_mask = self._try_coerce_args( - transf(values), other) + try: + values, values_mask, other, other_mask = self._try_coerce_args( + transf(values), other) + except TypeError: + block = self.coerce_to_target_dtype(orig_other) + return block.eval(func, orig_other, + raise_on_error=raise_on_error, + try_cast=try_cast, mgr=mgr) # get the result, may need to transpose the other def get_result(other): @@ -1278,8 +1292,8 @@ def where(self, other, cond, align=True, raise_on_error=True, ------- a new block(s), the result of the func """ - values = self.values + orig_other = other if transpose: values = values.T @@ -1299,9 +1313,6 @@ def where(self, other, cond, align=True, raise_on_error=True, raise ValueError("where must have a condition that is ndarray " "like") - other = maybe_convert_string_to_object(other) - other = maybe_convert_scalar(other) - # our where function def func(cond, values, other): if cond.ravel().all(): @@ -1325,7 +1336,17 @@ def func(cond, values, other): # see if we can operate on the entire block, or need item-by-item # or if we are a single block (ndim == 1) - result = func(cond, values, other) + try: + result = func(cond, values, other) + except TypeError: + # we cannot coerce, return a compat dtype + # we are explicity ignoring raise_on_error here + block = self.coerce_to_target_dtype(other) + return block.where(orig_other, cond, align=align, + raise_on_error=raise_on_error, + try_cast=try_cast, axis=axis, + transpose=transpose) + if self._can_hold_na or self.ndim == 1: if transpose: @@ -1625,7 +1646,7 @@ class FloatBlock(FloatOrComplexBlock): def _can_hold_element(self, element): if is_list_like(element): - element = np.array(element) + element = np.asarray(element) tipo = element.dtype.type return (issubclass(tipo, (np.floating, np.integer)) and not issubclass(tipo, (np.datetime64, np.timedelta64))) @@ -1686,7 +1707,7 @@ def _can_hold_element(self, element): (np.floating, np.integer, np.complexfloating)) return (isinstance(element, (float, int, complex, np.float_, np.int_)) and - not isinstance(bool, np.bool_)) + not isinstance(element, (bool, np.bool_))) def _try_cast(self, element): try: @@ -1795,14 +1816,13 @@ def _try_coerce_args(self, values, other): other = Timedelta(other).value elif isinstance(other, timedelta): other = Timedelta(other).value - elif isinstance(other, np.ndarray): + elif hasattr(other, 'dtype') and is_timedelta64_dtype(other): other_mask = isnull(other) other = other.astype('i8', copy=False).view('i8') else: - # scalar - other = Timedelta(other) - other_mask = isnull(other) - other = other.value + # coercion issues + # let higher levels handle + raise TypeError return values, values_mask, other, other_mask @@ -1852,9 +1872,9 @@ class BoolBlock(NumericBlock): def _can_hold_element(self, element): if is_list_like(element): - element = np.array(element) - return issubclass(element.dtype.type, np.integer) - return isinstance(element, (int, bool)) + element = np.asarray(element) + return issubclass(element.dtype.type, np.bool_) + return isinstance(element, (bool, np.bool_)) def _try_cast(self, element): try: @@ -2344,17 +2364,13 @@ def _try_coerce_args(self, values, other): "naive Block") other_mask = isnull(other) other = other.asm8.view('i8') + elif hasattr(other, 'dtype') and is_datetime64_dtype(other): + other_mask = isnull(other) + other = other.astype('i8', copy=False).view('i8') else: - try: - other = np.asarray(other) - other_mask = isnull(other) - - other = other.astype('i8', copy=False).view('i8') - except ValueError: - - # coercion issues - # let higher levels handle - raise TypeError + # coercion issues + # let higher levels handle + raise TypeError return values, values_mask, other, other_mask @@ -2454,21 +2470,6 @@ def get_values(self, dtype=None): self.values.ravel(), f).reshape(self.values.shape) return self.values - def to_object_block(self, mgr): - """ - return myself as an object block - - Since we keep the DTI as a 1-d object, this is different - depends on BlockManager's ndim - """ - values = self.get_values(dtype=object) - kwargs = {} - if mgr.ndim > 1: - values = _block_shape(values, ndim=mgr.ndim) - kwargs['ndim'] = mgr.ndim - kwargs['placement'] = [0] - return self.make_block(values, klass=ObjectBlock, **kwargs) - def _slice(self, slicer): """ return a slice of my values """ if isinstance(slicer, tuple): @@ -4837,7 +4838,9 @@ def _putmask_smart(v, m, n): if not _is_na_compat(v, nn[0]): raise ValueError - nn_at = nn.astype(v.dtype) + # we ignore ComplexWarning here + with catch_warnings(record=True): + nn_at = nn.astype(v.dtype) # avoid invalid dtype comparisons if not is_numeric_v_string_like(nn, nn_at): @@ -4849,7 +4852,8 @@ def _putmask_smart(v, m, n): except (ValueError, IndexError, TypeError): pass - # change the dtype + # change the dtype if needed + n = np.asarray(n) dtype, _ = maybe_promote(n.dtype) if is_extension_type(v.dtype) and is_object_dtype(dtype): @@ -4863,6 +4867,9 @@ def _putmask_smart(v, m, n): idx, = np.where(np.squeeze(m)) for mask_index, new_val in zip(idx, n[m]): nv[mask_index] = new_val + except IndexError: + nv[m] = n + return nv diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 5a47258fa782e..3b8be05bfc4a7 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -8,6 +8,7 @@ import warnings from pandas.core.dtypes.cast import ( infer_dtype_from_scalar, + cast_scalar_to_array, maybe_cast_item) from pandas.core.dtypes.common import ( is_integer, is_list_like, @@ -178,8 +179,8 @@ def _init_data(self, data, copy, dtype, **kwargs): copy = False dtype = None elif is_scalar(data) and all(x is not None for x in passed_axes): - values = _cast_scalar_to_array([len(x) for x in passed_axes], - data, dtype=dtype) + values = cast_scalar_to_array([len(x) for x in passed_axes], + data, dtype=dtype) mgr = self._init_matrix(values, passed_axes, dtype=values.dtype, copy=False) copy = False @@ -580,7 +581,7 @@ def __setitem__(self, key, value): shape[1:], tuple(map(int, value.shape)))) mat = np.asarray(value) elif is_scalar(value): - mat = _cast_scalar_to_array(shape[1:], value) + mat = cast_scalar_to_array(shape[1:], value) else: raise TypeError('Cannot set item of type: %s' % str(type(value))) diff --git a/pandas/tests/dtypes/test_cast.py b/pandas/tests/dtypes/test_cast.py index 33fab26408784..3434357e6f018 100644 --- a/pandas/tests/dtypes/test_cast.py +++ b/pandas/tests/dtypes/test_cast.py @@ -9,11 +9,14 @@ from datetime import datetime, timedelta, date import numpy as np -from pandas import Timedelta, Timestamp, DatetimeIndex, DataFrame, NaT +import pandas as pd +from pandas import (Timedelta, Timestamp, DatetimeIndex, + DataFrame, NaT, Period) from pandas.core.dtypes.cast import ( maybe_downcast_to_dtype, maybe_convert_objects, + cast_scalar_to_array, infer_dtype_from_scalar, infer_dtype_from_array, maybe_convert_string_to_object, @@ -23,6 +26,8 @@ CategoricalDtype, DatetimeTZDtype, PeriodDtype) +from pandas.core.dtypes.common import ( + is_dtype_equal) from pandas.util import testing as tm @@ -90,8 +95,8 @@ def test_datetime_with_timezone(self): class TestInferDtype(object): - def test_infer_dtype_from_scalar(self): - # Test that _infer_dtype_from_scalar is returning correct dtype for int + def testinfer_dtype_from_scalar(self): + # Test that infer_dtype_from_scalar is returning correct dtype for int # and float. for dtypec in [np.uint8, np.int8, np.uint16, np.int16, np.uint32, @@ -131,29 +136,89 @@ def test_infer_dtype_from_scalar(self): dtype, val = infer_dtype_from_scalar(data) assert dtype == 'm8[ns]' + for tz in ['UTC', 'US/Eastern', 'Asia/Tokyo']: + dt = Timestamp(1, tz=tz) + dtype, val = infer_dtype_from_scalar(dt, pandas_dtype=True) + assert dtype == 'datetime64[ns, {0}]'.format(tz) + assert val == dt.value + + dtype, val = infer_dtype_from_scalar(dt) + assert dtype == np.object_ + assert val == dt + + for freq in ['M', 'D']: + p = Period('2011-01-01', freq=freq) + dtype, val = infer_dtype_from_scalar(p, pandas_dtype=True) + assert dtype == 'period[{0}]'.format(freq) + assert val == p.ordinal + + dtype, val = infer_dtype_from_scalar(p) + dtype == np.object_ + assert val == p + + # misc for data in [date(2000, 1, 1), Timestamp(1, tz='US/Eastern'), 'foo']: + dtype, val = infer_dtype_from_scalar(data) assert dtype == np.object_ @pytest.mark.parametrize( - "arr, expected", - [('foo', np.object_), - (b'foo', np.object_), - (1, np.int_), - (1.5, np.float_), - ([1], np.int_), - (np.array([1]), np.int_), - ([np.nan, 1, ''], np.object_), - (np.array([[1.0, 2.0]]), np.float_), - (Timestamp('20160101'), np.object_), - (np.datetime64('2016-01-01'), np.dtype(' int - exp = pd.Series([1, 1, 3, 4]) - self._assert_setitem_series_conversion(obj, True, exp, np.int64) + # int + bool -> object + exp = pd.Series([1, True, 3, 4]) + self._assert_setitem_series_conversion(obj, True, exp, np.object) def test_setitem_series_int8(self): # integer dtype coercion (no change) obj = pd.Series([1, 2, 3, 4], dtype=np.int8) - self.assertEqual(obj.dtype, np.int8) + assert obj.dtype == np.int8 exp = pd.Series([1, 1, 3, 4], dtype=np.int8) self._assert_setitem_series_conversion(obj, np.int32(1), exp, np.int8) @@ -135,9 +135,9 @@ def test_setitem_series_float64(self): self._assert_setitem_series_conversion(obj, 1 + 1j, exp, np.complex128) - # float + bool -> float - exp = pd.Series([1.1, 1.0, 3.3, 4.4]) - self._assert_setitem_series_conversion(obj, True, exp, np.float64) + # float + bool -> object + exp = pd.Series([1.1, True, 3.3, 4.4]) + self._assert_setitem_series_conversion(obj, True, exp, np.object) def test_setitem_series_complex128(self): obj = pd.Series([1 + 1j, 2 + 2j, 3 + 3j, 4 + 4j]) @@ -145,7 +145,7 @@ def test_setitem_series_complex128(self): # complex + int -> complex exp = pd.Series([1 + 1j, 1, 3 + 3j, 4 + 4j]) - self._assert_setitem_series_conversion(obj, True, exp, np.complex128) + self._assert_setitem_series_conversion(obj, 1, exp, np.complex128) # complex + float -> complex exp = pd.Series([1 + 1j, 1.1, 3 + 3j, 4 + 4j]) @@ -155,9 +155,9 @@ def test_setitem_series_complex128(self): exp = pd.Series([1 + 1j, 1 + 1j, 3 + 3j, 4 + 4j]) self._assert_setitem_series_conversion(obj, 1 + 1j, exp, np.complex128) - # complex + bool -> complex - exp = pd.Series([1 + 1j, 1, 3 + 3j, 4 + 4j]) - self._assert_setitem_series_conversion(obj, True, exp, np.complex128) + # complex + bool -> object + exp = pd.Series([1 + 1j, True, 3 + 3j, 4 + 4j]) + self._assert_setitem_series_conversion(obj, True, exp, np.object) def test_setitem_series_bool(self): obj = pd.Series([True, False, True, False]) @@ -211,12 +211,11 @@ def test_setitem_series_datetime64(self): exp, 'datetime64[ns]') # datetime64 + int -> object - # ToDo: The result must be object exp = pd.Series([pd.Timestamp('2011-01-01'), - pd.Timestamp(1), + 1, pd.Timestamp('2011-01-03'), pd.Timestamp('2011-01-04')]) - self._assert_setitem_series_conversion(obj, 1, exp, 'datetime64[ns]') + self._assert_setitem_series_conversion(obj, 1, exp, 'object') # datetime64 + object -> object exp = pd.Series([pd.Timestamp('2011-01-01'), @@ -225,8 +224,6 @@ def test_setitem_series_datetime64(self): pd.Timestamp('2011-01-04')]) self._assert_setitem_series_conversion(obj, 'x', exp, np.object) - # ToDo: add more tests once the above issue has been fixed - def test_setitem_series_datetime64tz(self): tz = 'US/Eastern' obj = pd.Series([pd.Timestamp('2011-01-01', tz=tz), @@ -274,7 +271,7 @@ def test_setitem_series_timedelta64(self): pd.Timedelta('2 day'), pd.Timedelta('3 day'), pd.Timedelta('4 day')]) - self.assertEqual(obj.dtype, 'timedelta64[ns]') + assert obj.dtype == 'timedelta64[ns]' # timedelta64 + timedelta64 -> timedelta64 exp = pd.Series([pd.Timedelta('1 day'), @@ -285,12 +282,11 @@ def test_setitem_series_timedelta64(self): exp, 'timedelta64[ns]') # timedelta64 + int -> object - # ToDo: The result must be object exp = pd.Series([pd.Timedelta('1 day'), - pd.Timedelta(1), + 1, pd.Timedelta('3 day'), pd.Timedelta('4 day')]) - self._assert_setitem_series_conversion(obj, 1, exp, 'timedelta64[ns]') + self._assert_setitem_series_conversion(obj, 1, exp, np.object) # timedelta64 + object -> object exp = pd.Series([pd.Timedelta('1 day'), @@ -299,8 +295,6 @@ def test_setitem_series_timedelta64(self): pd.Timedelta('4 day')]) self._assert_setitem_series_conversion(obj, 'x', exp, np.object) - # ToDo: add more tests once the above issue has been fixed - def test_setitem_series_period(self): pass @@ -673,13 +667,13 @@ def _where_int64_common(self, klass): self._assert_where_conversion(obj, cond, values, exp, np.complex128) - # int + bool -> int - exp = klass([1, 1, 3, 1]) - self._assert_where_conversion(obj, cond, True, exp, np.int64) + # int + bool -> object + exp = klass([1, True, 3, True]) + self._assert_where_conversion(obj, cond, True, exp, np.object) values = klass([True, False, True, True]) - exp = klass([1, 0, 3, 1]) - self._assert_where_conversion(obj, cond, values, exp, np.int64) + exp = klass([1, False, 3, True]) + self._assert_where_conversion(obj, cond, values, exp, np.object) def test_where_series_int64(self): self._where_int64_common(pd.Series) @@ -719,13 +713,13 @@ def _where_float64_common(self, klass): self._assert_where_conversion(obj, cond, values, exp, np.complex128) - # float + bool -> float - exp = klass([1.1, 1.0, 3.3, 1.0]) - self._assert_where_conversion(obj, cond, True, exp, np.float64) + # float + bool -> object + exp = klass([1.1, True, 3.3, True]) + self._assert_where_conversion(obj, cond, True, exp, np.object) values = klass([True, False, True, True]) - exp = klass([1.1, 0.0, 3.3, 1.0]) - self._assert_where_conversion(obj, cond, values, exp, np.float64) + exp = klass([1.1, False, 3.3, True]) + self._assert_where_conversion(obj, cond, values, exp, np.object) def test_where_series_float64(self): self._where_float64_common(pd.Series) @@ -762,45 +756,46 @@ def test_where_series_complex128(self): exp = pd.Series([1 + 1j, 6 + 6j, 3 + 3j, 8 + 8j]) self._assert_where_conversion(obj, cond, values, exp, np.complex128) - # complex + bool -> complex - exp = pd.Series([1 + 1j, 1, 3 + 3j, 1]) - self._assert_where_conversion(obj, cond, True, exp, np.complex128) + # complex + bool -> object + exp = pd.Series([1 + 1j, True, 3 + 3j, True]) + self._assert_where_conversion(obj, cond, True, exp, np.object) values = pd.Series([True, False, True, True]) - exp = pd.Series([1 + 1j, 0, 3 + 3j, 1]) - self._assert_where_conversion(obj, cond, values, exp, np.complex128) + exp = pd.Series([1 + 1j, False, 3 + 3j, True]) + self._assert_where_conversion(obj, cond, values, exp, np.object) def test_where_index_complex128(self): pass def test_where_series_bool(self): + obj = pd.Series([True, False, True, False]) assert obj.dtype == np.bool cond = pd.Series([True, False, True, False]) - # bool + int -> int - exp = pd.Series([1, 1, 1, 1]) - self._assert_where_conversion(obj, cond, 1, exp, np.int64) + # bool + int -> object + exp = pd.Series([True, 1, True, 1]) + self._assert_where_conversion(obj, cond, 1, exp, np.object) values = pd.Series([5, 6, 7, 8]) - exp = pd.Series([1, 6, 1, 8]) - self._assert_where_conversion(obj, cond, values, exp, np.int64) + exp = pd.Series([True, 6, True, 8]) + self._assert_where_conversion(obj, cond, values, exp, np.object) - # bool + float -> float - exp = pd.Series([1.0, 1.1, 1.0, 1.1]) - self._assert_where_conversion(obj, cond, 1.1, exp, np.float64) + # bool + float -> object + exp = pd.Series([True, 1.1, True, 1.1]) + self._assert_where_conversion(obj, cond, 1.1, exp, np.object) values = pd.Series([5.5, 6.6, 7.7, 8.8]) - exp = pd.Series([1.0, 6.6, 1.0, 8.8]) - self._assert_where_conversion(obj, cond, values, exp, np.float64) + exp = pd.Series([True, 6.6, True, 8.8]) + self._assert_where_conversion(obj, cond, values, exp, np.object) - # bool + complex -> complex - exp = pd.Series([1, 1 + 1j, 1, 1 + 1j]) - self._assert_where_conversion(obj, cond, 1 + 1j, exp, np.complex128) + # bool + complex -> object + exp = pd.Series([True, 1 + 1j, True, 1 + 1j]) + self._assert_where_conversion(obj, cond, 1 + 1j, exp, np.object) values = pd.Series([5 + 5j, 6 + 6j, 7 + 7j, 8 + 8j]) - exp = pd.Series([1, 6 + 6j, 1, 8 + 8j]) - self._assert_where_conversion(obj, cond, values, exp, np.complex128) + exp = pd.Series([True, 6 + 6j, True, 8 + 8j]) + self._assert_where_conversion(obj, cond, values, exp, np.object) # bool + bool -> bool exp = pd.Series([True, True, True, True]) @@ -839,10 +834,15 @@ def test_where_series_datetime64(self): pd.Timestamp('2012-01-04')]) self._assert_where_conversion(obj, cond, values, exp, 'datetime64[ns]') - # ToDo: coerce to object - msg = "cannot coerce a Timestamp with a tz on a naive Block" - with tm.assert_raises_regex(TypeError, msg): - obj.where(cond, pd.Timestamp('2012-01-01', tz='US/Eastern')) + # datetime64 + datetime64tz -> object + exp = pd.Series([pd.Timestamp('2011-01-01'), + pd.Timestamp('2012-01-01', tz='US/Eastern'), + pd.Timestamp('2011-01-03'), + pd.Timestamp('2012-01-01', tz='US/Eastern')]) + self._assert_where_conversion( + obj, cond, + pd.Timestamp('2012-01-01', tz='US/Eastern'), + exp, np.object) # ToDo: do not coerce to UTC, must be object values = pd.Series([pd.Timestamp('2012-01-01', tz='US/Eastern'), @@ -961,7 +961,7 @@ def test_fillna_series_int64(self): def test_fillna_index_int64(self): pass - def _fillna_float64_common(self, klass): + def _fillna_float64_common(self, klass, complex): obj = klass([1.1, np.nan, 3.3, 4.4]) assert obj.dtype == np.float64 @@ -973,26 +973,21 @@ def _fillna_float64_common(self, klass): exp = klass([1.1, 1.1, 3.3, 4.4]) self._assert_fillna_conversion(obj, 1.1, exp, np.float64) - if klass is pd.Series: - # float + complex -> complex - exp = klass([1.1, 1 + 1j, 3.3, 4.4]) - self._assert_fillna_conversion(obj, 1 + 1j, exp, np.complex128) - elif klass is pd.Index: - # float + complex -> object - exp = klass([1.1, 1 + 1j, 3.3, 4.4]) - self._assert_fillna_conversion(obj, 1 + 1j, exp, np.object) - else: - NotImplementedError + # float + complex -> we don't support a complex Index + # complex for Series, + # object for Index + exp = klass([1.1, 1 + 1j, 3.3, 4.4]) + self._assert_fillna_conversion(obj, 1 + 1j, exp, complex) - # float + bool -> float - exp = klass([1.1, 1.0, 3.3, 4.4]) - self._assert_fillna_conversion(obj, True, exp, np.float64) + # float + bool -> object + exp = klass([1.1, True, 3.3, 4.4]) + self._assert_fillna_conversion(obj, True, exp, np.object) def test_fillna_series_float64(self): - self._fillna_float64_common(pd.Series) + self._fillna_float64_common(pd.Series, complex=np.complex128) def test_fillna_index_float64(self): - self._fillna_float64_common(pd.Index) + self._fillna_float64_common(pd.Index, complex=np.object) def test_fillna_series_complex128(self): obj = pd.Series([1 + 1j, np.nan, 3 + 3j, 4 + 4j]) @@ -1010,9 +1005,10 @@ def test_fillna_series_complex128(self): exp = pd.Series([1 + 1j, 1 + 1j, 3 + 3j, 4 + 4j]) self._assert_fillna_conversion(obj, 1 + 1j, exp, np.complex128) - # complex + bool -> complex - exp = pd.Series([1 + 1j, 1, 3 + 3j, 4 + 4j]) - self._assert_fillna_conversion(obj, True, exp, np.complex128) + # complex + bool -> object + exp = pd.Series([1 + 1j, True, 3 + 3j, 4 + 4j]) + import pdb; pdb.set_trace() + self._assert_fillna_conversion(obj, True, exp, np.object) def test_fillna_index_complex128(self): self._fillna_float64_common(pd.Index) @@ -1048,12 +1044,11 @@ def test_fillna_series_datetime64(self): self._assert_fillna_conversion(obj, value, exp, np.object) # datetime64 + int => object - # ToDo: must be coerced to object exp = pd.Series([pd.Timestamp('2011-01-01'), - pd.Timestamp(1), + 1, pd.Timestamp('2011-01-03'), pd.Timestamp('2011-01-04')]) - self._assert_fillna_conversion(obj, 1, exp, 'datetime64[ns]') + self._assert_fillna_conversion(obj, 1, exp, 'object') # datetime64 + object => object exp = pd.Series([pd.Timestamp('2011-01-01'), diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py index da8a896cb6f4a..7a2f25c72f103 100644 --- a/pandas/tests/indexing/test_datetime.py +++ b/pandas/tests/indexing/test_datetime.py @@ -1,5 +1,3 @@ -import pytest - import numpy as np import pandas as pd from pandas import date_range, Index, DataFrame, Series, Timestamp @@ -56,10 +54,12 @@ def test_indexing_with_datetime_tz(self): 'US/Pacific') # trying to set a single element on a part of a different timezone - def f(): - df.loc[df.new_col == 'new', 'time'] = v + # this converts to object + df2 = df.copy() + df2.loc[df2.new_col == 'new', 'time'] = v - pytest.raises(ValueError, f) + expected = Series([v[0], df.loc[1, 'time']], name='time') + tm.assert_series_equal(df2.time, expected) v = df.loc[df.new_col == 'new', 'time'] + pd.Timedelta('1s') df.loc[df.new_col == 'new', 'time'] = v diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 7774d10c5eaf8..3b39de2682e26 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -1404,9 +1404,9 @@ def test_where_datetime(self): expected = Series([pd.NaT, s[1]]) assert_series_equal(rs, expected) - def test_where_timedelta(self): + def test_where_timedelta_coerce(self): s = Series([1, 2], dtype='timedelta64[ns]') - expected = Series([10, 10], dtype='timedelta64[ns]') + expected = Series([10, 10], dtype='object') mask = np.array([False, False]) rs = s.where(mask, [10, 10]) @@ -1422,7 +1422,7 @@ def test_where_timedelta(self): assert_series_equal(rs, expected) rs = s.where(mask, [10.0, np.nan]) - expected = Series([10, None], dtype='timedelta64[ns]') + expected = Series([10, None], dtype='object') assert_series_equal(rs, expected) def test_mask(self):