diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 98b37021eae4f..3d55647236c3f 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -50,6 +50,19 @@ Backwards incompatible API changes - Accessing a non-existent attribute on a closed :class:`HDFStore` will now raise an ``AttributeError`` rather than a ``ClosedFileError`` (:issue:`16301`) +.. _whatsnew_0210.dtype_conversions: + +Dtype Conversions +^^^^^^^^^^^^^^^^^ + +Example about setitem / where with bools. + + + +- Inconsistent behavior in ``.where()`` with datetimelikes which would raise rather than coerce to ``object`` (:issue:`16402`) +- Bug in assignment against datetime-like data with ``int`` may incorrectly convert to datetime-like (:issue:`14145`) +- Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`) + .. _whatsnew_0210.api: @@ -88,9 +101,6 @@ Bug Fixes Conversion ^^^^^^^^^^ -- Bug in assignment against datetime-like data with ``int`` may incorrectly converted to datetime-like (:issue:`14145`) -- Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`) - Indexing diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 21680fb0b3921..975c84b79ed0b 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -19,6 +19,7 @@ cimport tslib from hashtable cimport * from pandas._libs import tslib, algos, hashtable as _hash from pandas._libs.tslib import Timestamp, Timedelta +from datetime import datetime, timedelta from datetime cimport (get_datetime64_value, _pydatetime_to_dts, pandas_datetimestruct) @@ -507,24 +508,37 @@ cdef class TimedeltaEngine(DatetimeEngine): return 'm8[ns]' cpdef convert_scalar(ndarray arr, object value): + # we don't turn integers + # into datetimes/timedeltas + + # we don't turn bools into int/float/complex + if arr.descr.type_num == NPY_DATETIME: if 
isinstance(value, np.ndarray): pass - elif isinstance(value, Timestamp): - return value.value + elif isinstance(value, datetime): + return Timestamp(value).value elif value is None or value != value: return iNaT - else: + elif util.is_string_object(value): return Timestamp(value).value + raise ValueError("cannot set a Timestamp with a non-timestamp") + elif arr.descr.type_num == NPY_TIMEDELTA: if isinstance(value, np.ndarray): pass - elif isinstance(value, Timedelta): - return value.value + elif isinstance(value, timedelta): + return Timedelta(value).value elif value is None or value != value: return iNaT - else: + elif util.is_string_object(value): return Timedelta(value).value + raise ValueError("cannot set a Timedelta with a non-timedelta") + + if (issubclass(arr.dtype.type, (np.integer, np.floating, np.complex)) and not + issubclass(arr.dtype.type, np.bool_)): + if util.is_bool_object(value): + raise ValueError('Cannot assign bool to float/integer series') if issubclass(arr.dtype.type, (np.integer, np.bool_)): if util.is_float_object(value) and value != value: diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index f53a06948e708..11208baa6d1ed 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -6,7 +6,7 @@ import warnings from pandas._libs import tslib, lib -from pandas._libs.tslib import iNaT +from pandas._libs.tslib import iNaT, Timestamp from pandas.compat import string_types, text_type, PY3 from .common import (_ensure_object, is_bool, is_integer, is_float, is_complex, is_datetimetz, is_categorical_dtype, @@ -333,6 +333,23 @@ def maybe_promote(dtype, fill_value=np.nan): return dtype, fill_value +def infer_dtype_from(val, pandas_dtype=False): + """ + interpret the dtype from a scalar or array. This is a convenience + routine to infer dtype from a scalar or an array + + Parameters + ---------- + pandas_dtype : bool, default False + whether to infer dtype including pandas extension types. 
+ If False, a scalar/array of a pandas extension type is inferred as + object + """ + if is_scalar(val): + return infer_dtype_from_scalar(val, pandas_dtype=pandas_dtype) + return infer_dtype_from_array(val, pandas_dtype=pandas_dtype) + + def infer_dtype_from_scalar(val, pandas_dtype=False): """ interpret the dtype from a scalar @@ -408,24 +425,32 @@ def infer_dtype_from_scalar(val, pandas_dtype=False): return dtype, val -def infer_dtype_from_array(arr): +def infer_dtype_from_array(arr, pandas_dtype=False): """ infer the dtype from a scalar or array Parameters ---------- arr : scalar or array + pandas_dtype : bool, default False + whether to infer dtype including pandas extension types. + If False, an array of a pandas extension type + is inferred as object Returns ------- - tuple (numpy-compat dtype, array) + tuple (numpy-compat/pandas-compat dtype, array) Notes ----- - These infer to numpy dtypes exactly - with the exception that mixed / object dtypes + + if pandas_dtype=False, these infer to numpy dtypes + exactly with the exception that mixed / object dtypes are not coerced by stringifying or conversion + if pandas_dtype=True, datetime64tz-aware/categorical + types will retain their character. 
+ Examples -------- >>> np.asarray([1, '1']) @@ -442,6 +467,10 @@ def infer_dtype_from_array(arr): if not is_list_like(arr): arr = [arr] + if pandas_dtype and (is_categorical_dtype(arr) or + is_datetime64tz_dtype(arr)): + return arr.dtype, arr + # don't force numpy coerce with nan's inferred = lib.infer_dtype(arr) if inferred in ['string', 'bytes', 'unicode', @@ -1028,13 +1057,25 @@ def find_common_type(types): return np.find_common_type(types, []) -def _cast_scalar_to_array(shape, value, dtype=None): +def cast_scalar_to_array(shape, value, dtype=None): """ create np.ndarray of specified shape and dtype, filled with values + + Parameters + ---------- + shape : tuple + value : scalar value + dtype : np.dtype, optional + dtype to coerce + + Returns + ------- + ndarray of shape, filled with value, of specified / inferred dtype + """ if dtype is None: - dtype, fill_value = _infer_dtype_from_scalar(value) + dtype, fill_value = infer_dtype_from_scalar(value) else: fill_value = value diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index bfec1ec3ebe8c..7af9b504ec130 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -11,7 +11,8 @@ ExtensionDtype) from .generic import (ABCCategorical, ABCPeriodIndex, ABCDatetimeIndex, ABCSeries, - ABCSparseArray, ABCSparseSeries) + ABCSparseArray, ABCSparseSeries, + ABCIndexClass) from .inference import is_string_like from .inference import * # noqa @@ -1535,11 +1536,22 @@ def is_bool_dtype(arr_or_dtype): if arr_or_dtype is None: return False + try: tipo = _get_dtype_type(arr_or_dtype) except ValueError: # this isn't even a dtype return False + + if isinstance(arr_or_dtype, ABCIndexClass): + + # TODO(jreback) + # we don't have a boolean Index class + # so its object, we need to infer to + # guess this + return (arr_or_dtype.is_object and + arr_or_dtype.inferred_type == 'boolean') + return issubclass(tipo, np.bool_) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 
055ba4494c126..ee2db84513f06 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -25,7 +25,7 @@ import numpy.ma as ma from pandas.core.dtypes.cast import ( - maybe_upcast, infer_dtype_from_scalar, + maybe_upcast, maybe_cast_to_datetime, maybe_infer_to_datetimelike, maybe_convert_platform, @@ -33,6 +33,7 @@ invalidate_string_dtypes, coerce_to_dtypes, maybe_upcast_putmask, + cast_scalar_to_array, find_common_type) from pandas.core.dtypes.common import ( is_categorical_dtype, @@ -356,8 +357,8 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, raise_with_traceback(exc) if arr.ndim == 0 and index is not None and columns is not None: - values = _cast_scalar_to_array((len(index), len(columns)), - data, dtype=dtype) + values = cast_scalar_to_array((len(index), len(columns)), + data, dtype=dtype) mgr = self._init_ndarray(values, index, columns, dtype=values.dtype, copy=False) else: @@ -2649,8 +2650,8 @@ def reindexer(value): else: # upcast the scalar - value = _cast_scalar_to_array(len(self.index), value) - value = _possibly_cast_to_datetime(value, value.dtype) + value = cast_scalar_to_array(len(self.index), value) + value = maybe_cast_to_datetime(value, value.dtype) # return internal types directly if is_extension_type(value): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e541f1532d0a0..cf66487b77020 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -13,7 +13,6 @@ from pandas.core.dtypes.common import ( _ensure_int64, _ensure_object, - needs_i8_conversion, is_scalar, is_number, is_integer, is_bool, @@ -5301,48 +5300,6 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, raise NotImplemented("cannot align with a higher dimensional " "NDFrame") - elif is_list_like(other): - - if self.ndim == 1: - - # try to set the same dtype as ourselves - try: - new_other = np.array(other, dtype=self.dtype) - except ValueError: - new_other = np.array(other) - except TypeError: - new_other = 
other - - # we can end up comparing integers and m8[ns] - # which is a numpy no no - is_i8 = needs_i8_conversion(self.dtype) - if is_i8: - matches = False - else: - matches = (new_other == np.array(other)) - - if matches is False or not matches.all(): - - # coerce other to a common dtype if we can - if needs_i8_conversion(self.dtype): - try: - other = np.array(other, dtype=self.dtype) - except: - other = np.array(other) - else: - other = np.asarray(other) - other = np.asarray(other, - dtype=np.common_type(other, - new_other)) - - # we need to use the new dtype - try_quick = False - else: - other = new_other - else: - - other = np.array(other) - if isinstance(other, np.ndarray): if other.shape != self.shape: @@ -5407,7 +5364,7 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, # reconstruct the block manager self._check_inplace_setting(other) - new_data = self._data.putmask(mask=cond, new=other, align=align, + new_data = self._data.putmask(mask=cond, other=other, align=align, inplace=True, axis=block_axis, transpose=self._AXIS_REVERSED) self._update_inplace(new_data) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 2af4f112ca941..759501c604cc4 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -22,6 +22,7 @@ _ensure_platform_int, is_integer, is_float, + is_bool, is_dtype_equal, is_object_dtype, is_categorical_dtype, @@ -608,11 +609,21 @@ def repeat(self, repeats, *args, **kwargs): @Appender(_index_shared_docs['where']) def where(self, cond, other=None): + if other is None: other = self._na_value - values = np.where(cond, self.values, other) dtype = self.dtype + values = self.values + + if is_bool(other) or is_bool_dtype(other): + + # bools force casting + values = values.astype(object) + dtype = None + + values = np.where(cond, values, other) + if self._is_numeric_dtype and np.any(isnull(values)): # We can't coerce to the numeric dtype of "self" (unless # it's float) if there are NaN 
values in our output. @@ -1040,6 +1051,7 @@ def _convert_can_do_setop(self, other): def _convert_for_op(self, value): """ Convert value to be insertable to ndarray """ + return value def _assert_can_do_op(self, value): @@ -3615,6 +3627,7 @@ def fillna(self, value=None, downcast=None): # no need to care metadata other than name # because it can't have freq if return Index(result, name=self.name) + return self._shallow_copy() _index_shared_docs['dropna'] = """ diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index bdae0ac7ac5e9..ca6ff06534824 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -4,7 +4,9 @@ from pandas.core.dtypes.common import ( is_dtype_equal, pandas_dtype, is_float_dtype, is_object_dtype, - is_integer_dtype, is_scalar) + is_bool_dtype, + is_integer_dtype, is_scalar, + is_bool) from pandas.core.common import _asarray_tuplesafe, _values_from_object from pandas import compat @@ -63,6 +65,16 @@ def _convert_tolerance(self, tolerance): raise ValueError('tolerance argument for %s must be numeric: %r' % (type(self).__name__, tolerance)) + def _convert_for_op(self, value): + """ Convert value to be insertable to ndarray """ + + if is_bool(value) or is_bool_dtype(value): + # force conversion to object + # so we don't lose the bools + raise TypeError + + return value + @classmethod def _assert_safe_casting(cls, data, subarr): """ diff --git a/pandas/core/internals.py b/pandas/core/internals.py index b9913722f86e2..6076ca4722b16 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -14,9 +14,11 @@ CategoricalDtype) from pandas.core.dtypes.common import ( _TD_DTYPE, _NS_DTYPE, - _ensure_int64, _ensure_platform_int, + _ensure_int64, + _ensure_platform_int, is_integer, is_dtype_equal, + is_bool_dtype, is_timedelta64_dtype, is_datetime64_dtype, is_datetimetz, is_sparse, is_categorical, is_categorical_dtype, @@ -33,17 +35,16 @@ _get_dtype) from pandas.core.dtypes.cast import ( 
maybe_downcast_to_dtype, - maybe_convert_string_to_object, maybe_upcast, - maybe_convert_scalar, maybe_promote, + maybe_promote, infer_dtype_from_scalar, + infer_dtype_from, soft_convert_objects, maybe_convert_objects, astype_nansafe, find_common_type) from pandas.core.dtypes.missing import ( - isnull, array_equivalent, - _is_na_compat, + isnull, notnull, array_equivalent, is_null_datelike_scalar) import pandas.core.dtypes.concat as _concat @@ -374,7 +375,6 @@ def fillna(self, value, limit=None, inplace=False, downcast=None, else: return self.copy() - original_value = value mask = isnull(self.values) if limit is not None: if not is_integer(limit): @@ -387,25 +387,10 @@ def fillna(self, value, limit=None, inplace=False, downcast=None, mask[mask.cumsum(self.ndim - 1) > limit] = False # fillna, but if we cannot coerce, then try again as an ObjectBlock - try: - values, _, _, _ = self._try_coerce_args(self.values, value) - # value may be converted to internal, thus drop - blocks = self.putmask(mask, value, inplace=inplace) - blocks = [b.make_block(values=self._try_coerce_result(b.values)) - for b in blocks] - return self._maybe_downcast(blocks, downcast) - except (TypeError, ValueError): - - # we can't process the value, but nothing to do - if not mask.any(): - return self if inplace else self.copy() - - # we cannot coerce the underlying object, so - # make an ObjectBlock - return self.to_object_block(mgr=mgr).fillna(original_value, - limit=limit, - inplace=inplace, - downcast=False) + blocks = self.putmask(mask, value, inplace=inplace, mgr=mgr) + blocks = [b.make_block(values=self._try_coerce_result(b.values)) + for b in blocks] + return self._maybe_downcast(blocks, downcast) def _maybe_downcast(self, blocks, downcast=None): @@ -549,9 +534,6 @@ def convert(self, copy=True, **kwargs): def _can_hold_element(self, value): raise NotImplementedError() - def _try_cast(self, value): - raise NotImplementedError() - def _try_cast_result(self, result, dtype=None): """ try to 
cast the result to our original type, we may have roundtripped thru object in the mean-time @@ -591,6 +573,14 @@ def _try_operate(self, values): def _try_coerce_args(self, values, other): """ provide coercion to our input arguments """ + + if np.any(notnull(other)) and not self._can_hold_element(other): + # coercion issues + # let higher levels handle + raise TypeError("cannot convert {} to an {}".format( + type(other).__name__, + type(self).__name__.lower().replace('Block', ''))) + return values, False, other, False def _try_coerce_result(self, result): @@ -651,18 +641,15 @@ def replace(self, to_replace, value, inplace=False, filter=None, filtered_out = ~self.mgr_locs.isin(filter) mask[filtered_out.nonzero()[0]] = False - blocks = self.putmask(mask, value, inplace=inplace) + blocks = self.putmask(mask, value, inplace=inplace, mgr=mgr) if convert: blocks = [b.convert(by_item=True, numeric=False, copy=not inplace) for b in blocks] return blocks except (TypeError, ValueError): - # we can't process the value, but nothing to do - if not mask.any(): - return self if inplace else self.copy() - - return self.to_object_block(mgr=mgr).replace( + block = self.to_object_block(mgr) + return block.replace( to_replace=original_to_replace, value=value, inplace=inplace, filter=filter, regex=regex, convert=convert) @@ -677,55 +664,28 @@ def setitem(self, indexer, value, mgr=None): indexer is a direct slice/positional indexer; value must be a compatible shape """ + orig_value = value # coerce None values, if appropriate if value is None: if self.is_numeric: value = np.nan - # coerce if block dtype can store value - values = self.values + # coerce args try: - values, _, value, _ = self._try_coerce_args(values, value) - # can keep its own dtype - if hasattr(value, 'dtype') and is_dtype_equal(values.dtype, - value.dtype): - dtype = self.dtype - else: - dtype = 'infer' - - except (TypeError, ValueError): - # current dtype cannot store value, coerce to common dtype - find_dtype = False - 
- if hasattr(value, 'dtype'): - dtype = value.dtype - find_dtype = True - - elif is_scalar(value): - if isnull(value): - # NaN promotion is handled in latter path - dtype = False - else: - dtype, _ = _infer_dtype_from_scalar(value, - pandas_dtype=True) - find_dtype = True - else: - dtype = 'infer' + values, _, value, _ = self._try_coerce_args(self.values, value) + arr_value = np.array(value) + except (ValueError, TypeError): - if find_dtype: - dtype = _find_common_type([values.dtype, dtype]) - if not is_dtype_equal(self.dtype, dtype): - b = self.astype(dtype) - return b.setitem(indexer, value, mgr=mgr) - - # value must be storeable at this moment - arr_value = np.array(value) + # coercion has failed to the current type + # upcast to something that can hold it + block = self.coerce_to_target_dtype(value) + return block.setitem(indexer, orig_value, mgr=mgr) # cast the values to a type that can hold nan (if necessary) - if not self._can_hold_element(value): - dtype, _ = maybe_promote(arr_value.dtype) - values = values.astype(dtype) + if not self._can_hold_element(orig_value): + block = self.coerce_to_target_dtype(value) + return block.setitem(indexer, orig_value, mgr=mgr) transf = (lambda x: x.T) if self.ndim == 2 else (lambda x: x) values = transf(values) @@ -749,8 +709,19 @@ def setitem(self, indexer, value, mgr=None): raise ValueError("cannot set using a slice indexer with a " "different length than the value") - def _is_scalar_indexer(indexer): - # return True if we are all scalar indexers + try: + + def _is_scalar_indexer(indexer): + # return True if we are all scalar indexers + + if arr_value.ndim == 1: + if not isinstance(indexer, tuple): + indexer = tuple([indexer]) + return all([is_scalar(idx) for idx in indexer]) + return False + + def _is_empty_indexer(indexer): + # return a boolean if we have an empty indexer if arr_value.ndim == 1: if not isinstance(indexer, tuple): @@ -782,6 +753,8 @@ def _is_scalar_indexer(indexer): else: values[indexer] = value + # 
TODO: replace with coerce_to_target_dtype + ##### # coerce and try to infer the dtypes of the result if hasattr(value, 'dtype') and is_dtype_equal(values.dtype, value.dtype): @@ -802,55 +775,37 @@ def _is_scalar_indexer(indexer): raise except TypeError: - def _is_empty_indexer(indexer): - # return a boolean if we have an empty indexer + # cast to the passed dtype if possible + # otherwise raise the original error + try: + # e.g. we are uint32 and our value is uint64 + # this is for compat with older numpies + block = self.make_block(transf(values.astype(value.dtype))) + return block.setitem(indexer=indexer, value=value, mgr=mgr) - if arr_value.ndim == 1: - if not isinstance(indexer, tuple): - indexer = tuple([indexer]) - return any(isinstance(idx, np.ndarray) and len(idx) == 0 - for idx in indexer) - return False + except: + pass - # empty indexers - # 8669 (empty) - if _is_empty_indexer(indexer): - pass + raise - # setting a single element for each dim and with a rhs that could - # be say a list - # GH 6043 - elif _is_scalar_indexer(indexer): - values[indexer] = value - - # if we are an exact match (ex-broadcasting), - # then use the resultant dtype - elif (len(arr_value.shape) and - arr_value.shape[0] == values.shape[0] and - np.prod(arr_value.shape) == np.prod(values.shape)): - values[indexer] = value - values = values.astype(arr_value.dtype) - - # set - else: - values[indexer] = value + except Exception: + pass - # coerce and try to infer the dtypes of the result - values = self._try_coerce_and_cast_result(values, dtype) - block = self.make_block(transf(values), fastpath=True) - return block + return [self] - def putmask(self, mask, new, align=True, inplace=False, axis=0, + def putmask(self, mask, other, align=True, inplace=False, axis=0, transpose=False, mgr=None): - """ putmask the data to the block; it is possible that we may create a - new dtype of block + """ putmask the data to the block; we may create 1 or more + split blocks, with different dtypes 
return the resulting block(s) + this will NOT raise to the outside context! + Parameters ---------- mask : the condition to respect - new : a ndarray/object + other : a ndarray/object align : boolean, perform alignment on other/cond, default is True inplace : perform inplace modification, default is False axis : int @@ -861,49 +816,78 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, ------- a list of new blocks, the result of the putmask """ + new_values = self.values + orig_other = other - new_values = self.values if inplace else self.values.copy() - - if hasattr(new, 'reindex_axis'): - new = new.values + if hasattr(other, 'reindex_axis'): + other = other.values if hasattr(mask, 'reindex_axis'): mask = mask.values # if we are passed a scalar None, convert it here - if not is_list_like(new) and isnull(new) and not self.is_object: - new = self.fill_value + if is_scalar(other) and isnull(other) and not self.is_object: + other = self.fill_value + + # we will raise with an incompt type here + try: + _, _, other, _ = self._try_coerce_args(new_values, other) + except (ValueError, TypeError): + # coercion has failed to the current type + # upcast to object; if we are a single column + # already, convert to object + pass + + if self._can_hold_element(orig_other): + # we may have converted the other + # at this point + + new_values = self.values if inplace else self.values.copy() - if self._can_hold_element(new): if transpose: new_values = new_values.T - new = self._try_cast(new) - # If the default repeat behavior in np.putmask would go in the # wrong direction, then explictly repeat and reshape new instead - if getattr(new, 'ndim', 0) >= 1: - if self.ndim - 1 == new.ndim and axis == 1: - new = np.repeat( - new, new_values.shape[-1]).reshape(self.shape) - new = new.astype(new_values.dtype) + if getattr(other, 'ndim', 0) >= 1: + if self.ndim - 1 == other.ndim and axis == 1: + other = np.repeat( + other, new_values.shape[-1]).reshape(self.shape) + other = 
other.astype(new_values.dtype) + + # we require exact matches between the len of the + # values we are setting (or is compat). np.putmask + # doesn't check this and will simply truncate / pad + # the output, but we want sane error messages + # + # TODO: this prob needs some better checking + # for 2D cases + if ((is_list_like(other) and + np.any(mask[mask]) and + getattr(other, 'ndim', 1) == 1)): + + if not (mask.shape[-1] == len(other) or + mask[mask].shape[-1] == len(other) or + len(other) == 1): + raise ValueError("cannot assign mismatch " + "length to masked array") - np.putmask(new_values, mask, new) + np.putmask(new_values, mask, other) # maybe upcast me elif mask.any(): if transpose: mask = mask.T - if isinstance(new, np.ndarray): - new = new.T + if isinstance(other, np.ndarray): + other = other.T axis = new_values.ndim - axis - 1 # Pseudo-broadcast - if getattr(new, 'ndim', 0) >= 1: - if self.ndim - 1 == new.ndim: - new_shape = list(new.shape) + if getattr(other, 'ndim', 0) >= 1: + if self.ndim - 1 == other.ndim: + new_shape = list(other.shape) new_shape.insert(axis, 1) - new = new.reshape(tuple(new_shape)) + other = other.reshape(tuple(new_shape)) # need to go column by column new_blocks = [] @@ -912,33 +896,30 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, m = mask[i] v = new_values[i] - # need a new block if m.any(): - if isinstance(new, np.ndarray): - n = np.squeeze(new[i % new.shape[0]]) - else: - n = np.array(new) - - # type of the new block - dtype, _ = maybe_promote(n.dtype) - - # we need to explicitly astype here to make a copy - n = n.astype(dtype) + # need a new block + block = make_block( + _block_shape(v, ndim=self.ndim), + placement=[ref_loc]) + block = block.putmask_a_column(m, orig_other, + inplace=inplace) - nv = _putmask_smart(v, m, n) else: nv = v if inplace else v.copy() + nv = nv[np.newaxis] - # Put back the dimension that was taken from it and make - # a block out of the result. 
- block = self.make_block(values=nv[np.newaxis], - placement=[ref_loc], fastpath=True) + # Put back the dimension that was taken + # from it and make + # a block out of the result. + block = self.make_block( + values=nv, placement=[ref_loc], fastpath=True) new_blocks.append(block) else: - nv = _putmask_smart(new_values, mask, new) - new_blocks.append(self.make_block(values=nv, fastpath=True)) + + b = self.putmask_a_column(mask, orig_other, inplace=inplace) + new_blocks.append(b) return new_blocks @@ -950,6 +931,79 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, return [self.make_block(new_values, fastpath=True)] + def putmask_a_column(self, mask, other, inplace=False): + """ + a helper routine that will putmask on a single column + return a block with a potentially new dtype + + Parameters + ---------- + mask : boolean mask, same shape as self + other : scalar or shape compat with self + inplace : boolean, default False + operate in-place + + Returns + ------- + Block + + """ + + try: + _, _, new, _ = self._try_coerce_args(self.values, other) + + if not inplace: + self = self.copy() + np.putmask(self.values, mask, new) + return self + + except (ValueError, TypeError): + pass + + self = self.coerce_to_target_dtype(other) + return self.putmask_a_column(mask=mask, other=other, + inplace=False) + + def coerce_to_target_dtype(self, other): + """ + coerce the current block to a dtype compat for other + we will return a block, possibly object, and not raise + + we can also safely try to coerce to the same dtype + and will receive the same block + """ + + # if we cannot then coerce to object + dtype, _ = infer_dtype_from(other, pandas_dtype=True) + + if is_dtype_equal(self.dtype, dtype): + return self + + if self.is_bool or is_object_dtype(dtype) or is_bool_dtype(dtype): + # we don't upcast to bool + return self.astype(object) + + elif self.is_datelike: + + # we don't upcast i8 + if is_integer_dtype(dtype): + return self.astype(object) + + # don't upcast 
timezone with different timezone or no timezone + if self.is_datetime: + mytz = getattr(self.dtype, 'tz', None) + othertz = getattr(dtype, 'tz', None) + + if str(mytz) != str(othertz): + return self.astype(object) + + try: + return self.astype(dtype) + except (ValueError, TypeError): + pass + + return self.astype(object) + def interpolate(self, method='pad', axis=0, index=None, values=None, inplace=False, limit=None, limit_direction='forward', fill_value=None, coerce=False, downcast=None, mgr=None, @@ -1180,8 +1234,17 @@ def eval(self, func, other, raise_on_error=True, try_cast=False, mgr=None): transf = (lambda x: x.T) if is_transposed else (lambda x: x) # coerce/transpose the args if needed - values, values_mask, other, other_mask = self._try_coerce_args( - transf(values), other) + try: + values, values_mask, other, other_mask = self._try_coerce_args( + transf(values), other) + except (ValueError, TypeError): + + # coercion has failed to the current type + # upcast to object + block = self.to_object_block(mgr) + return block.eval(func=func, other=other, + raise_on_error=raise_on_error, + try_cast=try_cast, mgr=None) # get the result, may need to transpose the other def get_result(other): @@ -1210,19 +1273,6 @@ def get_result(other): return self._try_coerce_result(result) - # error handler if we have an issue operating with the function - def handle_error(): - - if raise_on_error: - # The 'detail' variable is defined in outer scope. 
- raise TypeError('Could not operate %s with block values %s' % - (repr(other), str(detail))) # noqa - else: - # return the values - result = np.empty(values.shape, dtype='O') - result.fill(np.nan) - return result - # get the result try: with np.errstate(all='ignore'): @@ -1232,8 +1282,19 @@ def handle_error(): # GH4576, so raise instead of allowing to pass through except ValueError as detail: raise + + # convert these to TypeErrors + except NotImplementedError as detail: + raise TypeError(detail) + except Exception as detail: - result = handle_error() + + if raise_on_error: + raise + + # return the values + result = np.empty(values.shape, dtype='O') + result.fill(np.nan) # technically a broadcast error in numpy can 'work' by returning a # boolean False @@ -1278,7 +1339,6 @@ def where(self, other, cond, align=True, raise_on_error=True, ------- a new block(s), the result of the func """ - values = self.values if transpose: values = values.T @@ -1299,29 +1359,37 @@ def where(self, other, cond, align=True, raise_on_error=True, raise ValueError("where must have a condition that is ndarray " "like") - other = maybe_convert_string_to_object(other) - other = maybe_convert_scalar(other) + # all True + if cond.ravel().all(): + return self.make_block(self.values) + + try: + values, _, other, _ = self._try_coerce_args(values, other) + except (ValueError, TypeError) as detail: + + # try to coerce to the other dtype + block = self.coerce_to_target_dtype(other) + return block.where(other, cond, align=align, + raise_on_error=raise_on_error, + try_cast=try_cast, axis=axis, + transpose=transpose, mgr=mgr) # our where function def func(cond, values, other): - if cond.ravel().all(): - return values - - values, values_mask, other, other_mask = self._try_coerce_args( - values, other) try: - return self._try_coerce_result(expressions.where( - cond, values, other, raise_on_error=True)) + result = expressions.where( + cond, values, other, raise_on_error=True) + return 
self._try_coerce_result(result) except Exception as detail: + if raise_on_error: - raise TypeError('Could not operate [%s] with block values ' - '[%s]' % (repr(other), str(detail))) - else: - # return the values - result = np.empty(values.shape, dtype='float64') - result.fill(np.nan) - return result + raise + + # return the values + result = np.empty(values.shape, dtype='float64') + result.fill(np.nan) + return result # see if we can operate on the entire block, or need item-by-item # or if we are a single block (ndim == 1) @@ -1583,13 +1651,21 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, # use block's copy logic. # .values may be an Index which does shallow copy by default new_values = self.values if inplace else self.copy().values - new_values, _, new, _ = self._try_coerce_args(new_values, new) + try: + new_values, _, new, _ = self._try_coerce_args(new_values, new) + except: + + # we cannot coerce the underlying object, so + # make an ObjectBlock + block = self.to_object_block(mgr=mgr) + return block.putmask(mask=mask, other=new, align=align, + inplace=inplace, axis=axis, + transpose=transpose, mgr=mgr) if isinstance(new, np.ndarray) and len(new) == len(mask): new = new[mask] mask = _safe_reshape(mask, new_values.shape) - new_values[mask] = new new_values = self._try_coerce_result(new_values) return [self.make_block(values=new_values)] @@ -1629,16 +1705,10 @@ def _can_hold_element(self, element): tipo = element.dtype.type return (issubclass(tipo, (np.floating, np.integer)) and not issubclass(tipo, (np.datetime64, np.timedelta64))) - return (isinstance(element, (float, int, np.float_, np.int_)) and + return (isinstance(element, (float, int, np.floating, np.integer)) and not isinstance(element, (bool, np.bool_, datetime, timedelta, np.datetime64, np.timedelta64))) - def _try_cast(self, element): - try: - return float(element) - except: # pragma: no cover - return element - def to_native_types(self, slicer=None, na_rep='', float_format=None, 
decimal='.', quoting=None, **kwargs): """ convert to our native types format, slicing if desired """ @@ -1682,17 +1752,14 @@ class ComplexBlock(FloatOrComplexBlock): def _can_hold_element(self, element): if is_list_like(element): element = np.array(element) - return issubclass(element.dtype.type, - (np.floating, np.integer, np.complexfloating)) + return (issubclass( + element.dtype.type, (np.floating, + np.integer, + np.complexfloating)) and not + issubclass(element.dtype.type, np.bool_)) return (isinstance(element, (float, int, complex, np.float_, np.int_)) and - not isinstance(bool, np.bool_)) - - def _try_cast(self, element): - try: - return complex(element) - except: # pragma: no cover - return element + not isinstance(element, (bool, np.bool_))) def should_store(self, value): return issubclass(value.dtype.type, np.complexfloating) @@ -1711,12 +1778,6 @@ def _can_hold_element(self, element): not issubclass(tipo, (np.datetime64, np.timedelta64))) return is_integer(element) - def _try_cast(self, element): - try: - return int(element) - except: # pragma: no cover - return element - def should_store(self, value): return is_integer_dtype(value) and value.dtype == self.dtype @@ -1755,11 +1816,28 @@ class TimeDeltaBlock(DatetimeLikeBlockMixin, IntBlock): def _box_func(self): return lambda x: tslib.Timedelta(x, unit='ns') - def fillna(self, value, **kwargs): + def _can_hold_element(self, element): + """ + boolean if we can hold this element + """ + + if is_list_like(element): + + element = np.asarray(element) + return element.dtype == _TD_DTYPE + + elif isnull(element): + return True + + elif isinstance(element, timedelta): + return True + return False + + def fillna(self, value, **kwargs): # allow filling with integers to be # interpreted as seconds - if not isinstance(value, np.timedelta64): + if not isinstance(value, np.timedelta64) and is_integer(value): value = Timedelta(value, unit='s') return super(TimeDeltaBlock, self).fillna(value, **kwargs) @@ -1783,7 +1861,7 
@@ def _try_coerce_args(self, values, other): other_mask = False if isinstance(other, bool): - raise TypeError + raise TypeError("cannot convert bool to a Timedelta") elif is_null_datelike_scalar(other): other = tslib.iNaT other_mask = True @@ -1795,14 +1873,15 @@ def _try_coerce_args(self, values, other): other = Timedelta(other).value elif isinstance(other, timedelta): other = Timedelta(other).value - elif isinstance(other, np.ndarray): - other_mask = isnull(other) + elif hasattr(other, 'dtype') and is_timedelta64_dtype(other): other = other.astype('i8', copy=False).view('i8') - else: - # scalar - other = Timedelta(other) other_mask = isnull(other) - other = other.value + else: + + # coercion issues + # let higher levels handle + raise TypeError("cannot convert {} to a Timedelta".format( + type(other).__name__)) return values, values_mask, other, other_mask @@ -1853,14 +1932,8 @@ class BoolBlock(NumericBlock): def _can_hold_element(self, element): if is_list_like(element): element = np.array(element) - return issubclass(element.dtype.type, np.integer) - return isinstance(element, (int, bool)) - - def _try_cast(self, element): - try: - return bool(element) - except: # pragma: no cover - return element + return issubclass(element.dtype.type, np.bool_) + return isinstance(element, bool) def should_store(self, value): return issubclass(value.dtype.type, np.bool_) @@ -1996,18 +2069,6 @@ def _maybe_downcast(self, blocks, downcast=None): def _can_hold_element(self, element): return True - def _try_coerce_args(self, values, other): - """ provide coercion to our input arguments """ - - if isinstance(other, ABCDatetimeIndex): - # to store DatetimeTZBlock as object - other = other.asobject.values - - return values, False, other, False - - def _try_cast(self, element): - return element - def should_store(self, value): return not (issubclass(value.dtype.type, (np.integer, np.floating, np.complexfloating, @@ -2299,17 +2360,33 @@ def _astype(self, dtype, mgr=None, **kwargs): 
return super(DatetimeBlock, self)._astype(dtype=dtype, **kwargs) def _can_hold_element(self, element): + """ + boolean if we can hold this element, will raise on a + tz-aware datetime + """ + if is_list_like(element): - element = np.array(element) - return element.dtype == _NS_DTYPE or element.dtype == np.int64 - return (is_integer(element) or isinstance(element, datetime) or - isnull(element)) - def _try_cast(self, element): - try: - return int(element) - except: - return element + # we cannot hold tz-aware + # higher level to handle + if getattr(element, 'tz', None) is not None: + return False + + element = np.asarray(element) + return element.dtype == _NS_DTYPE + + elif isnull(element): + return True + + elif isinstance(element, datetime): + + # we cannot hold tz-aware + if getattr(element, 'tzinfo', None) is not None: + return False + + return True + + return False def _try_coerce_args(self, values, other): """ @@ -2333,7 +2410,7 @@ def _try_coerce_args(self, values, other): other_mask = False if isinstance(other, bool): - raise TypeError + raise TypeError("cannot convert a bool to a Datetime") elif is_null_datelike_scalar(other): other = tslib.iNaT other_mask = True @@ -2344,28 +2421,26 @@ def _try_coerce_args(self, values, other): "naive Block") other_mask = isnull(other) other = other.asm8.view('i8') - else: - try: - other = np.asarray(other) - other_mask = isnull(other) + elif hasattr(other, 'dtype') and is_datetime64_dtype(other): + if is_datetime64tz_dtype(other): + raise TypeError("cannot coerce a Timestamp with a tz on a " + "naive Block") + other = other.view('i8') + other_mask = isnull(other) - other = other.astype('i8', copy=False).view('i8') - except ValueError: + else: - # coercion issues - # let higher levels handle - raise TypeError + # coercion issues + # let higher levels handle + raise TypeError("cannot convert a {} to a Datetime".format( + type(other).__name__)) return values, values_mask, other, other_mask def _try_coerce_result(self, 
result): """ reverse of try_coerce_args """ if isinstance(result, np.ndarray): - if result.dtype.kind in ['i', 'f', 'O']: - try: - result = result.astype('M8[ns]') - except ValueError: - pass + result = _coerce_array_to_datetime(result) elif isinstance(result, (np.integer, np.float, np.datetime64)): result = self._box_func(result) return result @@ -2440,6 +2515,37 @@ def copy(self, deep=True, mgr=None): values = values.copy(deep=True) return self.make_block_same_class(values) + def _can_hold_element(self, element): + """ + boolean if we can hold this element, will raise on a + tz-aware datetime + """ + + if is_list_like(element): + + dtype = getattr(element, 'dtype', None) + tz = getattr(dtype, 'tz', None) + + # we can only hold an identical tz-aware + if str(self.values.tz) != str(tz): + return False + + element = np.asarray(element) + return element.dtype == _NS_DTYPE + + elif isnull(element): + return True + + elif isinstance(element, datetime): + + # we can only hold an identical tz-aware + if str(self.values.tz) != str(getattr(element, 'tzinfo', None)): + return False + + return True + + return False + def external_values(self): """ we internally represent the data as a DatetimeIndex, but for external compat with ndarray, export as a ndarray of Timestamps @@ -2501,7 +2607,7 @@ def _try_coerce_args(self, values, other): other_mask = isnull(other) if isinstance(other, bool): - raise TypeError + raise TypeError("cannot convert a bool to a tz-aware Datetime") elif (is_null_datelike_scalar(other) or (is_scalar(other) and isnull(other))): other = tslib.iNaT @@ -2520,23 +2626,39 @@ def _try_coerce_args(self, values, other): raise ValueError("incompatible or non tz-aware value") other_mask = isnull(other) other = other.value + elif hasattr(other, 'dtype'): + tz = getattr(other, 'tz', None) + if tz is None or str(tz) != str(self.values.tz): + raise ValueError("incompatible or non tz-aware value") + other_mask = isnull(other) + other = other.view('i8') else: - raise 
TypeError + + if is_null_datelike_scalar(other): + other_mask = True + else: + # higher level to coerce + raise TypeError( + "cannot convert a {} to a tz-aware Datetime".format( + type(other).__name__)) return values, values_mask, other, other_mask def _try_coerce_result(self, result): """ reverse of try_coerce_args """ if isinstance(result, np.ndarray): - if result.dtype.kind in ['i', 'f', 'O']: - result = result.astype('M8[ns]') + result = _coerce_array_to_datetime(result) elif isinstance(result, (np.integer, np.float, np.datetime64)): result = lib.Timestamp(result, tz=self.values.tz) if isinstance(result, np.ndarray): # allow passing of > 1dim if its trivial if result.ndim > 1: result = result.reshape(np.prod(result.shape)) - result = self.values._shallow_copy(result) + + try: + result = self.values._shallow_copy(result) + except (TypeError, ValueError): + pass return result @@ -2619,6 +2741,11 @@ def sp_index(self): def kind(self): return self.values.kind + def _can_hold_element(self, element): + """ we should actually check that our dtype is compat + with the inferred type """ + return True + def _astype(self, dtype, copy=False, raise_on_error=True, values=None, klass=None, mgr=None, **kwargs): if values is None: @@ -3111,7 +3238,7 @@ def apply(self, f, axes=None, filter=None, do_integrity_check=False, elif f == 'putmask': align_copy = False if kwargs.get('align', True): - align_keys = ['new', 'mask'] + align_keys = ['other', 'mask'] else: align_keys = ['mask'] elif f == 'eval': @@ -3302,16 +3429,6 @@ def comp(s): return isnull(values) return _maybe_compare(values, getattr(s, 'asm8', s), operator.eq) - def _cast_scalar(block, scalar): - dtype, val = infer_dtype_from_scalar(scalar, pandas_dtype=True) - if not is_dtype_equal(block.dtype, dtype): - dtype = find_common_type([block.dtype, dtype]) - block = block.astype(dtype) - # use original value - val = scalar - - return block, val - masks = [comp(s) for i, s in enumerate(src_list)] result_blocks = [] @@ -3334,8 
+3451,8 @@ def _cast_scalar(block, scalar): # particular block m = masks[i][b.mgr_locs.indexer] if m.any(): - b, val = _cast_scalar(b, d) - new_rb.extend(b.putmask(m, val, inplace=True)) + b = b.putmask(m, d, mgr=mgr) + new_rb.extend(b) else: new_rb.append(b) rb = new_rb @@ -4566,6 +4683,23 @@ def _interleaved_dtype(blocks): return dtype +def _coerce_array_to_datetime(result): + """ preserves the underlying array """ + + if result.dtype.kind in ['i', 'f']: + result = result.astype('M8[ns]') + elif result.dtype.kind in ['O']: + try: + # PITA + # we could have mixed naive & tz-aware + from pandas import to_datetime + result = to_datetime(result.ravel(), box=False) + except (TypeError, ValueError): + pass + + return result + + def _consolidate(blocks): """ Merge blocks having same dtype, exclude non-consolidating blocks @@ -4812,61 +4946,6 @@ def _transform_index(index, func, level=None): return Index(items, name=index.name) -def _putmask_smart(v, m, n): - """ - Return a new block, try to preserve dtype if possible. 
- - Parameters - ---------- - v : `values`, updated in-place (array like) - m : `mask`, applies to both sides (array like) - n : `new values` either scalar or an array like aligned with `values` - """ - # n should be the length of the mask or a scalar here - if not is_list_like(n): - n = np.array([n] * len(m)) - elif isinstance(n, np.ndarray) and n.ndim == 0: # numpy scalar - n = np.repeat(np.array(n, ndmin=1), len(m)) - - # see if we are only masking values that if putted - # will work in the current dtype - try: - nn = n[m] - - # make sure that we have a nullable type - # if we have nulls - if not _is_na_compat(v, nn[0]): - raise ValueError - - nn_at = nn.astype(v.dtype) - - # avoid invalid dtype comparisons - if not is_numeric_v_string_like(nn, nn_at): - comp = (nn == nn_at) - if is_list_like(comp) and comp.all(): - nv = v.copy() - nv[m] = nn_at - return nv - except (ValueError, IndexError, TypeError): - pass - - # change the dtype - dtype, _ = maybe_promote(n.dtype) - - if is_extension_type(v.dtype) and is_object_dtype(dtype): - nv = v.get_values(dtype) - else: - nv = v.astype(dtype) - - try: - nv[m] = n[m] - except ValueError: - idx, = np.where(np.squeeze(m)) - for mask_index, new_val in zip(idx, n[m]): - nv[mask_index] = new_val - return nv - - def concatenate_block_managers(mgrs_indexers, axes, concat_axis, copy): """ Concatenate block managers into one. 
diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 5a47258fa782e..3b8be05bfc4a7 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -8,6 +8,7 @@ import warnings from pandas.core.dtypes.cast import ( infer_dtype_from_scalar, + cast_scalar_to_array, maybe_cast_item) from pandas.core.dtypes.common import ( is_integer, is_list_like, @@ -178,8 +179,8 @@ def _init_data(self, data, copy, dtype, **kwargs): copy = False dtype = None elif is_scalar(data) and all(x is not None for x in passed_axes): - values = _cast_scalar_to_array([len(x) for x in passed_axes], - data, dtype=dtype) + values = cast_scalar_to_array([len(x) for x in passed_axes], + data, dtype=dtype) mgr = self._init_matrix(values, passed_axes, dtype=values.dtype, copy=False) copy = False @@ -580,7 +581,7 @@ def __setitem__(self, key, value): shape[1:], tuple(map(int, value.shape)))) mat = np.asarray(value) elif is_scalar(value): - mat = _cast_scalar_to_array(shape[1:], value) + mat = cast_scalar_to_array(shape[1:], value) else: raise TypeError('Cannot set item of type: %s' % str(type(value))) diff --git a/pandas/core/series.py b/pandas/core/series.py index 129f291e5f843..7f10899091c73 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1696,7 +1696,7 @@ def update(self, other): other = other.reindex_like(self) mask = notnull(other) - self._data = self._data.putmask(mask=mask, new=other, inplace=True) + self._data = self._data.putmask(mask=mask, other=other, inplace=True) self._maybe_update_cacher() # ---------------------------------------------------------------------- diff --git a/pandas/tests/dtypes/test_cast.py b/pandas/tests/dtypes/test_cast.py index 33fab26408784..e91a4d0317905 100644 --- a/pandas/tests/dtypes/test_cast.py +++ b/pandas/tests/dtypes/test_cast.py @@ -9,8 +9,9 @@ from datetime import datetime, timedelta, date import numpy as np -from pandas import Timedelta, Timestamp, DatetimeIndex, DataFrame, NaT - +from pandas import Period, Timedelta, Timestamp, 
DatetimeIndex, DataFrame, NaT +from pandas.api.types import is_dtype_equal +import pandas as pd from pandas.core.dtypes.cast import ( maybe_downcast_to_dtype, maybe_convert_objects, @@ -18,7 +19,8 @@ infer_dtype_from_array, maybe_convert_string_to_object, maybe_convert_scalar, - find_common_type) + find_common_type, + cast_scalar_to_array) from pandas.core.dtypes.dtypes import ( CategoricalDtype, DatetimeTZDtype, @@ -131,29 +133,85 @@ def test_infer_dtype_from_scalar(self): dtype, val = infer_dtype_from_scalar(data) assert dtype == 'm8[ns]' - for data in [date(2000, 1, 1), - Timestamp(1, tz='US/Eastern'), 'foo']: + for tz in ['UTC', 'US/Eastern', 'Asia/Tokyo']: + dt = Timestamp(1, tz=tz) + dtype, val = infer_dtype_from_scalar(dt, pandas_dtype=True) + assert dtype == 'datetime64[ns, {0}]'.format(tz) + assert val == dt.value + + dtype, val = infer_dtype_from_scalar(dt) + assert dtype == np.object_ + assert val == dt + + for freq in ['M', 'D']: + p = Period('2011-01-01', freq=freq) + dtype, val = infer_dtype_from_scalar(p, pandas_dtype=True) + assert dtype == 'period[{0}]'.format(freq) + assert val == p.ordinal + + dtype, val = infer_dtype_from_scalar(p) + dtype == np.object_ + assert val == p + + # misc + for data in [date(2000, 1, 1), 'foo']: dtype, val = infer_dtype_from_scalar(data) assert dtype == np.object_ @pytest.mark.parametrize( - "arr, expected", - [('foo', np.object_), - (b'foo', np.object_), - (1, np.int_), - (1.5, np.float_), - ([1], np.int_), - (np.array([1]), np.int_), - ([np.nan, 1, ''], np.object_), - (np.array([[1.0, 2.0]]), np.float_), - (Timestamp('20160101'), np.object_), - (np.datetime64('2016-01-01'), np.dtype(' 0.5, 'dates2'] = pd.NaT ops = {'gt': 'lt', 'lt': 'gt', 'ge': 'le', 'le': 'ge', 'eq': 'eq', 'ne': 'ne'} + for left, right in ops.items(): left_f = getattr(operator, left) right_f = getattr(operator, right) diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index 92d06950e61d2..d4f0ec547f9ef 
100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -98,17 +98,17 @@ def test_setitem_series_int64(self): self._assert_setitem_series_conversion(obj, 1.1, exp, np.int64) # int + complex -> complex - exp = pd.Series([1, 1 + 1j, 3, 4]) + exp = pd.Series([1, 1 + 1j, 3, 4], dtype=np.complex128) self._assert_setitem_series_conversion(obj, 1 + 1j, exp, np.complex128) - # int + bool -> int - exp = pd.Series([1, 1, 3, 4]) - self._assert_setitem_series_conversion(obj, True, exp, np.int64) + # int + bool -> object + exp = pd.Series([1, True, 3, 4]) + self._assert_setitem_series_conversion(obj, True, exp, np.object) def test_setitem_series_int8(self): # integer dtype coercion (no change) obj = pd.Series([1, 2, 3, 4], dtype=np.int8) - self.assertEqual(obj.dtype, np.int8) + assert obj.dtype == np.int8 exp = pd.Series([1, 1, 3, 4], dtype=np.int8) self._assert_setitem_series_conversion(obj, np.int32(1), exp, np.int8) @@ -131,13 +131,13 @@ def test_setitem_series_float64(self): self._assert_setitem_series_conversion(obj, 1.1, exp, np.float64) # float + complex -> complex - exp = pd.Series([1.1, 1 + 1j, 3.3, 4.4]) + exp = pd.Series([1.1, 1 + 1j, 3.3, 4.4], dtype=np.complex128) self._assert_setitem_series_conversion(obj, 1 + 1j, exp, np.complex128) - # float + bool -> float - exp = pd.Series([1.1, 1.0, 3.3, 4.4]) - self._assert_setitem_series_conversion(obj, True, exp, np.float64) + # float + bool -> object + exp = pd.Series([1.1, True, 3.3, 4.4]) + self._assert_setitem_series_conversion(obj, True, exp, np.object) def test_setitem_series_complex128(self): obj = pd.Series([1 + 1j, 2 + 2j, 3 + 3j, 4 + 4j]) @@ -145,7 +145,7 @@ def test_setitem_series_complex128(self): # complex + int -> complex exp = pd.Series([1 + 1j, 1, 3 + 3j, 4 + 4j]) - self._assert_setitem_series_conversion(obj, True, exp, np.complex128) + self._assert_setitem_series_conversion(obj, 1, exp, np.complex128) # complex + float -> complex exp = pd.Series([1 + 1j, 1.1, 3 + 
3j, 4 + 4j]) @@ -155,9 +155,9 @@ def test_setitem_series_complex128(self): exp = pd.Series([1 + 1j, 1 + 1j, 3 + 3j, 4 + 4j]) self._assert_setitem_series_conversion(obj, 1 + 1j, exp, np.complex128) - # complex + bool -> complex - exp = pd.Series([1 + 1j, 1, 3 + 3j, 4 + 4j]) - self._assert_setitem_series_conversion(obj, True, exp, np.complex128) + # complex + bool -> object + exp = pd.Series([1 + 1j, True, 3 + 3j, 4 + 4j]) + self._assert_setitem_series_conversion(obj, True, exp, np.object) def test_setitem_series_bool(self): obj = pd.Series([True, False, True, False]) @@ -211,12 +211,11 @@ def test_setitem_series_datetime64(self): exp, 'datetime64[ns]') # datetime64 + int -> object - # ToDo: The result must be object exp = pd.Series([pd.Timestamp('2011-01-01'), - pd.Timestamp(1), + 1, pd.Timestamp('2011-01-03'), pd.Timestamp('2011-01-04')]) - self._assert_setitem_series_conversion(obj, 1, exp, 'datetime64[ns]') + self._assert_setitem_series_conversion(obj, 1, exp, np.object) # datetime64 + object -> object exp = pd.Series([pd.Timestamp('2011-01-01'), @@ -225,8 +224,6 @@ def test_setitem_series_datetime64(self): pd.Timestamp('2011-01-04')]) self._assert_setitem_series_conversion(obj, 'x', exp, np.object) - # ToDo: add more tests once the above issue has been fixed - def test_setitem_series_datetime64tz(self): tz = 'US/Eastern' obj = pd.Series([pd.Timestamp('2011-01-01', tz=tz), @@ -267,14 +264,12 @@ def test_setitem_series_datetime64tz(self): pd.Timestamp('2011-01-04', tz=tz)]) self._assert_setitem_series_conversion(obj, 1, exp, np.object) - # ToDo: add more tests once the above issue has been fixed - def test_setitem_series_timedelta64(self): obj = pd.Series([pd.Timedelta('1 day'), pd.Timedelta('2 day'), pd.Timedelta('3 day'), pd.Timedelta('4 day')]) - self.assertEqual(obj.dtype, 'timedelta64[ns]') + assert obj.dtype == 'timedelta64[ns]' # timedelta64 + timedelta64 -> timedelta64 exp = pd.Series([pd.Timedelta('1 day'), @@ -285,12 +280,11 @@ def 
test_setitem_series_timedelta64(self): exp, 'timedelta64[ns]') # timedelta64 + int -> object - # ToDo: The result must be object exp = pd.Series([pd.Timedelta('1 day'), - pd.Timedelta(1), + 1, pd.Timedelta('3 day'), pd.Timedelta('4 day')]) - self._assert_setitem_series_conversion(obj, 1, exp, 'timedelta64[ns]') + self._assert_setitem_series_conversion(obj, 1, exp, np.object) # timedelta64 + object -> object exp = pd.Series([pd.Timedelta('1 day'), @@ -299,8 +293,7 @@ def test_setitem_series_timedelta64(self): pd.Timedelta('4 day')]) self._assert_setitem_series_conversion(obj, 'x', exp, np.object) - # ToDo: add more tests once the above issue has been fixed - + @pytest.mark.xfail(reason="add some tests for me") def test_setitem_series_period(self): pass @@ -673,13 +666,13 @@ def _where_int64_common(self, klass): self._assert_where_conversion(obj, cond, values, exp, np.complex128) - # int + bool -> int - exp = klass([1, 1, 3, 1]) - self._assert_where_conversion(obj, cond, True, exp, np.int64) + # int + bool -> object + exp = klass([1, True, 3, 1], dtype=object) + self._assert_where_conversion(obj, cond, True, exp, np.object) values = klass([True, False, True, True]) - exp = klass([1, 0, 3, 1]) - self._assert_where_conversion(obj, cond, values, exp, np.int64) + exp = klass([1, False, 3, True], dtype=object) + self._assert_where_conversion(obj, cond, values, exp, np.object) def test_where_series_int64(self): self._where_int64_common(pd.Series) @@ -719,13 +712,13 @@ def _where_float64_common(self, klass): self._assert_where_conversion(obj, cond, values, exp, np.complex128) - # float + bool -> float - exp = klass([1.1, 1.0, 3.3, 1.0]) - self._assert_where_conversion(obj, cond, True, exp, np.float64) + # float + bool -> object + exp = klass([1.1, True, 3.3, 1.0], dtype=object) + self._assert_where_conversion(obj, cond, True, exp, np.object) values = klass([True, False, True, True]) - exp = klass([1.1, 0.0, 3.3, 1.0]) - self._assert_where_conversion(obj, cond, values, exp, 
np.float64) + exp = klass([1.1, False, 3.3, True], dtype=object) + self._assert_where_conversion(obj, cond, values, exp, np.object) def test_where_series_float64(self): self._where_float64_common(pd.Series) @@ -762,13 +755,13 @@ def test_where_series_complex128(self): exp = pd.Series([1 + 1j, 6 + 6j, 3 + 3j, 8 + 8j]) self._assert_where_conversion(obj, cond, values, exp, np.complex128) - # complex + bool -> complex - exp = pd.Series([1 + 1j, 1, 3 + 3j, 1]) - self._assert_where_conversion(obj, cond, True, exp, np.complex128) + # complex + bool -> object + exp = pd.Series([1 + 1j, True, 3 + 3j, 1]) + self._assert_where_conversion(obj, cond, True, exp, np.object) values = pd.Series([True, False, True, True]) - exp = pd.Series([1 + 1j, 0, 3 + 3j, 1]) - self._assert_where_conversion(obj, cond, values, exp, np.complex128) + exp = pd.Series([1 + 1j, False, 3 + 3j, True]) + self._assert_where_conversion(obj, cond, values, exp, np.object) def test_where_index_complex128(self): pass @@ -778,29 +771,29 @@ def test_where_series_bool(self): assert obj.dtype == np.bool cond = pd.Series([True, False, True, False]) - # bool + int -> int - exp = pd.Series([1, 1, 1, 1]) - self._assert_where_conversion(obj, cond, 1, exp, np.int64) + # bool + int -> object + exp = pd.Series([True, 1, True, 1]) + self._assert_where_conversion(obj, cond, 1, exp, np.object) values = pd.Series([5, 6, 7, 8]) - exp = pd.Series([1, 6, 1, 8]) - self._assert_where_conversion(obj, cond, values, exp, np.int64) + exp = pd.Series([True, 6, True, 8], dtype=object) + self._assert_where_conversion(obj, cond, values, exp, np.object) - # bool + float -> float - exp = pd.Series([1.0, 1.1, 1.0, 1.1]) - self._assert_where_conversion(obj, cond, 1.1, exp, np.float64) + # bool + float -> object + exp = pd.Series([True, 1.1, True, 1.1]) + self._assert_where_conversion(obj, cond, 1.1, exp, np.object) values = pd.Series([5.5, 6.6, 7.7, 8.8]) - exp = pd.Series([1.0, 6.6, 1.0, 8.8]) - self._assert_where_conversion(obj, cond, 
values, exp, np.float64) + exp = pd.Series([True, 6.6, True, 8.8], dtype=object) + self._assert_where_conversion(obj, cond, values, exp, np.object) - # bool + complex -> complex - exp = pd.Series([1, 1 + 1j, 1, 1 + 1j]) - self._assert_where_conversion(obj, cond, 1 + 1j, exp, np.complex128) + # bool + complex -> object + exp = pd.Series([True, 1 + 1j, True, 1 + 1j], dtype=object) + self._assert_where_conversion(obj, cond, 1 + 1j, exp, np.object) values = pd.Series([5 + 5j, 6 + 6j, 7 + 7j, 8 + 8j]) - exp = pd.Series([1, 6 + 6j, 1, 8 + 8j]) - self._assert_where_conversion(obj, cond, values, exp, np.complex128) + exp = pd.Series([True, 6 + 6j, True, 8 + 8j], dtype=object) + self._assert_where_conversion(obj, cond, values, exp, np.object) # bool + bool -> bool exp = pd.Series([True, True, True, True]) @@ -839,12 +832,15 @@ def test_where_series_datetime64(self): pd.Timestamp('2012-01-04')]) self._assert_where_conversion(obj, cond, values, exp, 'datetime64[ns]') - # ToDo: coerce to object - msg = "cannot coerce a Timestamp with a tz on a naive Block" - with tm.assert_raises_regex(TypeError, msg): - obj.where(cond, pd.Timestamp('2012-01-01', tz='US/Eastern')) + # datetime64 + datetime64tz -> object + exp = pd.Series([pd.Timestamp('2011-01-01'), + pd.Timestamp('2012-01-01', tz='US/Eastern'), + pd.Timestamp('2011-01-03'), + pd.Timestamp('2012-01-01', tz='US/Eastern')]) + values = pd.Timestamp('2012-01-01', tz='US/Eastern') + self._assert_where_conversion(obj, cond, values, exp, np.object) - # ToDo: do not coerce to UTC, must be object + # TODO: do not coerce to UTC, must be object values = pd.Series([pd.Timestamp('2012-01-01', tz='US/Eastern'), pd.Timestamp('2012-01-02', tz='US/Eastern'), pd.Timestamp('2012-01-03', tz='US/Eastern'), @@ -984,9 +980,9 @@ def _fillna_float64_common(self, klass): else: NotImplementedError - # float + bool -> float - exp = klass([1.1, 1.0, 3.3, 4.4]) - self._assert_fillna_conversion(obj, True, exp, np.float64) + # float + bool -> object + exp = 
klass([1.1, True, 3.3, 4.4]) + self._assert_fillna_conversion(obj, True, exp, np.object) def test_fillna_series_float64(self): self._fillna_float64_common(pd.Series) @@ -1010,9 +1006,9 @@ def test_fillna_series_complex128(self): exp = pd.Series([1 + 1j, 1 + 1j, 3 + 3j, 4 + 4j]) self._assert_fillna_conversion(obj, 1 + 1j, exp, np.complex128) - # complex + bool -> complex - exp = pd.Series([1 + 1j, 1, 3 + 3j, 4 + 4j]) - self._assert_fillna_conversion(obj, True, exp, np.complex128) + # complex + bool -> object + exp = pd.Series([1 + 1j, True, 3 + 3j, 4 + 4j]) + self._assert_fillna_conversion(obj, True, exp, np.object) def test_fillna_index_complex128(self): self._fillna_float64_common(pd.Index) @@ -1045,15 +1041,15 @@ def test_fillna_series_datetime64(self): pd.Timestamp('2011-01-03'), pd.Timestamp('2011-01-04')]) value = pd.Timestamp('2012-01-01', tz='US/Eastern') + self._assert_fillna_conversion(obj, value, exp, np.object) # datetime64 + int => object - # ToDo: must be coerced to object exp = pd.Series([pd.Timestamp('2011-01-01'), - pd.Timestamp(1), + 1, pd.Timestamp('2011-01-03'), pd.Timestamp('2011-01-04')]) - self._assert_fillna_conversion(obj, 1, exp, 'datetime64[ns]') + self._assert_fillna_conversion(obj, 1, exp, np.object) # datetime64 + object => object exp = pd.Series([pd.Timestamp('2011-01-01'), @@ -1077,6 +1073,7 @@ def test_fillna_series_datetime64tz(self): pd.Timestamp('2011-01-03', tz=tz), pd.Timestamp('2011-01-04', tz=tz)]) value = pd.Timestamp('2012-01-01', tz=tz) + self._assert_fillna_conversion(obj, value, exp, 'datetime64[ns, US/Eastern]') diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py index da8a896cb6f4a..a6bca819f9898 100644 --- a/pandas/tests/indexing/test_datetime.py +++ b/pandas/tests/indexing/test_datetime.py @@ -1,5 +1,3 @@ -import pytest - import numpy as np import pandas as pd from pandas import date_range, Index, DataFrame, Series, Timestamp @@ -12,7 +10,6 @@ def 
test_indexing_with_datetime_tz(self): # 8260 # support datetime64 with tz - idx = Index(date_range('20130101', periods=3, tz='US/Eastern'), name='foo') dr = date_range('20130110', periods=3) @@ -56,10 +53,11 @@ def test_indexing_with_datetime_tz(self): 'US/Pacific') # trying to set a single element on a part of a different timezone - def f(): - df.loc[df.new_col == 'new', 'time'] = v + df2 = df.copy() + assert df2.time.dtype == 'datetime64[ns, UTC]' - pytest.raises(ValueError, f) + df2.loc[df2.new_col == 'new', 'time'] = v + assert df2.time.dtype == 'object' v = df.loc[df.new_col == 'new', 'time'] + pd.Timedelta('1s') df.loc[df.new_col == 'new', 'time'] = v diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index c3fb35b56a9a7..3f9b4146f1616 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -358,10 +358,11 @@ def test_multi_assign(self): # GH 14001 expected = DataFrame({'FC': ['a', np.nan, 'a', 'b', 'a', 'b'], 'PF': [0, 0, 0, 0, 1, 1], - 'col1': [0., 1., 4., 6., 8., 10.], + 'col1': [0, 1, 4, 6, 8, 10], 'col2': [12, 7, 16, np.nan, 20, 22]}) df2 = df.copy() df2.loc[mask, cols] = dft.loc[mask, cols].values + tm.assert_frame_equal(df2, expected) df2.loc[mask, cols] = dft.loc[mask, cols].values tm.assert_frame_equal(df2, expected) diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 6ded4d593a571..0397dda6d021f 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -771,13 +771,14 @@ def test_setitem_dtypes(self): s[0] = np.nan assert_series_equal(s, expected) + # bool & float -> object s = Series([False]) s.loc[0] = np.nan - assert_series_equal(s, Series([np.nan])) + assert_series_equal(s, Series([np.nan], dtype=object)) s = Series([False, True]) s.loc[0] = np.nan - assert_series_equal(s, Series([np.nan, 1.0])) + assert_series_equal(s, Series([np.nan, True])) def test_set_value(self): idx = 
self.ts.index[10] @@ -1360,14 +1361,16 @@ def test_where_dups(self): expected = Series([5, 11, 2, 5, 11, 2], index=[0, 1, 2, 0, 1, 2]) assert_series_equal(comb, expected) - def test_where_datetime(self): + def test_where_datetime_coerce(self): + s = Series(date_range('20130102', periods=2)) - expected = Series([10, 10], dtype='datetime64[ns]') + expected = Series([10, 10], dtype='object') mask = np.array([False, False]) rs = s.where(mask, [10, 10]) assert_series_equal(rs, expected) + # convert to object as we are passing non-datetime64 rs = s.where(mask, 10) assert_series_equal(rs, expected) @@ -1378,7 +1381,7 @@ def test_where_datetime(self): assert_series_equal(rs, expected) rs = s.where(mask, [10.0, np.nan]) - expected = Series([10, None], dtype='datetime64[ns]') + expected = Series([10, None], dtype=object) assert_series_equal(rs, expected) # GH 15701 @@ -1389,9 +1392,9 @@ def test_where_datetime(self): expected = Series([pd.NaT, s[1]]) assert_series_equal(rs, expected) - def test_where_timedelta(self): + def test_where_timedelta_coerce(self): s = Series([1, 2], dtype='timedelta64[ns]') - expected = Series([10, 10], dtype='timedelta64[ns]') + expected = Series([10, 10], dtype=object) mask = np.array([False, False]) rs = s.where(mask, [10, 10]) @@ -1407,9 +1410,38 @@ def test_where_timedelta(self): assert_series_equal(rs, expected) rs = s.where(mask, [10.0, np.nan]) - expected = Series([10, None], dtype='timedelta64[ns]') + expected = Series([10, None], dtype=object) assert_series_equal(rs, expected) + def test_where_consistency(self): + + # 16402 + # where should be consistent across various functions + s = Series([Timestamp('20130101'), pd.NaT]) + + # this is currently wrong :<, should be object + result = s.fillna(Timestamp('20130101', tz='US/Eastern')) + expected = Series([Timestamp('2012-12-31 19:00:00'), + Timestamp('2013-01-01 00:00:00')] + ).dt.tz_localize('US/Eastern') + assert_series_equal(result, expected) + + result = s.fillna('foo') + expected =
Series([Timestamp('20130101'), 'foo']) + assert_series_equal(result, expected) + + s2 = s.copy() + s2[1] = 'bar' + expected = Series([Timestamp('20130101'), 'bar']) + assert_series_equal(s2, expected) + + # see 16406 for construction bug + result = s.where([True, False], Timestamp('20130101', tz='US/Eastern')) + expected = Series([Timestamp('20130101'), + Timestamp('20130101', tz='US/Eastern')], + dtype=object) + assert_series_equal(result, expected) + def test_mask(self): # compare with tested results in test_where s = Series(np.random.randn(5)) @@ -1589,7 +1621,7 @@ def test_setitem_na(self): expected = Series([np.nan, 1, np.nan, 0]) s = Series([True, True, False, False]) s[::2] = np.nan - assert_series_equal(s, expected) + assert_series_equal(s, Series([np.nan, True, np.nan, False])) expected = Series([np.nan, np.nan, np.nan, np.nan, np.nan, 5, 6, 7, 8, 9]) diff --git a/pandas/tests/series/test_replace.py b/pandas/tests/series/test_replace.py index 35d13a62ca083..6903b93e176de 100644 --- a/pandas/tests/series/test_replace.py +++ b/pandas/tests/series/test_replace.py @@ -101,6 +101,7 @@ def test_replace_gh5319(self): ser = pd.Series(pd.date_range('20130101', periods=5)) expected = ser.copy() expected.loc[2] = pd.Timestamp('20120101') + result = ser.replace({pd.Timestamp('20130103'): pd.Timestamp('20120101')}) tm.assert_series_equal(result, expected) @@ -133,8 +134,8 @@ def check_replace(to_rep, val, expected): tm.assert_series_equal(expected, r) tm.assert_series_equal(expected, sc) - # MUST upcast to float - e = pd.Series([0., 1., 2., 3., 4.]) + # will NOT upcast to float + e = s tr, v = [3], [3.0] check_replace(tr, v, e)