From 4141317d09cf0113f36fda17c66c992b705b49a2 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 3 Oct 2017 15:24:24 -0500 Subject: [PATCH] Use argument dtype to inform coercion Master: ```python >>> import dask.dataframe as dd >>> s = dd.core.Scalar({('s', 0): 10}, 's', 'i8') >>> pdf = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6, 7], ... 'b': [7, 6, 5, 4, 3, 2, 1]}) >>> (pdf + s).dtypes a object b object dtype: object ``` Head: ```python >>> (pdf + s).dtypes a int64 b int64 dtype: object ``` This is more consistent with 0.20.3, while still keeping most of the changes in https://github.com/pandas-dev/pandas/pull/16821 Closes https://github.com/pandas-dev/pandas/issues/17767 --- pandas/core/internals.py | 46 ++++++++++-------- pandas/tests/internals/test_internals.py | 61 ++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 19 deletions(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 1fddf985f0cdbb..6cfaa237d4ea47 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -629,9 +629,8 @@ def convert(self, copy=True, **kwargs): def _can_hold_element(self, element): """ require the same dtype as ourselves """ dtype = self.values.dtype.type - if is_list_like(element): - element = np.asarray(element) - tipo = element.dtype.type + tipo = _maybe_get_element_dtype_type(element) + if tipo: return issubclass(tipo, dtype) return isinstance(element, dtype) @@ -1806,9 +1805,8 @@ class FloatBlock(FloatOrComplexBlock): _downcast_dtype = 'int64' def _can_hold_element(self, element): - if is_list_like(element): - element = np.asarray(element) - tipo = element.dtype.type + tipo = _maybe_get_element_dtype_type(element) + if tipo: return (issubclass(tipo, (np.floating, np.integer)) and not issubclass(tipo, (np.datetime64, np.timedelta64))) return (isinstance(element, (float, int, np.floating, np.int_)) and @@ -1856,9 +1854,9 @@ class ComplexBlock(FloatOrComplexBlock): is_complex = True def _can_hold_element(self, element): - if is_list_like(element): - 
element = np.array(element) - return issubclass(element.dtype.type, + tipo = _maybe_get_element_dtype_type(element) + if tipo: + return issubclass(tipo, (np.floating, np.integer, np.complexfloating)) return (isinstance(element, (float, int, complex, np.float_, np.int_)) and @@ -1874,9 +1872,8 @@ class IntBlock(NumericBlock): _can_hold_na = False def _can_hold_element(self, element): - if is_list_like(element): - element = np.array(element) - tipo = element.dtype.type + tipo = _maybe_get_element_dtype_type(element) + if tipo: return (issubclass(tipo, np.integer) and not issubclass(tipo, (np.datetime64, np.timedelta64)) and self.dtype.itemsize >= element.dtype.itemsize) @@ -1917,9 +1914,8 @@ def _box_func(self): return lambda x: tslib.Timedelta(x, unit='ns') def _can_hold_element(self, element): - if is_list_like(element): - element = np.array(element) - tipo = element.dtype.type + tipo = _maybe_get_element_dtype_type(element) + if tipo: return issubclass(tipo, np.timedelta64) return isinstance(element, (timedelta, np.timedelta64)) @@ -2018,9 +2014,9 @@ class BoolBlock(NumericBlock): _can_hold_na = False def _can_hold_element(self, element): - if is_list_like(element): - element = np.asarray(element) - return issubclass(element.dtype.type, np.bool_) + tipo = _maybe_get_element_dtype_type(element) + if tipo: + return issubclass(tipo, np.bool_) return isinstance(element, (bool, np.bool_)) def should_store(self, value): @@ -2450,7 +2446,9 @@ def _astype(self, dtype, mgr=None, **kwargs): return super(DatetimeBlock, self)._astype(dtype=dtype, **kwargs) def _can_hold_element(self, element): - if is_list_like(element): + tipo = _maybe_get_element_dtype_type(element) + if tipo: + # TODO: this still uses asarray, instead of dtype.type element = np.array(element) return element.dtype == _NS_DTYPE or element.dtype == np.int64 return (is_integer(element) or isinstance(element, datetime) or @@ -5525,3 +5523,13 @@ def _preprocess_slice_or_indexer(slice_or_indexer, length, 
allow_fill): if not allow_fill: indexer = maybe_convert_indices(indexer, length) return 'fancy', indexer, len(indexer) + + +def _maybe_get_element_dtype_type(element): + tipo = None + if hasattr(element, 'dtype'): + tipo = element.dtype.type + elif is_list_like(element): + element = np.asarray(element) + tipo = element.dtype.type + return tipo diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index f40fc151676da1..2df9e298336525 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -2,6 +2,7 @@ # pylint: disable=W0102 from datetime import datetime, date +import operator import sys import pytest import numpy as np @@ -1213,3 +1214,63 @@ def assert_add_equals(val, inc, result): with pytest.raises(ValueError): BlockPlacement(slice(2, None, -1)).add(-1) + + +class DummyElement(object): + def __init__(self, value, dtype): + self.value = value + self.dtype = np.dtype(dtype) + + def __array__(self): + return np.array(self.value, dtype=self.dtype) + + def __str__(self): + return "DummyElement({}, {})".format(self.value, self.dtype) + + def __repr__(self): + return str(self) + + def astype(self, dtype, copy=False): + self.dtype = dtype + return self + + def view(self, dtype): + return type(self)(self.value.view(dtype), dtype) + + def any(self, axis=None): + return bool(self.value) + + +class TestCanHoldElement(object): + @pytest.mark.parametrize('value, dtype', [ + (1, 'i8'), + (1.0, 'f8'), + (1j, 'complex128'), + (True, 'bool'), + # (np.timedelta64(20, 'ns'), '