From 1fc76b80abdd3e346e6ee055ea38585c959851fa Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 2 Jan 2019 16:12:43 -0800 Subject: [PATCH] CLN: Follow-ups to #24024 (#24573) --- pandas/core/algorithms.py | 3 --- pandas/core/arrays/datetimelike.py | 8 ------ pandas/core/arrays/datetimes.py | 3 +-- pandas/core/generic.py | 2 +- pandas/core/indexes/base.py | 14 ++++------ pandas/core/indexes/datetimelike.py | 29 ++++++++------------- pandas/core/indexes/datetimes.py | 40 ++++------------------------- pandas/core/indexes/multi.py | 4 +-- pandas/core/indexes/period.py | 11 -------- pandas/core/indexes/timedeltas.py | 5 ---- pandas/core/internals/blocks.py | 17 +++--------- pandas/core/internals/managers.py | 5 ++-- pandas/io/packers.py | 29 ++++++++++----------- 13 files changed, 44 insertions(+), 126 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 8d85b84ec7507..94d716a08d9dc 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -350,9 +350,6 @@ def unique(values): if is_extension_array_dtype(values): # Dispatch to extension dtype's unique. return values.unique() - elif is_datetime64tz_dtype(values): - # TODO: merge this check into the previous one following #24024 - return values.unique() original = values htable, _, values, dtype, ndtype = _get_hashtable_algo(values) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index d233e1d09a1e9..517c80619baea 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -47,10 +47,6 @@ def cmp_method(self, other): if isinstance(other, ABCDataFrame): return NotImplemented - if isinstance(other, (np.ndarray, ABCIndexClass, ABCSeries, cls)): - if other.ndim > 0 and len(self) != len(other): - raise ValueError('Lengths must match to compare') - if needs_i8_conversion(self) and needs_i8_conversion(other): # we may need to directly compare underlying # representations @@ -586,10 +582,6 @@ def view(self, dtype=None): # ------------------------------------------------------------------ # ExtensionArray Interface - # TODO: - # * _from_sequence - # * argsort / _values_for_argsort - # * _reduce def unique(self): result = unique1d(self.asi8) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index f42930929747d..ea2742c5808a3 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -280,8 +280,7 @@ def __init__(self, values, dtype=_NS_DTYPE, freq=None, copy=False): ) raise ValueError(msg.format(values.dtype)) - dtype = pandas_dtype(dtype) - _validate_dt64_dtype(dtype) + dtype = _validate_dt64_dtype(dtype) if freq == "infer": msg = ( diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e7c03de879e8a..3e782c6ef89e0 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3082,7 +3082,7 @@ def _box_item_values(self, key, values): def _maybe_cache_changed(self, item, value): """The object has called back to us saying maybe it has changed. """ - self._data.set(item, value, check=False) + self._data.set(item, value) @property def _is_cached(self): diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a26daba49f5d1..c702eae5da012 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -68,8 +68,7 @@ def cmp_method(self, other): if other.ndim > 0 and len(self) != len(other): raise ValueError('Lengths must match to compare') - from .multi import MultiIndex - if is_object_dtype(self) and not isinstance(self, MultiIndex): + if is_object_dtype(self) and not isinstance(self, ABCMultiIndex): # don't pass MultiIndex with np.errstate(all='ignore'): result = ops._comp_method_OBJECT_ARRAY(op, self.values, other) @@ -1307,8 +1306,7 @@ def set_names(self, names, level=None, inplace=False): names=['species', 'year']) """ - from .multi import MultiIndex - if level is not None and not isinstance(self, MultiIndex): + if level is not None and not isinstance(self, ABCMultiIndex): raise ValueError('Level must be None for non-MultiIndex') if level is not None and not is_list_like(level) and is_list_like( @@ -3145,9 +3143,8 @@ def _reindex_non_unique(self, target): @Appender(_index_shared_docs['join']) def join(self, other, how='left', level=None, return_indexers=False, sort=False): - from .multi import MultiIndex - self_is_mi = isinstance(self, MultiIndex) - other_is_mi = isinstance(other, MultiIndex) + self_is_mi = isinstance(self, ABCMultiIndex) + other_is_mi = isinstance(other, ABCMultiIndex) # try to figure out the join level # GH3662 @@ -4394,8 +4391,7 @@ def groupby(self, values): # TODO: if we are a MultiIndex, we can do better # that converting to tuples - from .multi import MultiIndex - if isinstance(values, MultiIndex): + if isinstance(values, ABCMultiIndex): values = values.values values = ensure_categorical(values) result = values._reverse_indexer() diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index daca4b5116027..5547266ea6bab 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -31,23 +31,24 @@ _index_doc_kwargs = dict(ibase._index_doc_kwargs) -def ea_passthrough(name): +def ea_passthrough(array_method): """ Make an alias for a method of the underlying ExtensionArray. Parameters ---------- - name : str + array_method : method on an Array class Returns ------- method """ + def method(self, *args, **kwargs): - return getattr(self._eadata, name)(*args, **kwargs) + return array_method(self._data, *args, **kwargs) - method.__name__ = name - # TODO: docstrings + method.__name__ = array_method.__name__ + method.__doc__ = array_method.__doc__ return method @@ -67,9 +68,10 @@ class DatetimeIndexOpsMixin(ExtensionOpsMixin): _resolution = cache_readonly(DatetimeLikeArrayMixin._resolution.fget) resolution = cache_readonly(DatetimeLikeArrayMixin.resolution.fget) - _box_values = ea_passthrough("_box_values") - _maybe_mask_results = ea_passthrough("_maybe_mask_results") - __iter__ = ea_passthrough("__iter__") + _box_values = ea_passthrough(DatetimeLikeArrayMixin._box_values) + _maybe_mask_results = ea_passthrough( + DatetimeLikeArrayMixin._maybe_mask_results) + __iter__ = ea_passthrough(DatetimeLikeArrayMixin.__iter__) @property def _eadata(self): @@ -275,9 +277,6 @@ def sort_values(self, return_indexer=False, ascending=True): if not ascending: sorted_values = sorted_values[::-1] - sorted_values = self._maybe_box_as_values(sorted_values, - **attribs) - return self._simple_new(sorted_values, **attribs) @Appender(_index_shared_docs['take'] % _index_doc_kwargs) @@ -613,14 +612,6 @@ def _concat_same_dtype(self, to_concat, name): new_data = type(self._values)._concat_same_type(to_concat).asi8 return self._simple_new(new_data, **attribs) - def _maybe_box_as_values(self, values, **attribs): - # TODO(DatetimeArray): remove - # This is a temporary shim while PeriodArray is an ExtensoinArray, - # but others are not. When everyone is an ExtensionArray, this can - # be removed. Currently used in - # - sort_values - return values - @Appender(_index_shared_docs['astype']) def astype(self, dtype, copy=True): if is_dtype_equal(self.dtype, dtype) and copy is False: diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index a6a910f66359c..6d9829d4ef659 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -356,36 +356,6 @@ def tz(self, value): tzinfo = tz - @property - def size(self): - # TODO: Remove this when we have a DatetimeTZArray - # Necessary to avoid recursion error since DTI._values is a DTI - # for TZ-aware - return self._ndarray_values.size - - @property - def shape(self): - # TODO: Remove this when we have a DatetimeTZArray - # Necessary to avoid recursion error since DTI._values is a DTI - # for TZ-aware - return self._ndarray_values.shape - - @property - def nbytes(self): - # TODO: Remove this when we have a DatetimeTZArray - # Necessary to avoid recursion error since DTI._values is a DTI - # for TZ-aware - return self._ndarray_values.nbytes - - def memory_usage(self, deep=False): - # TODO: Remove this when we have a DatetimeTZArray - # Necessary to avoid recursion error since DTI._values is a DTI - # for TZ-aware - result = self._ndarray_values.nbytes - # include our engine hashtable - result += self._engine.sizeof(deep=deep) - return result - @cache_readonly def _is_dates_only(self): """Return a boolean if we are only dates (and don't have a timezone)""" @@ -455,11 +425,11 @@ def _mpl_repr(self): def _format_native_types(self, na_rep='NaT', date_format=None, **kwargs): from pandas.io.formats.format import _get_format_datetime64_from_values - format = _get_format_datetime64_from_values(self, date_format) + fmt = _get_format_datetime64_from_values(self, date_format) return libts.format_array_from_datetime(self.asi8, tz=self.tz, - format=format, + format=fmt, na_rep=na_rep) @property @@ -1142,9 +1112,9 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None): is_normalized = cache_readonly(DatetimeArray.is_normalized.fget) _resolution = cache_readonly(DatetimeArray._resolution.fget) - strftime = ea_passthrough("strftime") - _has_same_tz = ea_passthrough("_has_same_tz") - __array__ = ea_passthrough("__array__") + strftime = ea_passthrough(DatetimeArray.strftime) + _has_same_tz = ea_passthrough(DatetimeArray._has_same_tz) + __array__ = ea_passthrough(DatetimeArray.__array__) @property def offset(self): diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 60059d5a43440..253ce2a28d165 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1468,9 +1468,9 @@ def to_frame(self, index=True, name=None): # Guarantee resulting column order result = DataFrame( OrderedDict([ - ((level if name is None else name), + ((level if lvlname is None else lvlname), self._get_level_values(level)) - for name, level in zip(idx_names, range(len(self.levels))) + for lvlname, level in zip(idx_names, range(len(self.levels))) ]), copy=False ) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 5bc76ed210edb..0eeb7551db26f 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -357,17 +357,6 @@ def func(x): return Period._from_ordinal(ordinal=x, freq=self.freq) return func - def _maybe_box_as_values(self, values, **attribs): - """Box an array of ordinals to a PeriodArray - - This is purely for compatibility between PeriodIndex - and Datetime/TimedeltaIndex. Once these are all backed by - an ExtensionArray, this can be removed - """ - # TODO(DatetimeArray): remove - freq = attribs['freq'] - return PeriodArray(values, freq=freq) - def _maybe_convert_timedelta(self, other): """ Convert timedelta-like input to an integer multiple of self.freq diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 3a3b9ed97c8fe..241d12dd06159 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -303,11 +303,6 @@ def _format_native_types(self, na_rep='NaT', date_format=None, **kwargs): _is_monotonic_decreasing = Index.is_monotonic_decreasing _is_unique = Index.is_unique - _create_comparison_method = DatetimeIndexOpsMixin._create_comparison_method - # TODO: make sure we have a test for name retention analogous - # to series.test_arithmetic.test_ser_cmp_result_names; - # also for PeriodIndex which I think may be missing one - @property def _box_func(self): return lambda x: Timedelta(x, unit='ns') diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 7845a62bb7edb..5ce5ae7186774 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -338,7 +338,7 @@ def concat_same_type(self, to_concat, placement=None): def iget(self, i): return self.values[i] - def set(self, locs, values, check=False): + def set(self, locs, values): """ Modify Block in-place with new item value @@ -2416,7 +2416,7 @@ def f(m, v, i): return blocks - def set(self, locs, values, check=False): + def set(self, locs, values): """ Modify Block in-place with new item value @@ -2424,14 +2424,6 @@ def set(self, locs, values, check=False): ------- None """ - - # GH6026 - if check: - try: - if (self.values[locs] == values).all(): - return - except (IndexError, ValueError): - pass try: self.values[locs] = values except (ValueError): @@ -2902,7 +2894,7 @@ def should_store(self, value): not is_datetime64tz_dtype(value) and not is_extension_array_dtype(value)) - def set(self, locs, values, check=False): + def set(self, locs, values): """ Modify Block in-place with new item value @@ -3053,8 +3045,7 @@ def _try_coerce_args(self, values, other): elif (is_null_datelike_scalar(other) or (lib.is_scalar(other) and isna(other))): other = tslibs.iNaT - elif isinstance(other, (self._holder, DatetimeArray)): - # TODO: DatetimeArray check will be redundant after GH#24024 + elif isinstance(other, self._holder): if other.tz != self.values.tz: raise ValueError("incompatible or non tz-aware value") other = _block_shape(other.asi8, ndim=self.ndim) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index d50f9c3e65ebd..eba49d18431ef 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1009,11 +1009,10 @@ def delete(self, item): self._shape = None self._rebuild_blknos_and_blklocs() - def set(self, item, value, check=False): + def set(self, item, value): """ Set new item in-place. Does not consolidate. Adds new Block if not contained in the current set of items - if check, then validate that we are not setting the same data in-place """ # FIXME: refactor, clearly separate broadcasting & zip-like assignment # can prob also fix the various if tests for sparse/categorical @@ -1065,7 +1064,7 @@ def value_getitem(placement): blk = self.blocks[blkno] blk_locs = blklocs[val_locs.indexer] if blk.should_store(value): - blk.set(blk_locs, value_getitem(val_locs), check=check) + blk.set(blk_locs, value_getitem(val_locs)) else: unfit_mgr_locs.append(blk.mgr_locs.as_array[blk_locs]) unfit_val_locs.append(val_locs) diff --git a/pandas/io/packers.py b/pandas/io/packers.py index 1e41369b00811..e6d18d5d4193a 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -584,23 +584,23 @@ def decode(obj): dtype = dtype_for(obj[u'dtype']) data = unconvert(obj[u'data'], dtype, obj.get(u'compress')) - return globals()[obj[u'klass']](data, dtype=dtype, name=obj[u'name']) + return Index(data, dtype=dtype, name=obj[u'name']) elif typ == u'range_index': - return globals()[obj[u'klass']](obj[u'start'], - obj[u'stop'], - obj[u'step'], - name=obj[u'name']) + return RangeIndex(obj[u'start'], + obj[u'stop'], + obj[u'step'], + name=obj[u'name']) elif typ == u'multi_index': dtype = dtype_for(obj[u'dtype']) data = unconvert(obj[u'data'], dtype, obj.get(u'compress')) data = [tuple(x) for x in data] - return globals()[obj[u'klass']].from_tuples(data, names=obj[u'names']) + return MultiIndex.from_tuples(data, names=obj[u'names']) elif typ == u'period_index': data = unconvert(obj[u'data'], np.int64, obj.get(u'compress')) d = dict(name=obj[u'name'], freq=obj[u'freq']) freq = d.pop('freq', None) - return globals()[obj[u'klass']](PeriodArray(data, freq), **d) + return PeriodIndex(PeriodArray(data, freq), **d) elif typ == u'datetime_index': data = unconvert(obj[u'data'], np.int64, obj.get(u'compress')) @@ -631,11 +631,10 @@ def decode(obj): pd_dtype = pandas_dtype(dtype) index = obj[u'index'] - result = globals()[obj[u'klass']](unconvert(obj[u'data'], dtype, - obj[u'compress']), - index=index, - dtype=pd_dtype, - name=obj[u'name']) + result = Series(unconvert(obj[u'data'], dtype, obj[u'compress']), + index=index, + dtype=pd_dtype, + name=obj[u'name']) return result elif typ == u'block_manager': @@ -671,18 +670,18 @@ def create_block(b): return np.timedelta64(int(obj[u'data'])) # elif typ == 'sparse_series': # dtype = dtype_for(obj['dtype']) - # return globals()[obj['klass']]( + # return SparseSeries( # unconvert(obj['sp_values'], dtype, obj['compress']), # sparse_index=obj['sp_index'], index=obj['index'], # fill_value=obj['fill_value'], kind=obj['kind'], name=obj['name']) # elif typ == 'sparse_dataframe': - # return globals()[obj['klass']]( + # return SparseDataFrame( # obj['data'], columns=obj['columns'], # default_fill_value=obj['default_fill_value'], # default_kind=obj['default_kind'] # ) # elif typ == 'sparse_panel': - # return globals()[obj['klass']]( + # return SparsePanel( # obj['data'], items=obj['items'], # default_fill_value=obj['default_fill_value'], # default_kind=obj['default_kind'])