From 275802275c345568133d548658d6a48416b6b0cc Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 27 Nov 2017 06:43:06 -0500 Subject: [PATCH] API: empty map should not infer closes #18509 --- doc/source/whatsnew/v0.22.0.txt | 2 +- pandas/core/indexes/base.py | 18 +----------------- pandas/tests/indexes/common.py | 17 ++++++++++++++--- pandas/tests/indexes/datetimelike.py | 3 ++- pandas/tests/indexes/test_base.py | 4 +--- pandas/tests/series/test_apply.py | 8 ++++++++ pandas/tests/test_resample.py | 24 +++++++++++++----------- pandas/util/testing.py | 3 ++- 8 files changed, 42 insertions(+), 37 deletions(-) diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index 1a08a1353a605..09b504cac5ed4 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -107,7 +107,7 @@ Other API Changes - :class:`CacheableOffset` and :class:`WeekDay` are no longer available in the ``pandas.tseries.offsets`` module (:issue:`17830`) - `tseries.frequencies.get_freq_group()` and `tseries.frequencies.DAYS` are removed from the public API (:issue:`18034`) - :func:`Series.truncate` and :func:`DataFrame.truncate` will raise a ``ValueError`` if the index is not sorted instead of an unhelpful ``KeyError`` (:issue:`17935`) -- :func:`Index.map` can now accept ``Series`` and dictionary input objects (:issue:`12756`, :issue:`18482`). +- :func:`Index.map` can now accept ``Series`` and dictionary input objects (:issue:`12756`, :issue:`18482`, :issue:`18509`). - :func:`Dataframe.unstack` will now default to filling with ``np.nan`` for ``object`` columns. (:issue:`12815`) - :class:`IntervalIndex` constructor will raise if the ``closed`` parameter conflicts with how the input data is inferred to be closed (:issue:`18421`) - Inserting missing values into indexes will work for all types of indexes and automatically insert the correct type of missing value (``NaN``, ``NaT``, etc.) regardless of the type passed in (:issue:`18295`) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 2bf3afe47d007..94e9947155c41 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2897,25 +2897,9 @@ def map(self, mapper, na_action=None): names=names) attributes['copy'] = False - - # we want to try to return our original dtype - # ints infer to integer, but if we have - # uints, would prefer to return these - if is_unsigned_integer_dtype(self.dtype): - inferred = lib.infer_dtype(new_values) - if inferred == 'integer': - attributes['dtype'] = self.dtype - - elif not new_values.size: + if not new_values.size: # empty attributes['dtype'] = self.dtype - elif isna(new_values).all(): - # all nan - inferred = lib.infer_dtype(self) - if inferred in ['datetime', 'datetime64', - 'timedelta', 'timedelta64', - 'period']: - new_values = [libts.NaT] * len(new_values) return Index(new_values, **attributes) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 99bdaf02e25ff..c1ee18526cc01 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -1009,7 +1009,13 @@ def test_searchsorted_monotonic(self, indices): def test_map(self): # callable index = self.create_index() - expected = index + + # we don't infer UInt64 + if isinstance(index, pd.UInt64Index): + expected = index.astype('int64') + else: + expected = index + result = index.map(lambda x: x) tm.assert_index_equal(result, expected) @@ -1024,9 +1030,14 @@ def test_map_dictlike(self, mapper): if isinstance(index, (pd.CategoricalIndex, pd.IntervalIndex)): pytest.skip("skipping tests for {}".format(type(index))) - expected = index - identity = mapper(index.values, index) + + # we don't infer to UInt64 for a dict + if isinstance(index, pd.UInt64Index) and isinstance(identity, dict): + expected = index.astype('int64') + else: + expected = index + result = index.map(identity) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/datetimelike.py b/pandas/tests/indexes/datetimelike.py index a01c60a47c0f9..ad76d17c93c41 100644 --- a/pandas/tests/indexes/datetimelike.py +++ b/pandas/tests/indexes/datetimelike.py @@ -1,5 +1,6 @@ """ generic datetimelike tests """ import pytest +import numpy as np import pandas as pd from .common import Base import pandas.util.testing as tm @@ -72,6 +73,6 @@ def test_map_dictlike(self, mapper): # empty map; these map to np.nan because we cannot know # to re-infer things - expected = pd.Index([pd.NaT] * len(self.index)) + expected = pd.Index([np.nan] * len(self.index)) result = self.index.map(mapper([], [])) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 0b782e600822a..9ef7a43b2193a 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -885,9 +885,7 @@ def test_map_dictlike(self, mapper): expected = Index(np.arange(len(index), 0, -1)) # to match proper result coercion for uints - if name == 'uintIndex': - expected = expected.astype('uint64') - elif name == 'empty': + if name == 'empty': expected = Index([]) result = index.map(mapper(expected, index)) diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py index cafe6a34720be..8899ab585d6cb 100644 --- a/pandas/tests/series/test_apply.py +++ b/pandas/tests/series/test_apply.py @@ -377,6 +377,14 @@ def test_map(self): exp = Series([np.nan, 'B', 'C', 'D']) tm.assert_series_equal(a.map(c), exp) + @pytest.mark.parametrize("index", tm.all_index_generator(10)) + def test_map_empty(self, index): + s = Series(index) + result = s.map({}) + + expected = pd.Series(np.nan, index=s.index) + tm.assert_series_equal(result, expected) + def test_map_compat(self): # related GH 8024 s = Series([True, True, False], index=[1, 2, 3]) diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index b0154f6db7022..29dd99ac9c655 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -816,21 +816,23 @@ def test_resample_empty_dataframe(self): # test size for GH13212 (currently stays as df) - def test_resample_empty_dtypes(self): + @pytest.mark.parametrize("index", tm.all_timeseries_index_generator(0)) + @pytest.mark.parametrize( + "dtype", + [np.float, np.int, np.object, 'datetime64[ns]']) + def test_resample_empty_dtypes(self, index, dtype): # Empty series were sometimes causing a segfault (for the functions # with Cython bounds-checking disabled) or an IndexError. We just run # them to ensure they no longer do. (GH #10228) - for index in tm.all_timeseries_index_generator(0): - for dtype in (np.float, np.int, np.object, 'datetime64[ns]'): - for how in downsample_methods + upsample_methods: - empty_series = Series([], index, dtype) - try: - getattr(empty_series.resample('d'), how)() - except DataError: - # Ignore these since some combinations are invalid - # (ex: doing mean with dtype of np.object) - pass + for how in downsample_methods + upsample_methods: + empty_series = Series([], index, dtype) + try: + getattr(empty_series.resample('d'), how)() + except DataError: + # Ignore these since some combinations are invalid + # (ex: doing mean with dtype of np.object) + pass def test_resample_loffset_arg_type(self): # GH 13218, 15002 diff --git a/pandas/util/testing.py b/pandas/util/testing.py index ff6fa8ae717d3..850c42a011958 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1695,7 +1695,8 @@ def all_index_generator(k=10): """ all_make_index_funcs = [makeIntIndex, makeFloatIndex, makeStringIndex, makeUnicodeIndex, makeDateIndex, makePeriodIndex, - makeTimedeltaIndex, makeBoolIndex, + makeTimedeltaIndex, makeBoolIndex, makeRangeIndex, + makeIntervalIndex, makeCategoricalIndex] for make_index_func in all_make_index_funcs: yield make_index_func(k=k)