Skip to content

Commit

Permalink
API: empty map should not infer
Browse files Browse the repository at this point in the history
closes #18509
  • Loading branch information
jreback committed Nov 27, 2017
1 parent 262e8ff commit c4fdd71
Show file tree
Hide file tree
Showing 8 changed files with 42 additions and 37 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.22.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ Other API Changes
- :class:`CacheableOffset` and :class:`WeekDay` are no longer available in the ``pandas.tseries.offsets`` module (:issue:`17830`)
- ``tseries.frequencies.get_freq_group()`` and ``tseries.frequencies.DAYS`` are removed from the public API (:issue:`18034`)
- :func:`Series.truncate` and :func:`DataFrame.truncate` will raise a ``ValueError`` if the index is not sorted instead of an unhelpful ``KeyError`` (:issue:`17935`)
- :func:`Index.map` can now accept ``Series`` and dictionary input objects (:issue:`12756`, :issue:`18482`).
- :func:`Index.map` can now accept ``Series`` and dictionary input objects (:issue:`12756`, :issue:`18482`, :issue:`18509`).
- :func:`DataFrame.unstack` will now default to filling with ``np.nan`` for ``object`` columns. (:issue:`12815`)
- :class:`IntervalIndex` constructor will raise if the ``closed`` parameter conflicts with how the input data is inferred to be closed (:issue:`18421`)
- Inserting missing values into indexes will work for all types of indexes and automatically insert the correct type of missing value (``NaN``, ``NaT``, etc.) regardless of the type passed in (:issue:`18295`)
Expand Down
18 changes: 1 addition & 17 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2925,25 +2925,9 @@ def map(self, mapper, na_action=None):
names=names)

attributes['copy'] = False

# we want to try to return our original dtype
# ints infer to integer, but if we have
# uints, would prefer to return these
if is_unsigned_integer_dtype(self.dtype):
inferred = lib.infer_dtype(new_values)
if inferred == 'integer':
attributes['dtype'] = self.dtype

elif not new_values.size:
if not new_values.size:
# empty
attributes['dtype'] = self.dtype
elif isna(new_values).all():
# all nan
inferred = lib.infer_dtype(self)
if inferred in ['datetime', 'datetime64',
'timedelta', 'timedelta64',
'period']:
new_values = [libts.NaT] * len(new_values)

return Index(new_values, **attributes)

Expand Down
17 changes: 14 additions & 3 deletions pandas/tests/indexes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1009,7 +1009,13 @@ def test_searchsorted_monotonic(self, indices):
def test_map(self):
# callable
index = self.create_index()
expected = index

# we don't infer UInt64
if isinstance(index, pd.UInt64Index):
expected = index.astype('int64')
else:
expected = index

result = index.map(lambda x: x)
tm.assert_index_equal(result, expected)

Expand All @@ -1024,9 +1030,14 @@ def test_map_dictlike(self, mapper):
if isinstance(index, (pd.CategoricalIndex, pd.IntervalIndex)):
pytest.skip("skipping tests for {}".format(type(index)))

expected = index

identity = mapper(index.values, index)

# we don't infer to UInt64 for a dict
if isinstance(index, pd.UInt64Index) and isinstance(identity, dict):
expected = index.astype('int64')
else:
expected = index

result = index.map(identity)
tm.assert_index_equal(result, expected)

Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/indexes/datetimelike.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
""" generic datetimelike tests """
import pytest
import numpy as np
import pandas as pd
from .common import Base
import pandas.util.testing as tm
Expand Down Expand Up @@ -72,6 +73,6 @@ def test_map_dictlike(self, mapper):

# empty map; these map to np.nan because we cannot know
# to re-infer things
expected = pd.Index([pd.NaT] * len(self.index))
expected = pd.Index([np.nan] * len(self.index))
result = self.index.map(mapper([], []))
tm.assert_index_equal(result, expected)
4 changes: 1 addition & 3 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -875,9 +875,7 @@ def test_map_dictlike(self, mapper):
expected = Index(np.arange(len(index), 0, -1))

# to match proper result coercion for uints
if name == 'uintIndex':
expected = expected.astype('uint64')
elif name == 'empty':
if name == 'empty':
expected = Index([])

result = index.map(mapper(expected, index))
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/series/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,14 @@ def test_map(self):
exp = Series([np.nan, 'B', 'C', 'D'])
tm.assert_series_equal(a.map(c), exp)

# GH 18509: mapping through an empty dict must not infer a dtype from the
# original values. Parametrized over every index flavour produced by
# tm.all_index_generator(10) so each kind of values is covered.
@pytest.mark.parametrize("index", tm.all_index_generator(10))
def test_map_empty(self, index):
# build a Series whose values are the generated index
s = Series(index)
# an empty mapping has no matching key for any value
result = s.map({})

# every element is expected to be NaN, with the Series' own index kept
expected = pd.Series(np.nan, index=s.index)
tm.assert_series_equal(result, expected)

def test_map_compat(self):
# related GH 8024
s = Series([True, True, False], index=[1, 2, 3])
Expand Down
24 changes: 13 additions & 11 deletions pandas/tests/test_resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -816,21 +816,23 @@ def test_resample_empty_dataframe(self):

# test size for GH13212 (currently stays as df)

def test_resample_empty_dtypes(self):
@pytest.mark.parametrize("index", tm.all_timeseries_index_generator(0))
@pytest.mark.parametrize(
"dtype",
[np.float, np.int, np.object, 'datetime64[ns]'])
def test_resample_empty_dtypes(self, index, dtype):

# Empty series were sometimes causing a segfault (for the functions
# with Cython bounds-checking disabled) or an IndexError. We just run
# them to ensure they no longer do. (GH #10228)
for index in tm.all_timeseries_index_generator(0):
for dtype in (np.float, np.int, np.object, 'datetime64[ns]'):
for how in downsample_methods + upsample_methods:
empty_series = Series([], index, dtype)
try:
getattr(empty_series.resample('d'), how)()
except DataError:
# Ignore these since some combinations are invalid
# (ex: doing mean with dtype of np.object)
pass
for how in downsample_methods + upsample_methods:
empty_series = Series([], index, dtype)
try:
getattr(empty_series.resample('d'), how)()
except DataError:
# Ignore these since some combinations are invalid
# (ex: doing mean with dtype of np.object)
pass

def test_resample_loffset_arg_type(self):
# GH 13218, 15002
Expand Down
3 changes: 2 additions & 1 deletion pandas/util/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1702,7 +1702,8 @@ def all_index_generator(k=10):
"""
all_make_index_funcs = [makeIntIndex, makeFloatIndex, makeStringIndex,
makeUnicodeIndex, makeDateIndex, makePeriodIndex,
makeTimedeltaIndex, makeBoolIndex,
makeTimedeltaIndex, makeBoolIndex, makeRangeIndex,
makeIntervalIndex,
makeCategoricalIndex]
for make_index_func in all_make_index_funcs:
yield make_index_func(k=k)
Expand Down

0 comments on commit c4fdd71

Please sign in to comment.