Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

API: empty map should not infer #18517

Merged
merged 1 commit into from
Dec 2, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.22.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ Other API Changes
- :class:`CacheableOffset` and :class:`WeekDay` are no longer available in the ``pandas.tseries.offsets`` module (:issue:`17830`)
- ``tseries.frequencies.get_freq_group()`` and ``tseries.frequencies.DAYS`` are removed from the public API (:issue:`18034`)
- :func:`Series.truncate` and :func:`DataFrame.truncate` will raise a ``ValueError`` if the index is not sorted instead of an unhelpful ``KeyError`` (:issue:`17935`)
- :func:`Index.map` can now accept ``Series`` and dictionary input objects (:issue:`12756`, :issue:`18482`).
- :func:`Index.map` can now accept ``Series`` and dictionary input objects (:issue:`12756`, :issue:`18482`, :issue:`18509`).
- :func:`DataFrame.unstack` will now default to filling with ``np.nan`` for ``object`` columns. (:issue:`12815`)
- :class:`IntervalIndex` constructor will raise if the ``closed`` parameter conflicts with how the input data is inferred to be closed (:issue:`18421`)
- Inserting missing values into indexes will work for all types of indexes and automatically insert the correct type of missing value (``NaN``, ``NaT``, etc.) regardless of the type passed in (:issue:`18295`)
Expand Down
18 changes: 1 addition & 17 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2897,25 +2897,9 @@ def map(self, mapper, na_action=None):
names=names)

attributes['copy'] = False

# we want to try to return our original dtype
# ints infer to integer, but if we have
# uints, would prefer to return these
if is_unsigned_integer_dtype(self.dtype):
inferred = lib.infer_dtype(new_values)
if inferred == 'integer':
attributes['dtype'] = self.dtype

elif not new_values.size:
if not new_values.size:
# empty
attributes['dtype'] = self.dtype
elif isna(new_values).all():
# all nan
inferred = lib.infer_dtype(self)
if inferred in ['datetime', 'datetime64',
'timedelta', 'timedelta64',
'period']:
new_values = [libts.NaT] * len(new_values)

return Index(new_values, **attributes)

Expand Down
17 changes: 14 additions & 3 deletions pandas/tests/indexes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1009,7 +1009,13 @@ def test_searchsorted_monotonic(self, indices):
def test_map(self):
# callable
index = self.create_index()
expected = index

# we don't infer UInt64
if isinstance(index, pd.UInt64Index):
expected = index.astype('int64')
else:
expected = index

result = index.map(lambda x: x)
tm.assert_index_equal(result, expected)

Expand All @@ -1024,9 +1030,14 @@ def test_map_dictlike(self, mapper):
if isinstance(index, (pd.CategoricalIndex, pd.IntervalIndex)):
pytest.skip("skipping tests for {}".format(type(index)))

expected = index

identity = mapper(index.values, index)

# we don't infer to UInt64 for a dict
if isinstance(index, pd.UInt64Index) and isinstance(identity, dict):
expected = index.astype('int64')
else:
expected = index

result = index.map(identity)
tm.assert_index_equal(result, expected)

Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/indexes/datetimelike.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
""" generic datetimelike tests """
import pytest
import numpy as np
import pandas as pd
from .common import Base
import pandas.util.testing as tm
Expand Down Expand Up @@ -72,6 +73,6 @@ def test_map_dictlike(self, mapper):

# empty map; these map to np.nan because we cannot know
# to re-infer things
expected = pd.Index([pd.NaT] * len(self.index))
expected = pd.Index([np.nan] * len(self.index))
result = self.index.map(mapper([], []))
tm.assert_index_equal(result, expected)
4 changes: 1 addition & 3 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -885,9 +885,7 @@ def test_map_dictlike(self, mapper):
expected = Index(np.arange(len(index), 0, -1))

# to match proper result coercion for uints
if name == 'uintIndex':
expected = expected.astype('uint64')
elif name == 'empty':
if name == 'empty':
expected = Index([])

result = index.map(mapper(expected, index))
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/series/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,14 @@ def test_map(self):
exp = Series([np.nan, 'B', 'C', 'D'])
tm.assert_series_equal(a.map(c), exp)

@pytest.mark.parametrize("index", tm.all_index_generator(10))
def test_map_empty(self, index):
s = Series(index)
result = s.map({})

expected = pd.Series(np.nan, index=s.index)
tm.assert_series_equal(result, expected)

def test_map_compat(self):
# related GH 8024
s = Series([True, True, False], index=[1, 2, 3])
Expand Down
24 changes: 13 additions & 11 deletions pandas/tests/test_resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -816,21 +816,23 @@ def test_resample_empty_dataframe(self):

# test size for GH13212 (currently stays as df)

def test_resample_empty_dtypes(self):
@pytest.mark.parametrize("index", tm.all_timeseries_index_generator(0))
@pytest.mark.parametrize(
"dtype",
[np.float, np.int, np.object, 'datetime64[ns]'])
def test_resample_empty_dtypes(self, index, dtype):

# Empty series were sometimes causing a segfault (for the functions
# with Cython bounds-checking disabled) or an IndexError. We just run
# them to ensure they no longer do. (GH #10228)
for index in tm.all_timeseries_index_generator(0):
for dtype in (np.float, np.int, np.object, 'datetime64[ns]'):
for how in downsample_methods + upsample_methods:
empty_series = Series([], index, dtype)
try:
getattr(empty_series.resample('d'), how)()
except DataError:
# Ignore these since some combinations are invalid
# (ex: doing mean with dtype of np.object)
pass
for how in downsample_methods + upsample_methods:
empty_series = Series([], index, dtype)
try:
getattr(empty_series.resample('d'), how)()
except DataError:
# Ignore these since some combinations are invalid
# (ex: doing mean with dtype of np.object)
pass

def test_resample_loffset_arg_type(self):
# GH 13218, 15002
Expand Down
3 changes: 2 additions & 1 deletion pandas/util/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1695,7 +1695,8 @@ def all_index_generator(k=10):
"""
all_make_index_funcs = [makeIntIndex, makeFloatIndex, makeStringIndex,
makeUnicodeIndex, makeDateIndex, makePeriodIndex,
makeTimedeltaIndex, makeBoolIndex,
makeTimedeltaIndex, makeBoolIndex, makeRangeIndex,
makeIntervalIndex,
makeCategoricalIndex]
for make_index_func in all_make_index_funcs:
yield make_index_func(k=k)
Expand Down