diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 83c6e0e206191..d20dce5cfb51f 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -284,7 +284,7 @@ Groupby/Resample/Rolling Sparse ^^^^^^ -- Bug in ``SparseSeries`` raises ``AttributeError`` when a dictionary is passed in as data (:issue:`16777`) +- Bug in ``SparseSeries`` raises ``AttributeError`` when a dictionary is passed in as data (:issue:`16905`) Reshaping diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index 1eb2b98a7d7cc..8e22dd38030ee 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -1,5 +1,6 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 +from collections import OrderedDict import pytest @@ -20,6 +21,15 @@ class SharedWithSparse(object): + """ + A collection of tests Series and SparseSeries can share. + + In generic tests on this class, use ``self._assert_series_equal()`` + which is implemented in sub-classes. 
+ """ + def _assert_series_equal(self, left, right): + """Dispatch to series class dependent assertion""" + raise NotImplementedError def test_scalarop_preserve_name(self): result = self.ts * 2 @@ -117,9 +127,81 @@ def test_to_sparse_pass_name(self): result = self.ts.to_sparse() assert result.name == self.ts.name + def test_constructor_dict(self): + d = {'a': 0., 'b': 1., 'c': 2.} + result = self.series_klass(d) + expected = self.series_klass(d, index=sorted(d.keys())) + self._assert_series_equal(result, expected) + + result = self.series_klass(d, index=['b', 'c', 'd', 'a']) + expected = self.series_klass([1, 2, np.nan, 0], + index=['b', 'c', 'd', 'a']) + self._assert_series_equal(result, expected) + + def test_constructor_subclass_dict(self): + data = tm.TestSubDict((x, 10.0 * x) for x in range(10)) + series = self.series_klass(data) + expected = self.series_klass(dict(compat.iteritems(data))) + self._assert_series_equal(series, expected) + + def test_constructor_ordereddict(self): + # GH3283 + data = OrderedDict( + ('col%s' % i, np.random.random()) for i in range(12)) + + series = self.series_klass(data) + expected = self.series_klass(list(data.values()), list(data.keys())) + self._assert_series_equal(series, expected) + + # Test with subclass + class A(OrderedDict): + pass + + series = self.series_klass(A(data)) + self._assert_series_equal(series, expected) + + def test_constructor_dict_multiindex(self): + d = {('a', 'a'): 0., ('b', 'a'): 1., ('b', 'c'): 2.} + _d = sorted(d.items()) + result = self.series_klass(d) + expected = self.series_klass( + [x[1] for x in _d], + index=pd.MultiIndex.from_tuples([x[0] for x in _d])) + self._assert_series_equal(result, expected) + + d['z'] = 111. 
+ _d.insert(0, ('z', d['z'])) + result = self.series_klass(d) + expected = self.series_klass([x[1] for x in _d], + index=pd.Index([x[0] for x in _d], + tupleize_cols=False)) + result = result.reindex(index=expected.index) + self._assert_series_equal(result, expected) + + def test_constructor_dict_timedelta_index(self): + # GH #12169 : Resample category data with timedelta index + # constructing a Series from a dict with a TimedeltaIndex as index + # must not yield NaN in the resulting Series data + expected = self.series_klass( + data=['A', 'B', 'C'], + index=pd.to_timedelta([0, 10, 20], unit='s') + ) + + result = self.series_klass( + data={pd.to_timedelta(0, unit='s'): 'A', + pd.to_timedelta(10, unit='s'): 'B', + pd.to_timedelta(20, unit='s'): 'C'}, + index=pd.to_timedelta([0, 10, 20], unit='s') + ) + self._assert_series_equal(result, expected) + class TestSeriesMisc(TestData, SharedWithSparse): + series_klass = Series + # SharedWithSparse tests use generic, series_klass-agnostic assertion + _assert_series_equal = staticmethod(tm.assert_series_equal) + def test_tab_completion(self): # GH 9910 s = Series(list('abcd')) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index d591aa4f567a9..a916c42c007f9 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -20,8 +20,7 @@ from pandas._libs import lib from pandas._libs.tslib import iNaT -from pandas.compat import lrange, range, zip, OrderedDict, long -from pandas import compat +from pandas.compat import lrange, range, zip, long from pandas.util.testing import assert_series_equal import pandas.util.testing as tm @@ -605,48 +604,6 @@ def test_constructor_dict(self): expected.iloc[1] = 1 assert_series_equal(result, expected) - def test_constructor_dict_multiindex(self): - check = lambda result, expected: tm.assert_series_equal( - result, expected, check_dtype=True, check_series_type=True) - d = {('a', 'a'): 0., ('b', 'a'): 1., ('b', 'c'): 
2.} - _d = sorted(d.items()) - ser = Series(d) - expected = Series([x[1] for x in _d], - index=MultiIndex.from_tuples([x[0] for x in _d])) - check(ser, expected) - - d['z'] = 111. - _d.insert(0, ('z', d['z'])) - ser = Series(d) - expected = Series([x[1] for x in _d], index=Index( - [x[0] for x in _d], tupleize_cols=False)) - ser = ser.reindex(index=expected.index) - check(ser, expected) - - def test_constructor_dict_timedelta_index(self): - # GH #12169 : Resample category data with timedelta index - # construct Series from dict as data and TimedeltaIndex as index - # will result NaN in result Series data - expected = Series( - data=['A', 'B', 'C'], - index=pd.to_timedelta([0, 10, 20], unit='s') - ) - - result = Series( - data={pd.to_timedelta(0, unit='s'): 'A', - pd.to_timedelta(10, unit='s'): 'B', - pd.to_timedelta(20, unit='s'): 'C'}, - index=pd.to_timedelta([0, 10, 20], unit='s') - ) - # this should work - assert_series_equal(result, expected) - - def test_constructor_subclass_dict(self): - data = tm.TestSubDict((x, 10.0 * x) for x in range(10)) - series = Series(data) - refseries = Series(dict(compat.iteritems(data))) - assert_series_equal(refseries, series) - def test_constructor_dict_datetime64_index(self): # GH 9456 @@ -670,26 +627,6 @@ def create_data(constructor): assert_series_equal(result_datetime, expected) assert_series_equal(result_Timestamp, expected) - def test_orderedDict_ctor(self): - # GH3283 - import pandas - import random - data = OrderedDict([('col%s' % i, random.random()) for i in range(12)]) - s = pandas.Series(data) - assert all(s.values == list(data.values())) - - def test_orderedDict_subclass_ctor(self): - # GH3283 - import pandas - import random - - class A(OrderedDict): - pass - - data = A([('col%s' % i, random.random()) for i in range(12)]) - s = pandas.Series(data) - assert all(s.values == list(data.values())) - def test_constructor_list_of_tuples(self): data = [(1, 1), (2, 2), (2, 3)] s = Series(data) diff --git 
a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py index a5d514644a8f1..336b8f30716cd 100644 --- a/pandas/tests/sparse/test_frame.py +++ b/pandas/tests/sparse/test_frame.py @@ -1002,12 +1002,14 @@ def _check(frame, orig): shifted = frame.shift(2, freq='B') exp = orig.shift(2, freq='B') - exp = exp.to_sparse(frame.default_fill_value) + exp = exp.to_sparse(frame.default_fill_value, + kind=frame.default_kind) tm.assert_frame_equal(shifted, exp) shifted = frame.shift(2, freq=BDay()) exp = orig.shift(2, freq=BDay()) - exp = exp.to_sparse(frame.default_fill_value) + exp = exp.to_sparse(frame.default_fill_value, + kind=frame.default_kind) tm.assert_frame_equal(shifted, exp) self._check_all(_check) diff --git a/pandas/tests/sparse/test_series.py b/pandas/tests/sparse/test_series.py index bb56f8a51897a..a7685abd5ba4d 100644 --- a/pandas/tests/sparse/test_series.py +++ b/pandas/tests/sparse/test_series.py @@ -1,6 +1,8 @@ # pylint: disable-msg=E1101,W0612 import operator +from datetime import datetime + import pytest from numpy import nan @@ -58,6 +60,10 @@ def _test_data2_zero(): class TestSparseSeries(SharedWithSparse): + series_klass = SparseSeries + # SharedWithSparse tests use generic, series_klass-agnostic assertion + _assert_series_equal = staticmethod(tm.assert_sp_series_equal) + def setup_method(self, method): arr, index = _test_data1() @@ -1379,3 +1385,18 @@ def test_numpy_func_call(self): for func in funcs: for series in ('bseries', 'zbseries'): getattr(np, func)(getattr(self, series)) + + +@pytest.mark.parametrize( + 'datetime_type', (np.datetime64, + pd.Timestamp, + lambda x: datetime.strptime(x, '%Y-%m-%d'))) +def test_constructor_dict_datetime64_index(datetime_type): + # GH 9456 + dates = ['1984-02-19', '1988-11-06', '1989-12-03', '1990-03-15'] + values = [42544017.198965244, 1234565, 40512335.181958228, -1] + + result = SparseSeries(dict(zip(map(datetime_type, dates), values))) + expected = SparseSeries(values, map(pd.Timestamp, dates)) + 
+ tm.assert_sp_series_equal(result, expected)