From d6730084ad9ff2c5050041025e114b78ec245adb Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 28 Jun 2019 14:22:19 -0500 Subject: [PATCH 01/21] un-xfail tests, xfail instead of skip, minor cleanup --- pandas/tests/computation/test_eval.py | 8 ++++---- pandas/tests/frame/test_analytics.py | 7 +++---- pandas/tests/groupby/test_categorical.py | 2 +- pandas/tests/indexes/datetimes/test_construction.py | 2 -- pandas/tests/indexes/datetimes/test_tools.py | 4 +++- pandas/tests/io/json/test_pandas.py | 1 + pandas/tests/scalar/period/test_period.py | 2 +- pandas/tests/series/test_arithmetic.py | 10 ++-------- 8 files changed, 15 insertions(+), 21 deletions(-) diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index ca78e2e40ec74..1562cc300e020 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -366,7 +366,7 @@ def check_single_invert_op(self, lhs, cmp1, rhs): parser=self.parser)) def check_compound_invert_op(self, lhs, cmp1, rhs): - skip_these = 'in', 'not in' + skip_these = ['in', 'not in'] ex = '~(lhs {0} rhs)'.format(cmp1) msg = (r"only list-like( or dict-like)? objects are allowed to be" @@ -1713,7 +1713,7 @@ def test_result_types(self): def test_result_types2(self): # xref https://github.com/pandas-dev/pandas/issues/12293 - pytest.skip("unreliable tests on complex128") + # at one point this was unreliable on compled128 # Did not test complex64 because DataFrame is converting it to # complex128. Due to https://github.com/pandas-dev/pandas/issues/10952 @@ -1793,14 +1793,14 @@ def test_no_new_globals(self, engine, parser): @td.skip_if_no_ne def test_invalid_engine(): - msg = 'Invalid engine \'asdf\' passed' + msg = "Invalid engine 'asdf' passed" with pytest.raises(KeyError, match=msg): pd.eval('x + y', local_dict={'x': 1, 'y': 2}, engine='asdf') @td.skip_if_no_ne def test_invalid_parser(): - msg = 'Invalid parser \'asdf\' passed' + msg = "Invalid parser 'asdf' passed" with pytest.raises(KeyError, match=msg): pd.eval('x + y', local_dict={'x': 1, 'y': 2}, parser='asdf') diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 01a398584b5e1..ded5312286302 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1585,10 +1585,9 @@ def test_any_all_bool_only(self): (np.all, {'A': pd.Series([1, 2], dtype='category')}, True), (np.any, {'A': pd.Series([1, 2], dtype='category')}, True), - # # Mix - # GH 21484 - # (np.all, {'A': pd.Series([10, 20], dtype='M8[ns]'), - # 'B': pd.Series([10, 20], dtype='m8[ns]')}, True), + # Mix GH#21484 + (np.all, {'A': pd.Series([10, 20], dtype='M8[ns]'), + 'B': pd.Series([10, 20], dtype='m8[ns]')}, True), ]) def test_any_all_np_func(self, func, data, expected): # GH 19976 diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 58a43dc218d33..f5c58019c433c 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -207,7 +207,7 @@ def test_level_get_group(observed): assert_frame_equal(result, expected) -@pytest.mark.xfail(PY37, reason="flaky on 3.7, xref gh-21636", strict=False) +# GH#21636 previously flaky on py37 @pytest.mark.parametrize('ordered', [True, False]) def test_apply(ordered): # GH 10138 diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index 56dfbfd485eb1..e0238e639c071 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -608,8 +608,6 @@ def test_constructor_with_int_tz(self, klass, box, tz, dtype): expected = klass([ts]) assert result == expected - # This is the desired future behavior - @pytest.mark.xfail(reason="Future behavior", strict=False) @pytest.mark.filterwarnings("ignore:\\n Passing:FutureWarning") def test_construction_int_rountrip(self, tz_naive_fixture): # GH 12619 diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index a971a1088860a..1c517d7cc9230 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -616,7 +616,9 @@ def test_to_datetime_tz_psycopg2(self, cache): @pytest.mark.parametrize( 'cache', [pytest.param(True, - marks=pytest.mark.skipif(True, reason="GH 18111")), + marks=pytest.mark.xfail( + reason="GH#18111 pd.unique treats 0 and False " + "as equivalent")), False]) def test_datetime_bool(self, cache): # GH13176 diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index a935a731ccba6..36c3e7b9d1fbd 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1159,6 +1159,7 @@ def test_to_jsonl(self): assert result == expected assert_frame_equal(pd.read_json(result, lines=True), df) + # TODO: there is a near-identical test for pytables; can we share? def test_latin_encoding(self): # GH 13774 pytest.skip("encoding not implemented in .to_json(), " diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index 2a765086af403..5f14094162a93 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -1490,7 +1490,7 @@ def test_period_immutable(): # TODO: This doesn't fail on all systems; track down which @pytest.mark.xfail(reason="Parses as Jan 1, 0007 on some systems", - strict=False) + strict=True) def test_small_year_parsing(): per1 = Period('0001-01-07', 'D') assert per1.year == 1 diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index 43fcddea3d964..af6754b3b288f 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -25,7 +25,7 @@ class TestSeriesFlexArithmetic: True) ]) @pytest.mark.parametrize('opname', ['add', 'sub', 'mul', 'floordiv', - 'truediv', 'div', 'pow']) + 'truediv', 'pow']) def test_flex_method_equivalence(self, opname, ts): # check that Series.{opname} behaves like Series.__{opname}__, tser = tm.makeTimeSeries().rename('ts') @@ -34,15 +34,9 @@ def test_flex_method_equivalence(self, opname, ts): other = ts[1](tser) check_reverse = ts[2] - if opname == 'div': - pytest.skip('div test only for Py3') - op = getattr(Series, opname) - if op == 'div': - alt = operator.truediv - else: - alt = getattr(operator, opname) + alt = getattr(operator, opname) result = op(series, other) expected = alt(series, other) From bed55630fa4c12c52bf96d1c21bf8a080b19f364 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 30 Jun 2019 09:10:32 -0500 Subject: [PATCH 02/21] REF: derive __len__ from shape instead of vice-versa --- pandas/core/arrays/base.py | 13 ++++++++--- pandas/core/arrays/categorical.py | 22 +++---------------- pandas/core/arrays/datetimelike.py | 8 ++----- pandas/core/arrays/integer.py | 5 +++-- pandas/core/arrays/interval.py | 10 +++------ pandas/core/arrays/numpy_.py | 5 +++-- pandas/core/arrays/sparse.py | 5 +++-- pandas/tests/computation/test_eval.py | 8 +++---- pandas/tests/extension/arrow/bool.py | 5 +++-- pandas/tests/extension/arrow/test_bool.py | 6 ++++- pandas/tests/extension/decimal/array.py | 5 +++-- pandas/tests/extension/json/array.py | 5 +++-- pandas/tests/frame/test_analytics.py | 7 +++--- pandas/tests/frame/test_indexing.py | 5 +++-- pandas/tests/groupby/test_categorical.py | 2 +- .../indexes/datetimes/test_construction.py | 2 ++ pandas/tests/indexes/datetimes/test_tools.py | 4 +--- pandas/tests/io/json/test_pandas.py | 1 - pandas/tests/scalar/period/test_period.py | 2 +- pandas/tests/series/test_arithmetic.py | 10 +++++++-- 20 files changed, 65 insertions(+), 65 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 6340cc732d6c1..b883de4fd8a56 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -272,7 +272,7 @@ def __len__(self) -> int: ------- length : int """ - raise AbstractMethodError(self) + return self.shape[0] def __iter__(self): """ @@ -300,14 +300,21 @@ def shape(self) -> Tuple[int, ...]: """ Return a tuple of the array dimensions. """ - return (len(self),) + raise AbstractMethodError(self) @property def ndim(self) -> int: """ Extension Arrays are only allowed to be 1-dimensional. """ - return 1 + return len(self.shape) + + @property + def size(self) -> int: + """ + The number of elements in this array. + """ + return np.prod(self.shape) @property def nbytes(self) -> int: diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index a1d591458fba3..36c20d413c060 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -489,20 +489,6 @@ def astype(self, dtype, copy=True): return self._set_dtype(dtype) return np.array(self, dtype=dtype, copy=copy) - @cache_readonly - def ndim(self): - """ - Number of dimensions of the Categorical - """ - return self._codes.ndim - - @cache_readonly - def size(self): - """ - return the len of myself - """ - return len(self) - @cache_readonly def itemsize(self): """ @@ -1927,11 +1913,9 @@ def _slice(self, slicer): codes = self._codes[slicer] return self._constructor(values=codes, dtype=self.dtype, fastpath=True) - def __len__(self): - """ - The length of this Categorical. - """ - return len(self._codes) + @property + def shape(self): + return self._codes.shape def __iter__(self): """ diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 93166759d8dbd..982bc55256f80 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -402,12 +402,8 @@ def __array__(self, dtype=None): return self._data @property - def size(self) -> int: - """The number of elements in this array.""" - return np.prod(self.shape) - - def __len__(self): - return len(self._data) + def shape(self): + return self._data.shape def __getitem__(self, key): """ diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 88de497a3329f..0d3f04022f9b8 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -393,8 +393,9 @@ def __setitem__(self, key, value): self._data[key] = value self._mask[key] = mask - def __len__(self): - return len(self._data) + @property + def shape(self): + return self._data.shape @property def nbytes(self): diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index aaa4124182598..952919d3a10c7 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -466,8 +466,9 @@ def _validate(self): def __iter__(self): return iter(np.asarray(self)) - def __len__(self): - return len(self.left) + @property + def shape(self): + return self.left.shape def __getitem__(self, value): left = self.left[value] @@ -701,11 +702,6 @@ def isna(self): def nbytes(self): return self.left.nbytes + self.right.nbytes - @property - def size(self): - # Avoid materializing self.values - return self.left.size - @property def shape(self): return self.left.shape diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 1c5dc7666c3a1..9faf04029471b 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -240,8 +240,9 @@ def __setitem__(self, key, value): else: self._ndarray[key] = value - def __len__(self): - return len(self._ndarray) + @property + def shape(self): + return self._ndarray.shape @property def nbytes(self): diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py index 3512d4e9e29db..564fd61041e01 100644 --- a/pandas/core/arrays/sparse.py +++ b/pandas/core/arrays/sparse.py @@ -849,8 +849,9 @@ def _valid_sp_values(self): mask = notna(sp_vals) return sp_vals[mask] - def __len__(self): - return self.sp_index.length + @property + def shape(self): + return (self.sp_index.length,) @property def _null_fill_value(self): diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index 1562cc300e020..ca78e2e40ec74 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -366,7 +366,7 @@ def check_single_invert_op(self, lhs, cmp1, rhs): parser=self.parser)) def check_compound_invert_op(self, lhs, cmp1, rhs): - skip_these = ['in', 'not in'] + skip_these = 'in', 'not in' ex = '~(lhs {0} rhs)'.format(cmp1) msg = (r"only list-like( or dict-like)? objects are allowed to be" @@ -1713,7 +1713,7 @@ def test_result_types(self): def test_result_types2(self): # xref https://github.com/pandas-dev/pandas/issues/12293 - # at one point this was unreliable on compled128 + pytest.skip("unreliable tests on complex128") # Did not test complex64 because DataFrame is converting it to # complex128. Due to https://github.com/pandas-dev/pandas/issues/10952 @@ -1793,14 +1793,14 @@ def test_no_new_globals(self, engine, parser): @td.skip_if_no_ne def test_invalid_engine(): - msg = "Invalid engine 'asdf' passed" + msg = 'Invalid engine \'asdf\' passed' with pytest.raises(KeyError, match=msg): pd.eval('x + y', local_dict={'x': 1, 'y': 2}, engine='asdf') @td.skip_if_no_ne def test_invalid_parser(): - msg = "Invalid parser 'asdf' passed" + msg = 'Invalid parser \'asdf\' passed' with pytest.raises(KeyError, match=msg): pd.eval('x + y', local_dict={'x': 1, 'y': 2}, parser='asdf') diff --git a/pandas/tests/extension/arrow/bool.py b/pandas/tests/extension/arrow/bool.py index 0d6396033fac7..50e2d78f006cd 100644 --- a/pandas/tests/extension/arrow/bool.py +++ b/pandas/tests/extension/arrow/bool.py @@ -73,8 +73,9 @@ def __getitem__(self, item): vals = self._data.to_pandas()[item] return type(self).from_scalars(vals) - def __len__(self): - return len(self._data) + @property + def shape(self): + return (len(self._data),) def astype(self, dtype, copy=True): # needed to fix this astype for the Series constructor. diff --git a/pandas/tests/extension/arrow/test_bool.py b/pandas/tests/extension/arrow/test_bool.py index 21ce5e999334e..e529cef68d8c5 100644 --- a/pandas/tests/extension/arrow/test_bool.py +++ b/pandas/tests/extension/arrow/test_bool.py @@ -15,13 +15,17 @@ def dtype(): return ArrowBoolDtype() -@pytest.fixture def data(): values = np.random.randint(0, 2, size=100, dtype=bool) values[1] = ~values[0] return ArrowBoolArray.from_scalars(values) +@pytest.fixture(name="data") +def data_fixture(): + return data() + + @pytest.fixture def data_missing(): return ArrowBoolArray.from_scalars([None, True]) diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 2b1bb53e962be..70926365d8a18 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -118,8 +118,9 @@ def __setitem__(self, key, value): value = decimal.Decimal(value) self._data[key] = value - def __len__(self): - return len(self._data) + @property + def shape(self): + return self._data.shape @property def nbytes(self): diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 1b5009830303b..85d3f95164cd4 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -105,8 +105,9 @@ def __setitem__(self, key, value): assert isinstance(v, self.dtype.type) self.data[k] = v - def __len__(self): - return len(self.data) + @property + def shape(self): + return (len(self.data),) @property def nbytes(self): diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index ded5312286302..01a398584b5e1 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1585,9 +1585,10 @@ def test_any_all_bool_only(self): (np.all, {'A': pd.Series([1, 2], dtype='category')}, True), (np.any, {'A': pd.Series([1, 2], dtype='category')}, True), - # Mix GH#21484 - (np.all, {'A': pd.Series([10, 20], dtype='M8[ns]'), - 'B': pd.Series([10, 20], dtype='m8[ns]')}, True), + # # Mix + # GH 21484 + # (np.all, {'A': pd.Series([10, 20], dtype='M8[ns]'), + # 'B': pd.Series([10, 20], dtype='m8[ns]')}, True), ]) def test_any_all_np_func(self, func, data, expected): # GH 19976 diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 3b8daa28227f8..4c1abfb1a7f6f 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -76,8 +76,9 @@ def test_get_none(self, df): # see gh-5652 assert df.get(None) is None - def test_loc_iterable(self, float_frame): - idx = iter(['A', 'B', 'C']) + @pytest.mark.parametrize('key_type', [iter, np.array, Series, Index]) + def test_loc_iterable(self, float_frame, key_type): + idx = key_type(['A', 'B', 'C']) result = float_frame.loc[:, idx] expected = float_frame.loc[:, ['A', 'B', 'C']] assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index f5c58019c433c..58a43dc218d33 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -207,7 +207,7 @@ def test_level_get_group(observed): assert_frame_equal(result, expected) -# GH#21636 previously flaky on py37 +@pytest.mark.xfail(PY37, reason="flaky on 3.7, xref gh-21636", strict=False) @pytest.mark.parametrize('ordered', [True, False]) def test_apply(ordered): # GH 10138 diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index e0238e639c071..56dfbfd485eb1 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -608,6 +608,8 @@ def test_constructor_with_int_tz(self, klass, box, tz, dtype): expected = klass([ts]) assert result == expected + # This is the desired future behavior + @pytest.mark.xfail(reason="Future behavior", strict=False) @pytest.mark.filterwarnings("ignore:\\n Passing:FutureWarning") def test_construction_int_rountrip(self, tz_naive_fixture): # GH 12619 diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 1c517d7cc9230..a971a1088860a 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -616,9 +616,7 @@ def test_to_datetime_tz_psycopg2(self, cache): @pytest.mark.parametrize( 'cache', [pytest.param(True, - marks=pytest.mark.xfail( - reason="GH#18111 pd.unique treats 0 and False " - "as equivalent")), + marks=pytest.mark.skipif(True, reason="GH 18111")), False]) def test_datetime_bool(self, cache): # GH13176 diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 36c3e7b9d1fbd..a935a731ccba6 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1159,7 +1159,6 @@ def test_to_jsonl(self): assert result == expected assert_frame_equal(pd.read_json(result, lines=True), df) - # TODO: there is a near-identical test for pytables; can we share? def test_latin_encoding(self): # GH 13774 pytest.skip("encoding not implemented in .to_json(), " diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index 5f14094162a93..2a765086af403 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -1490,7 +1490,7 @@ def test_period_immutable(): # TODO: This doesn't fail on all systems; track down which @pytest.mark.xfail(reason="Parses as Jan 1, 0007 on some systems", - strict=True) + strict=False) def test_small_year_parsing(): per1 = Period('0001-01-07', 'D') assert per1.year == 1 diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index af6754b3b288f..43fcddea3d964 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -25,7 +25,7 @@ class TestSeriesFlexArithmetic: True) ]) @pytest.mark.parametrize('opname', ['add', 'sub', 'mul', 'floordiv', - 'truediv', 'pow']) + 'truediv', 'div', 'pow']) def test_flex_method_equivalence(self, opname, ts): # check that Series.{opname} behaves like Series.__{opname}__, tser = tm.makeTimeSeries().rename('ts') @@ -34,9 +34,15 @@ def test_flex_method_equivalence(self, opname, ts): other = ts[1](tser) check_reverse = ts[2] + if opname == 'div': + pytest.skip('div test only for Py3') + op = getattr(Series, opname) - alt = getattr(operator, opname) + if op == 'div': + alt = operator.truediv + else: + alt = getattr(operator, opname) result = op(series, other) expected = alt(series, other) From f3ce13c98c385e02b21ccd9436d7fa25d6e82cf3 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 30 Jun 2019 09:33:34 -0500 Subject: [PATCH 03/21] update docstring --- pandas/core/arrays/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index b883de4fd8a56..1cc930154b5c5 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -47,7 +47,7 @@ class ExtensionArray: * _from_sequence * _from_factorized * __getitem__ - * __len__ + * shape * dtype * nbytes * isna From 4fd24c13e0c5ee62bbafec573c5db1b0f7deb3a1 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 30 Jun 2019 10:06:28 -0500 Subject: [PATCH 04/21] remove duplicated methods --- pandas/core/arrays/categorical.py | 7 +------ pandas/core/arrays/interval.py | 4 ---- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 906a941a2c5e4..d59f0b41df666 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1215,8 +1215,7 @@ def shape(self): ------- shape : tuple """ - - return tuple([len(self._codes)]) + return self._codes.shape def shift(self, periods, fill_value=None): """ @@ -1899,10 +1898,6 @@ def _slice(self, slicer): codes = self._codes[slicer] return self._constructor(values=codes, dtype=self.dtype, fastpath=True) - @property - def shape(self): - return self._codes.shape - def __iter__(self): """ Returns an Iterator over the values of this Categorical. diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 952919d3a10c7..401cae1669cec 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -702,10 +702,6 @@ def isna(self): def nbytes(self): return self.left.nbytes + self.right.nbytes - @property - def shape(self): - return self.left.shape - def take(self, indices, allow_fill=False, fill_value=None, axis=None, **kwargs): """ From c81daebf8548e1e8a98e6fb16bcd5fa7ec7276cc Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 1 Jul 2019 15:15:26 -0700 Subject: [PATCH 05/21] implement shape in terms of size, with implement_2d decorator --- pandas/core/arrays/__init__.py | 2 +- pandas/core/arrays/base.py | 71 +++++++++++++++++++++++-- pandas/core/arrays/categorical.py | 3 +- pandas/core/arrays/datetimelike.py | 3 +- pandas/core/arrays/integer.py | 3 +- pandas/core/arrays/interval.py | 3 +- pandas/core/arrays/numpy_.py | 3 +- pandas/core/arrays/sparse.py | 3 +- pandas/tests/extension/arrow/bool.py | 2 + pandas/tests/extension/decimal/array.py | 4 +- pandas/tests/extension/json/array.py | 3 +- 11 files changed, 88 insertions(+), 12 deletions(-) diff --git a/pandas/core/arrays/__init__.py b/pandas/core/arrays/__init__.py index 2d09a9eac6eab..e7aa4bc5b7fcb 100644 --- a/pandas/core/arrays/__init__.py +++ b/pandas/core/arrays/__init__.py @@ -1,6 +1,6 @@ from .array_ import array # noqa: F401 from .base import ( # noqa: F401 - ExtensionArray, ExtensionOpsMixin, ExtensionScalarOpsMixin) + ExtensionArray, ExtensionOpsMixin, ExtensionScalarOpsMixin, implement_2d) from .categorical import Categorical # noqa: F401 from .datetimes import DatetimeArray # noqa: F401 from .integer import IntegerArray, integer_array # noqa: F401 diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index d9e6ffdfbc679..3019c438f1593 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -29,6 +29,32 @@ _extension_array_shared_docs = dict() # type: Dict[str, str] +def implement_2d(cls): + """ + A decorator to take a 1-dimension-only ExtensionArray subclass and make + it support limited 2-dimensional operations. + """ + + # For backwards-compatibility, if an EA author implemented __len__ + # but not size, we use that __len__ method to get an array's size. + has_size = cls.size is not ExtensionArray.size + has_shape = cls.shape is not ExtensionArray.shape + has_len = cls.__len__ is not ExtensionArray.__len__ + + if not has_size and has_len: + cls.size = property(cls.__len__) + cls.__len__ = ExtensionArray.__len__ + + elif not has_size and has_shape: + @property + def size(self) -> int: + return np.prod(self.shape) + + cls.size = size + + return cls + + class ExtensionArray: """ Abstract base class for custom 1-D array types. @@ -47,7 +73,7 @@ class ExtensionArray: * _from_sequence * _from_factorized * __getitem__ - * shape + * __len__ *or* size * dtype * nbytes * isna @@ -298,6 +324,7 @@ def __iter__(self): # ------------------------------------------------------------------------ # Required attributes # ------------------------------------------------------------------------ + _shape = None @property def dtype(self) -> ExtensionDtype: @@ -311,7 +338,12 @@ def shape(self) -> Tuple[int, ...]: """ Return a tuple of the array dimensions. """ - raise AbstractMethodError(self) + if self._shape is not None: + return self._shape + + # Default to 1D + length = self.size + return (length,) @property def ndim(self) -> int: @@ -325,7 +357,7 @@ def size(self) -> int: """ The number of elements in this array. """ - return np.prod(self.shape) + raise AbstractMethodError(self) @property def nbytes(self) -> int: @@ -848,6 +880,22 @@ def copy(self) -> ABCExtensionArray: """ raise AbstractMethodError(self) + def view(self, dtype=None) -> ABCExtensionArray: + """ + Return a view on the array. + + Returns + ------- + ExtensionArray + + Notes + ----- + - This must return a *new* object, not self. + - The only case that *must* be implemented is with dtype=None, + giving a view with the same dtype as self. + """ + raise AbstractMethodError(self) + # ------------------------------------------------------------------------ # Printing # ------------------------------------------------------------------------ @@ -915,6 +963,23 @@ def _formatting_values(self) -> np.ndarray: # Reshaping # ------------------------------------------------------------------------ + @property + def T(self) -> ABCExtensionArray: + """ + Return a transposed view on self. For 1-D arrays this is a no-op. + """ + if self.ndim != 1: + raise NotImplementedError + return self + + def ravel(self, order=None) -> ABCExtensionArray: + """ + Return a flattened view on self. For 1-D arrays this is a no-op. + """ + if self.ndim != 1: + raise NotImplementedError + return self + @classmethod def _concat_same_type( cls, diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index e7109c5db32b5..6c686ec4e837c 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -37,7 +37,7 @@ from pandas.io.formats import console -from .base import ExtensionArray, _extension_array_shared_docs +from .base import ExtensionArray, _extension_array_shared_docs, implement_2d _take_msg = textwrap.dedent("""\ Interpreting negative values in 'indexer' as missing values. @@ -208,6 +208,7 @@ def contains(cat, key, container): """ +@implement_2d class Categorical(ExtensionArray, PandasObject): """ Represent a categorical variable in classic R / S-plus fashion. diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 982bc55256f80..22b84ce2acc04 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -36,7 +36,7 @@ from pandas.tseries import frequencies from pandas.tseries.offsets import DateOffset, Tick -from .base import ExtensionArray, ExtensionOpsMixin +from .base import ExtensionArray, ExtensionOpsMixin, implement_2d class AttributesMixin: @@ -324,6 +324,7 @@ def ceil(self, freq, ambiguous='raise', nonexistent='raise'): return self._round(freq, RoundTo.PLUS_INFTY, ambiguous, nonexistent) +@implement_2d class DatetimeLikeArrayMixin(ExtensionOpsMixin, AttributesMixin, ExtensionArray): diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 4892e3b85fc4e..4a8c099d1b23b 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -19,7 +19,7 @@ from pandas.core.dtypes.missing import isna, notna from pandas.core import nanops, ops -from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin +from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin, implement_2d from pandas.core.tools.numeric import to_numeric @@ -214,6 +214,7 @@ def coerce_to_array(values, dtype, mask=None, copy=False): return values, mask +@implement_2d class IntegerArray(ExtensionArray, ExtensionOpsMixin): """ Array of integer (optional missing) values. diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 401cae1669cec..3f227b9d499d1 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -21,7 +21,7 @@ from pandas.core.dtypes.missing import isna, notna from pandas.core.arrays.base import ( - ExtensionArray, _extension_array_shared_docs) + ExtensionArray, _extension_array_shared_docs, implement_2d) from pandas.core.arrays.categorical import Categorical import pandas.core.common as com from pandas.core.indexes.base import Index, ensure_index @@ -125,6 +125,7 @@ :meth:`IntervalArray.from_breaks`, and :meth:`IntervalArray.from_tuples`. """), )) +@implement_2d class IntervalArray(IntervalMixin, ExtensionArray): dtype = IntervalDtype() ndim = 1 diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 9faf04029471b..2d5df31292c88 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -17,7 +17,7 @@ from pandas.core.algorithms import searchsorted from pandas.core.missing import backfill_1d, pad_1d -from .base import ExtensionArray, ExtensionOpsMixin +from .base import ExtensionArray, ExtensionOpsMixin, implement_2d class PandasDtype(ExtensionDtype): @@ -83,6 +83,7 @@ def itemsize(self): return self._dtype.itemsize +@implement_2d class PandasArray(ExtensionArray, ExtensionOpsMixin, NDArrayOperatorsMixin): """ A pandas ExtensionArray for NumPy data. diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py index 597c243347c03..2b8eda8d6f037 100644 --- a/pandas/core/arrays/sparse.py +++ b/pandas/core/arrays/sparse.py @@ -34,7 +34,7 @@ from pandas._typing import Dtype from pandas.core.accessor import PandasDelegate, delegate_names import pandas.core.algorithms as algos -from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin +from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin, implement_2d from pandas.core.base import PandasObject import pandas.core.common as com from pandas.core.missing import interpolate_2d @@ -518,6 +518,7 @@ def _wrap_result(name, data, sparse_index, fill_value, dtype=None): dtype=dtype) +@implement_2d class SparseArray(PandasObject, ExtensionArray, ExtensionOpsMixin): """ An ExtensionArray for storing sparse data. diff --git a/pandas/tests/extension/arrow/bool.py b/pandas/tests/extension/arrow/bool.py index 50e2d78f006cd..85cb4f38e89bd 100644 --- a/pandas/tests/extension/arrow/bool.py +++ b/pandas/tests/extension/arrow/bool.py @@ -14,6 +14,7 @@ import pandas as pd from pandas.api.extensions import ( ExtensionArray, ExtensionDtype, register_extension_dtype, take) +from pandas.core.arrays import implement_2d @register_extension_dtype @@ -40,6 +41,7 @@ def _is_boolean(self): return True +@implement_2d class ArrowBoolArray(ExtensionArray): def __init__(self, values): if not isinstance(values, pa.ChunkedArray): diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 4cd5c1ebcab70..5a5aed898711f 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -9,7 +9,8 @@ import pandas as pd from pandas.api.extensions import register_extension_dtype -from pandas.core.arrays import ExtensionArray, ExtensionScalarOpsMixin +from pandas.core.arrays import ( + ExtensionArray, ExtensionScalarOpsMixin, implement_2d) @register_extension_dtype @@ -48,6 +49,7 @@ def _is_numeric(self): return True +@implement_2d class DecimalArray(ExtensionArray, ExtensionScalarOpsMixin): __array_priority__ = 1000 diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 85d3f95164cd4..a98fa352dd019 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -21,7 +21,7 @@ from pandas.core.dtypes.base import ExtensionDtype -from pandas.core.arrays import ExtensionArray +from pandas.core.arrays import ExtensionArray, implement_2d class JSONDtype(ExtensionDtype): @@ -48,6 +48,7 @@ def construct_from_string(cls, string): "'{}'".format(cls, string)) +@implement_2d class JSONArray(ExtensionArray): dtype = JSONDtype() __array_priority__ = 1000 From 4d77dbe16b1bbe1198a97eb93f284bfd2db1f2e4 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 1 Jul 2019 16:49:37 -0700 Subject: [PATCH 06/21] move implement_2d, implement view --- pandas/core/arrays/__init__.py | 3 +- pandas/core/arrays/_reshaping.py | 91 +++++++++++++++++++++++++ pandas/core/arrays/base.py | 56 ++++++--------- pandas/core/arrays/categorical.py | 3 +- pandas/core/arrays/datetimelike.py | 3 +- pandas/core/arrays/integer.py | 5 ++ pandas/core/arrays/interval.py | 3 +- pandas/core/arrays/numpy_.py | 8 ++- pandas/core/arrays/sparse.py | 5 ++ pandas/tests/extension/arrow/bool.py | 5 ++ pandas/tests/extension/decimal/array.py | 5 ++ pandas/tests/extension/json/array.py | 5 ++ 12 files changed, 153 insertions(+), 39 deletions(-) create mode 100644 pandas/core/arrays/_reshaping.py diff --git a/pandas/core/arrays/__init__.py b/pandas/core/arrays/__init__.py index e7aa4bc5b7fcb..1eec54335df85 100644 --- a/pandas/core/arrays/__init__.py +++ b/pandas/core/arrays/__init__.py @@ -1,6 +1,7 @@ +from ._reshaping import implement_2d # noqa:F401 from .array_ import array # noqa: F401 from .base import ( # noqa: F401 - ExtensionArray, ExtensionOpsMixin, ExtensionScalarOpsMixin, implement_2d) + ExtensionArray, ExtensionOpsMixin, ExtensionScalarOpsMixin) from .categorical import Categorical # noqa: F401 from .datetimes import DatetimeArray # noqa: F401 from .integer import IntegerArray, integer_array # noqa: F401 diff --git a/pandas/core/arrays/_reshaping.py b/pandas/core/arrays/_reshaping.py new file mode 100644 index 0000000000000..af84c573b88c6 --- /dev/null +++ b/pandas/core/arrays/_reshaping.py @@ -0,0 +1,91 @@ +""" +Utilities for implementing 2D compatibility for 1D ExtensionArrays. +""" +from typing import Tuple + +import numpy as np + +from pandas._libs.lib import is_integer + + +def implement_2d(cls): + """ + A decorator to take a 1-dimension-only ExtensionArray subclass and make + it support limited 2-dimensional operations. + """ + from pandas.core.arrays import ExtensionArray + + # For backwards-compatibility, if an EA author implemented __len__ + # but not size, we use that __len__ method to get an array's size. + has_size = cls.size is not ExtensionArray.size + has_shape = cls.shape is not ExtensionArray.shape + has_len = cls.__len__ is not ExtensionArray.__len__ + + if not has_size and has_len: + cls.size = property(cls.__len__) + cls.__len__ = ExtensionArray.__len__ + + elif not has_size and has_shape: + @property + def size(self) -> int: + return np.prod(self.shape) + + cls.size = size + + return cls + + +def tuplify_shape(size: int, shape) -> Tuple[int, ...]: + """ + Convert a passed shape into a valid tuple. + Following ndarray.reshape, we accept either `reshape(a, b)` or + `reshape((a, b))`, the latter being canonical. + + Parameters + ---------- + size : int + shape : tuple + + Returns + ------- + tuple[int, ...] + """ + if len(shape) == 0: + raise ValueError("shape must be a non-empty tuple of integers", + shape) + + if len(shape) == 1: + if is_integer(shape[0]): + pass + else: + shape = shape[0] + if not isinstance(shape, tuple): + raise ValueError("shape must be a non-empty tuple of integers", + shape) + + if not all(is_integer(x) for x in shape): + raise ValueError("shape must be a non-empty tuple of integers", shape) + + if any(x < -1 for x in shape): + raise ValueError("Invalid shape {shape}".format(shape=shape)) + + if -1 in shape: + if shape.count(-1) != 1: + raise ValueError("Invalid shape {shape}".format(shape=shape)) + idx = shape.index(-1) + others = [n for n in shape if n != -1] + prod = np.prod(others) + dim = size // prod + shape = shape[:idx] + (dim,) + shape[idx + 1:] + + if np.prod(shape) != size: + raise ValueError("Product of shape ({shape}) must match " + "size ({size})".format(shape=shape, + size=size)) + + num_gt1 = len([x for x in shape if x > 1]) + if num_gt1 > 1: + raise ValueError("The default `reshape` implementation is limited to " + "shapes (N,), (N,1), and (1,N), not {shape}" + .format(shape=shape)) + return shape diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 3019c438f1593..230a7d91a8c04 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -23,38 +23,13 @@ from pandas._typing import ArrayLike from pandas.core import ops +from pandas.core.arrays._reshaping import tuplify_shape _not_implemented_message = "{} does not implement {}." _extension_array_shared_docs = dict() # type: Dict[str, str] -def implement_2d(cls): - """ - A decorator to take a 1-dimension-only ExtensionArray subclass and make - it support limited 2-dimensional operations. - """ - - # For backwards-compatibility, if an EA author implemented __len__ - # but not size, we use that __len__ method to get an array's size. - has_size = cls.size is not ExtensionArray.size - has_shape = cls.shape is not ExtensionArray.shape - has_len = cls.__len__ is not ExtensionArray.__len__ - - if not has_size and has_len: - cls.size = property(cls.__len__) - cls.__len__ = ExtensionArray.__len__ - - elif not has_size and has_shape: - @property - def size(self) -> int: - return np.prod(self.shape) - - cls.size = size - - return cls - - class ExtensionArray: """ Abstract base class for custom 1-D array types. @@ -149,6 +124,12 @@ class ExtensionArray: # Don't override this. _typ = 'extension' + # Whether this class supports 2D arrays natively. If so, set _allows_2d + # to True and override reshape, ravel, and T. Otherwise, apply the + # `implement_2d` decorator to use default implementations of limited + # 2D functionality. + _allows_2d = False + # ------------------------------------------------------------------------ # Constructors # ------------------------------------------------------------------------ @@ -963,22 +944,29 @@ def _formatting_values(self) -> np.ndarray: # Reshaping # ------------------------------------------------------------------------ + def reshape(self, *shape): + # numpy accepts either a single tuple or an expanded tuple + shape = tuplify_shape(self.size, shape) + result = self.view() + result._shape = shape + return result + @property def T(self) -> ABCExtensionArray: """ - Return a transposed view on self. For 1-D arrays this is a no-op. + Return a transposed view on self. """ - if self.ndim != 1: - raise NotImplementedError - return self + shape = self.shape[::-1] + return self.reshape(shape) def ravel(self, order=None) -> ABCExtensionArray: """ - Return a flattened view on self. For 1-D arrays this is a no-op. + Return a flattened view on self. """ - if self.ndim != 1: - raise NotImplementedError - return self + # Note: we ignore `order`, keep the argument for compat with + # numpy signature. + shape = (self.size,) + return self.reshape(shape) @classmethod def _concat_same_type( diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 6c686ec4e837c..707954b4e0e44 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -37,7 +37,8 @@ from pandas.io.formats import console -from .base import ExtensionArray, _extension_array_shared_docs, implement_2d +from ._reshaping import implement_2d +from .base import ExtensionArray, _extension_array_shared_docs _take_msg = textwrap.dedent("""\ Interpreting negative values in 'indexer' as missing values. diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 22b84ce2acc04..53dd2ed4cb83f 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -36,7 +36,8 @@ from pandas.tseries import frequencies from pandas.tseries.offsets import DateOffset, Tick -from .base import ExtensionArray, ExtensionOpsMixin, implement_2d +from ._reshaping import implement_2d +from .base import ExtensionArray, ExtensionOpsMixin class AttributesMixin: diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 4a8c099d1b23b..f09164207e91a 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -428,6 +428,11 @@ def copy(self): mask = mask.copy() return type(self)(data, mask, copy=False) + def view(self, dtype=None): + if dtype is not None: + raise NotImplementedError + return type(self)(self._data, self._mask, copy=False) + def __setitem__(self, key, value): _is_scalar = is_scalar(value) if _is_scalar: diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 3f227b9d499d1..ac14d8b1eac54 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -20,8 +20,9 @@ ABCDatetimeIndex, ABCInterval, ABCIntervalIndex, ABCPeriodIndex, ABCSeries) from pandas.core.dtypes.missing import isna, notna +from pandas.core.arrays._reshaping import implement_2d from pandas.core.arrays.base import ( - ExtensionArray, _extension_array_shared_docs, implement_2d) + ExtensionArray, _extension_array_shared_docs) from pandas.core.arrays.categorical import Categorical import pandas.core.common as com from pandas.core.indexes.base import Index, ensure_index diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 2d5df31292c88..4efeae4620620 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -17,7 +17,8 @@ from pandas.core.algorithms import searchsorted from pandas.core.missing import backfill_1d, pad_1d -from .base import ExtensionArray, ExtensionOpsMixin, implement_2d +from ._reshaping import implement_2d +from .base import ExtensionArray, ExtensionOpsMixin class PandasDtype(ExtensionDtype): @@ -290,6 +291,11 @@ def take(self, indices, allow_fill=False, fill_value=None): def copy(self): return type(self)(self._ndarray.copy()) + def view(self, dtype=None): + if dtype is not None: + raise NotImplementedError + return type(self)(self._ndarray) + def _values_for_argsort(self): return self._ndarray diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py index 2b8eda8d6f037..f4ac1fc157897 100644 --- a/pandas/core/arrays/sparse.py +++ b/pandas/core/arrays/sparse.py @@ -1269,6 +1269,11 @@ def copy(self): values = self.sp_values.copy() return self._simple_new(values, self.sp_index, self.dtype) + def view(self, dtype=None): + if dtype is not None: + raise NotImplementedError + return self._simple_new(self.sp_values, self.sp_index, self.dtype) + @classmethod def _concat_same_type(cls, to_concat): fill_values = [x.fill_value for x in to_concat] diff --git a/pandas/tests/extension/arrow/bool.py b/pandas/tests/extension/arrow/bool.py index 85cb4f38e89bd..646c4429bbe72 100644 --- a/pandas/tests/extension/arrow/bool.py +++ b/pandas/tests/extension/arrow/bool.py @@ -114,6 +114,11 @@ def take(self, indices, allow_fill=False, fill_value=None): def copy(self): return type(self)(copy.copy(self._data)) + def view(self, dtype=None): + if dtype is not None: + raise NotImplementedError + return type(self)(self._data) + @classmethod def _concat_same_type(cls, to_concat): chunks = list(itertools.chain.from_iterable(x._data.chunks diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 5a5aed898711f..a28990be4a282 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -129,6 +129,11 @@ def take(self, indexer, allow_fill=False, fill_value=None): def copy(self): return type(self)(self._data.copy()) + def view(self, dtype=None): + if dtype is not None: + raise NotImplementedError + return type(self)(self._data, context=self.dtype.context) + def astype(self, dtype, copy=True): if isinstance(dtype, type(self.dtype)): return type(self)(self._data, context=dtype.context) diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index a98fa352dd019..024a56b778749 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -148,6 +148,11 @@ def take(self, indexer, allow_fill=False, fill_value=None): def copy(self): return type(self)(self.data[:]) + def view(self, dtype=None): + if dtype is not None: + raise NotImplementedError + return type(self)(self.data) + def astype(self, dtype, copy=True): # NumPy has issues when all the dicts are the same length. # np.array([UserDict(...), UserDict(...)]) fails, From d43ef30156415ef75162d6c4f0d7b656c1325e0b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 1 Jul 2019 17:24:44 -0700 Subject: [PATCH 07/21] port tests --- doc/source/whatsnew/v0.25.0.rst | 1 + pandas/core/arrays/base.py | 1 + pandas/core/arrays/categorical.py | 20 ++++++-------------- pandas/core/arrays/datetimelike.py | 2 ++ pandas/core/arrays/interval.py | 7 +++++++ pandas/tests/extension/arrow/test_bool.py | 4 ++++ pandas/tests/extension/base/interface.py | 12 ++++++++++++ pandas/tests/extension/test_interval.py | 6 +++++- pandas/tests/extension/test_sparse.py | 4 ++++ 9 files changed, 42 insertions(+), 15 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 8850ee79a893b..5d88590d49ab0 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -567,6 +567,7 @@ Other API changes - Using an unsupported version of Beautiful Soup 4 will now raise an ``ImportError`` instead of a ``ValueError`` (:issue:`27063`) - :meth:`Series.to_excel` and :meth:`DataFrame.to_excel` will now raise a ``ValueError`` when saving timezone aware data. (:issue:`27008`, :issue:`7056`) - :meth:`DataFrame.to_hdf` and :meth:`Series.to_hdf` will now raise a ``NotImplementedError`` when saving a :class:`MultiIndex` with extention data types for a ``fixed`` format. (:issue:`7775`) +- :meth:`Categorical.ravel` will now return a :class:`Categorical` instead of a NumPy array. (:issue:`27153`) .. _whatsnew_0250.deprecations: diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 230a7d91a8c04..e9560544d474e 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -54,6 +54,7 @@ class ExtensionArray: * isna * take * copy + * view * _concat_same_type A default repr displaying the type, (truncated) data, length, diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 707954b4e0e44..19ef5a6c61163 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1684,19 +1684,7 @@ def _values_for_rank(self): ) return values - def ravel(self, order='C'): - """ - Return a flattened (numpy) array. - - For internal compatibility with numpy arrays. - - Returns - ------- - numpy.array - """ - return np.array(self) - - def view(self): + def view(self, dtype=None): """ Return a view of myself. @@ -1707,7 +1695,11 @@ def view(self): view : Categorical Returns `self`! """ - return self + if dtype is not None: + return NotImplementedError(dtype) + return self._constructor(values=self._codes, + dtype=self.dtype, + fastpath=True) def to_dense(self): """ diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 53dd2ed4cb83f..653279f7a1702 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -555,6 +555,8 @@ def view(self, dtype=None): ndarray With the specified `dtype`. """ + if dtype is None: + return type(self)(self._data, dtype=self.dtype, freq=self.freq) return self._data.view(dtype=dtype) # ------------------------------------------------------------------ diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index ac14d8b1eac54..862d2a91918a3 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -697,6 +697,13 @@ def copy(self): # TODO: Could skip verify_integrity here. return type(self).from_arrays(left, right, closed=closed) + def view(self, dtype=None): + if dtype is not None: + raise NotImplementedError + return type(self)._simple_new(self._left, self._right, self.closed, + copy=False, dtype=None, + verify_integrity=False) + def isna(self): return isna(self.left) diff --git a/pandas/tests/extension/arrow/test_bool.py b/pandas/tests/extension/arrow/test_bool.py index e529cef68d8c5..8ebe874d69109 100644 --- a/pandas/tests/extension/arrow/test_bool.py +++ b/pandas/tests/extension/arrow/test_bool.py @@ -45,6 +45,10 @@ def test_copy(self, data): # __setitem__ does not work, so we only have a smoke-test data.copy() + def test_view(self, data): + # __setitem__ does not work, so we only have a smoke-test + data.view() + class TestConstructors(BaseArrowTests, base.BaseConstructorsTests): def test_from_dtype(self, data): diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py index fd47ae6f31290..7887fba271e9f 100644 --- a/pandas/tests/extension/base/interface.py +++ b/pandas/tests/extension/base/interface.py @@ -74,3 +74,15 @@ def test_copy(self, data): data[1] = data[0] assert result[1] != result[0] + + def test_view(self, data): + # view with no dtype should return a shallow copy, *not* the same + # object + assert data[1] != data[0] + + result = data.view() + assert result is not data + assert type(result) == type(data) + + result[1] = result[0] + assert data[1] == data[0] diff --git a/pandas/tests/extension/test_interval.py b/pandas/tests/extension/test_interval.py index f1f90b298ffe2..bec0ce621cdb5 100644 --- a/pandas/tests/extension/test_interval.py +++ b/pandas/tests/extension/test_interval.py @@ -95,7 +95,11 @@ class TestGrouping(BaseInterval, base.BaseGroupbyTests): class TestInterface(BaseInterval, base.BaseInterfaceTests): - pass + + def test_view(self, data): + # __setitem__ incorrectly makes a copy (GH#27147), so we only + # have a smoke-test + data.view() class TestReduce(base.BaseNoReduceTests): diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index 86ca3e230ddd5..96ceaf5b5d940 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -102,6 +102,10 @@ def test_copy(self, data): # __setitem__ does not work, so we only have a smoke-test data.copy() + def test_view(self, data): + # __setitem__ does not work, so we only have a smoke-test + data.view() + class TestConstructors(BaseSparseTests, base.BaseConstructorsTests): pass From bc220c3e76858fc478393bc476de5e847cf81c80 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 2 Jul 2019 06:20:48 -0700 Subject: [PATCH 08/21] shape patching, tests --- pandas/core/arrays/_reshaping.py | 40 +++++++++++++++- pandas/core/arrays/base.py | 15 +++++- pandas/core/arrays/categorical.py | 13 +----- pandas/core/arrays/datetimelike.py | 4 +- pandas/core/arrays/integer.py | 4 +- pandas/core/arrays/interval.py | 4 +- pandas/core/arrays/numpy_.py | 4 +- pandas/core/arrays/sparse.py | 4 +- pandas/core/internals/blocks.py | 8 +--- pandas/tests/arrays/test_reshaping.py | 58 ++++++++++++++++++++++++ pandas/tests/extension/arrow/bool.py | 4 +- pandas/tests/extension/base/interface.py | 7 +++ pandas/tests/extension/decimal/array.py | 4 +- pandas/tests/extension/json/array.py | 4 +- 14 files changed, 136 insertions(+), 37 deletions(-) create mode 100644 pandas/tests/arrays/test_reshaping.py diff --git a/pandas/core/arrays/_reshaping.py b/pandas/core/arrays/_reshaping.py index af84c573b88c6..e338fc1f93c8a 100644 --- a/pandas/core/arrays/_reshaping.py +++ b/pandas/core/arrays/_reshaping.py @@ -1,6 +1,7 @@ """ Utilities for implementing 2D compatibility for 1D ExtensionArrays. """ +from functools import wraps from typing import Tuple import numpy as np @@ -32,10 +33,43 @@ def size(self) -> int: cls.size = size + orig_copy = cls.copy + + @wraps(orig_copy) + def copy(self): + result = orig_copy(self) + result._shape = self._shape + return result + + cls.copy = copy + return cls -def tuplify_shape(size: int, shape) -> Tuple[int, ...]: +def can_safe_ravel(shape: Tuple[int, ...]) -> bool: + """ + Check if an array with the given shape can be ravelled unambiguously + regardless of column/row order. + + Parameters + ---------- + shape : tuple[int] + + Returns + ------- + bool + """ + if len(shape) == 1: + return True + if len(shape) > 2: + raise NotImplementedError(shape) + if shape[0] == 1 or shape[1] == 1: + # column-like or row-like + return True + return False + + +def tuplify_shape(size: int, shape, restrict=True) -> Tuple[int, ...]: """ Convert a passed shape into a valid tuple. Following ndarray.reshape, we accept either `reshape(a, b)` or @@ -45,6 +79,8 @@ def tuplify_shape(size: int, shape) -> Tuple[int, ...]: ---------- size : int shape : tuple + restrict : bool, default True + Whether to restrict to shapes (N), (1,N), and (N,1) Returns ------- @@ -84,7 +120,7 @@ def tuplify_shape(size: int, shape) -> Tuple[int, ...]: size=size)) num_gt1 = len([x for x in shape if x > 1]) - if num_gt1 > 1: + if num_gt1 > 1 and restrict: raise ValueError("The default `reshape` implementation is limited to " "shapes (N,), (N,1), and (1,N), not {shape}" .format(shape=shape)) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index e9560544d474e..313e40b2685f7 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -23,7 +23,7 @@ from pandas._typing import ArrayLike from pandas.core import ops -from pandas.core.arrays._reshaping import tuplify_shape +from pandas.core.arrays._reshaping import tuplify_shape, can_safe_ravel _not_implemented_message = "{} does not implement {}." @@ -327,6 +327,13 @@ def shape(self) -> Tuple[int, ...]: length = self.size return (length,) + @shape.setter + def shape(self, value): + size = np.prod(value) + if size != self.size: + raise ValueError("Implied size must match actual size.") + self._shape = value + @property def ndim(self) -> int: """ @@ -949,7 +956,7 @@ def reshape(self, *shape): # numpy accepts either a single tuple or an expanded tuple shape = tuplify_shape(self.size, shape) result = self.view() - result._shape = shape + result.shape = shape return result @property @@ -957,6 +964,8 @@ def T(self) -> ABCExtensionArray: """ Return a transposed view on self. """ + if not can_safe_ravel(self.shape): + raise NotImplementedError shape = self.shape[::-1] return self.reshape(shape) @@ -966,6 +975,8 @@ def ravel(self, order=None) -> ABCExtensionArray: """ # Note: we ignore `order`, keep the argument for compat with # numpy signature. + if not can_safe_ravel(self.shape): + raise NotImplementedError shape = (self.size,) return self.reshape(shape) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 19ef5a6c61163..0102732f2941f 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1208,17 +1208,8 @@ def map(self, mapper): # for Series/ndarray like compat @property - def shape(self): - """ - Shape of the Categorical. - - For internal compatibility with numpy arrays. - - Returns - ------- - shape : tuple - """ - return self._codes.shape + def size(self) -> int: + return self._codes.size def shift(self, periods, fill_value=None): """ diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 653279f7a1702..505b4c5557774 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -404,8 +404,8 @@ def __array__(self, dtype=None): return self._data @property - def shape(self): - return self._data.shape + def size(self) -> int: + return self._data.size def __getitem__(self, key): """ diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index f09164207e91a..6cbd85753f27e 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -447,8 +447,8 @@ def __setitem__(self, key, value): self._mask[key] = mask @property - def shape(self): - return self._data.shape + def size(self) -> int: + return self._data.size @property def nbytes(self): diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index c494262d6fbd6..0ae4ced5703e0 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -470,8 +470,8 @@ def __iter__(self): return iter(np.asarray(self)) @property - def shape(self): - return self.left.shape + def size(self) -> int: + return self.left.size def __getitem__(self, value): left = self.left[value] diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 4efeae4620620..df00c5ffdbd3f 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -243,8 +243,8 @@ def __setitem__(self, key, value): self._ndarray[key] = value @property - def shape(self): - return self._ndarray.shape + def size(self) -> int: + return self._ndarray.size @property def nbytes(self): diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py index f4ac1fc157897..d84e9caa9664c 100644 --- a/pandas/core/arrays/sparse.py +++ b/pandas/core/arrays/sparse.py @@ -852,8 +852,8 @@ def _valid_sp_values(self): return sp_vals[mask] @property - def shape(self): - return (self.sp_index.length,) + def size(self) -> int: + return self.sp_index.length @property def _null_fill_value(self): diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index cad37bf2b8ae1..18a6b4cbae4b0 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -598,10 +598,8 @@ def _astype(self, dtype, copy=False, errors='raise', values=None, # TODO(extension) # should we make this attribute? - try: + if isinstance(values, np.ndarray): values = values.reshape(self.shape) - except AttributeError: - pass newb = make_block(values, placement=self.mgr_locs, ndim=self.ndim) @@ -2639,10 +2637,8 @@ def convert(self, *args, **kwargs): def f(m, v, i): shape = v.shape values = fn(v.ravel(), **fn_kwargs) - try: + if isinstance(values, np.ndarray): values = values.reshape(shape) - except (AttributeError, NotImplementedError): - pass values = _block_shape(values, ndim=self.ndim) return values diff --git a/pandas/tests/arrays/test_reshaping.py b/pandas/tests/arrays/test_reshaping.py new file mode 100644 index 0000000000000..6bf3d41ce32d3 --- /dev/null +++ b/pandas/tests/arrays/test_reshaping.py @@ -0,0 +1,58 @@ +""" +Tests for reshaping utilities. +""" +import pytest + +from pandas.core.arrays._reshaping import tuplify_shape + + +class TestTuplify: + def test_tuplify_single_arg(self): + # Single-tuple cases, i.e. + # arr.reshape((x, y)) + shape = tuplify_shape(3, ((3,),)) + assert shape == (3,) + + shape = tuplify_shape(3, ((1, 3),)) + assert shape == (1, 3) + + shape = tuplify_shape(3, ((3, 1),)) + assert shape == (3, 1) + + def test_tuplify_multi_arg(self): + # Multi-arg cases, i.e. + # arr.reshape(x, y) + shape = tuplify_shape(3, (3,)) + assert shape == (3,) + + shape = tuplify_shape(3, (3, 1)) + assert shape == (3, 1) + + shape = tuplify_shape(3, (1, 3)) + assert shape == (1, 3) + + def test_tuplify_minus_one(self): + shape = tuplify_shape(4, (1, -1)) + assert shape == (1, 4) + + shape = tuplify_shape(4, (-1, 1)) + assert shape == (4, 1) + + def test_tuplify_minus_one_factors(self): + shape = tuplify_shape(4, (1, -1, 2), restrict=False) + assert shape == (1, 2, 2) + + def test_tuplify_multiple_minus_ones(self): + # No more than 1 "-1" + with pytest.raises(ValueError, match="Invalid shape"): + tuplify_shape(99, (-1, -1)) + + def test_tuplify_negative(self): + # Nothing less than -1 in a shape + with pytest.raises(ValueError, match="Invalid shape"): + tuplify_shape(99, (-2, 3)) + + def test_tuplify_size_match(self): + # must match original size + with pytest.raises(ValueError, match="Product of shape"): + tuplify_shape(3, (2, 2)) diff --git a/pandas/tests/extension/arrow/bool.py b/pandas/tests/extension/arrow/bool.py index 646c4429bbe72..40f14e5ba7fe9 100644 --- a/pandas/tests/extension/arrow/bool.py +++ b/pandas/tests/extension/arrow/bool.py @@ -76,8 +76,8 @@ def __getitem__(self, item): return type(self).from_scalars(vals) @property - def shape(self): - return (len(self._data),) + def size(self) -> int: + return len(self._data) def astype(self, dtype, copy=True): # needed to fix this astype for the Series constructor. diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py index 7887fba271e9f..d0f68005fec93 100644 --- a/pandas/tests/extension/base/interface.py +++ b/pandas/tests/extension/base/interface.py @@ -75,6 +75,13 @@ def test_copy(self, data): data[1] = data[0] assert result[1] != result[0] + def test_copy_preserves_shape(self, data): + data_2d = data.reshape(1, -1) + assert data_2d.shape == (1, len(data)), data_2d.shape + + copied = data_2d.copy() + assert copied.shape == data_2d.shape, (copied.shape, data_2d.shape) + def test_view(self, data): # view with no dtype should return a shallow copy, *not* the same # object diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index a28990be4a282..1421a693acb5d 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -149,8 +149,8 @@ def __setitem__(self, key, value): self._data[key] = value @property - def shape(self): - return self._data.shape + def size(self) -> int: + return self._data.size @property def nbytes(self): diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 024a56b778749..4c0e44ebd7e90 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -107,8 +107,8 @@ def __setitem__(self, key, value): self.data[k] = v @property - def shape(self): - return (len(self.data),) + def size(self) -> int: + return len(self.data) @property def nbytes(self): From 203504c00811b2ea49e96200abfe1763bcef27dc Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 5 Jul 2019 07:56:26 -0700 Subject: [PATCH 09/21] blackify --- pandas/core/arrays/_reshaping.py | 23 ++++++++++++----------- pandas/core/arrays/categorical.py | 4 +--- pandas/core/arrays/interval.py | 11 ++++++++--- pandas/tests/extension/decimal/array.py | 3 +-- pandas/tests/extension/test_interval.py | 1 - 5 files changed, 22 insertions(+), 20 deletions(-) diff --git a/pandas/core/arrays/_reshaping.py b/pandas/core/arrays/_reshaping.py index e338fc1f93c8a..55c4817b164c0 100644 --- a/pandas/core/arrays/_reshaping.py +++ b/pandas/core/arrays/_reshaping.py @@ -27,6 +27,7 @@ def implement_2d(cls): cls.__len__ = ExtensionArray.__len__ elif not has_size and has_shape: + @property def size(self) -> int: return np.prod(self.shape) @@ -87,8 +88,7 @@ def tuplify_shape(size: int, shape, restrict=True) -> Tuple[int, ...]: tuple[int, ...] """ if len(shape) == 0: - raise ValueError("shape must be a non-empty tuple of integers", - shape) + raise ValueError("shape must be a non-empty tuple of integers", shape) if len(shape) == 1: if is_integer(shape[0]): @@ -96,8 +96,7 @@ def tuplify_shape(size: int, shape, restrict=True) -> Tuple[int, ...]: else: shape = shape[0] if not isinstance(shape, tuple): - raise ValueError("shape must be a non-empty tuple of integers", - shape) + raise ValueError("shape must be a non-empty tuple of integers", shape) if not all(is_integer(x) for x in shape): raise ValueError("shape must be a non-empty tuple of integers", shape) @@ -112,16 +111,18 @@ def tuplify_shape(size: int, shape, restrict=True) -> Tuple[int, ...]: others = [n for n in shape if n != -1] prod = np.prod(others) dim = size // prod - shape = shape[:idx] + (dim,) + shape[idx + 1:] + shape = shape[:idx] + (dim,) + shape[idx + 1 :] if np.prod(shape) != size: - raise ValueError("Product of shape ({shape}) must match " - "size ({size})".format(shape=shape, - size=size)) + raise ValueError( + "Product of shape ({shape}) must match " + "size ({size})".format(shape=shape, size=size) + ) num_gt1 = len([x for x in shape if x > 1]) if num_gt1 > 1 and restrict: - raise ValueError("The default `reshape` implementation is limited to " - "shapes (N,), (N,1), and (1,N), not {shape}" - .format(shape=shape)) + raise ValueError( + "The default `reshape` implementation is limited to " + "shapes (N,), (N,1), and (1,N), not {shape}".format(shape=shape) + ) return shape diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 29e70e4c4cc13..e357d4cdb61d9 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1766,9 +1766,7 @@ def view(self, dtype=None): """ if dtype is not None: return NotImplementedError(dtype) - return self._constructor(values=self._codes, - dtype=self.dtype, - fastpath=True) + return self._constructor(values=self._codes, dtype=self.dtype, fastpath=True) def to_dense(self): """ diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index e77388474312b..15a23ad167fbb 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -736,9 +736,14 @@ def copy(self): def view(self, dtype=None): if dtype is not None: raise NotImplementedError - return type(self)._simple_new(self._left, self._right, self.closed, - copy=False, dtype=None, - verify_integrity=False) + return type(self)._simple_new( + self._left, + self._right, + self.closed, + copy=False, + dtype=None, + verify_integrity=False, + ) def isna(self): return isna(self.left) diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 98c4f4ed9a339..897bed8aeed9b 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -9,8 +9,7 @@ import pandas as pd from pandas.api.extensions import register_extension_dtype -from pandas.core.arrays import ( - ExtensionArray, ExtensionScalarOpsMixin, implement_2d) +from pandas.core.arrays import ExtensionArray, ExtensionScalarOpsMixin, implement_2d @register_extension_dtype diff --git a/pandas/tests/extension/test_interval.py b/pandas/tests/extension/test_interval.py index ed351bee86ad3..4fdcf930d224f 100644 --- a/pandas/tests/extension/test_interval.py +++ b/pandas/tests/extension/test_interval.py @@ -95,7 +95,6 @@ class TestGrouping(BaseInterval, base.BaseGroupbyTests): class TestInterface(BaseInterval, base.BaseInterfaceTests): - def test_view(self, data): # __setitem__ incorrectly makes a copy (GH#27147), so we only # have a smoke-test From 2f18daeb5dc822956ce47873787b4f6c080aa021 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 8 Jul 2019 09:27:44 -0700 Subject: [PATCH 10/21] shape fixups --- pandas/core/arrays/base.py | 2 +- pandas/core/arrays/interval.py | 2 ++ pandas/tests/extension/test_interval.py | 5 ++++- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 98ca832689e77..6a94ade9f2c58 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -944,7 +944,7 @@ def reshape(self, *shape): # numpy accepts either a single tuple or an expanded tuple shape = tuplify_shape(self.size, shape) result = self.view() - result.shape = shape + result._shape = shape return result @property diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 0f2a789298d8b..1aef4a64cff6c 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -758,6 +758,8 @@ def size(self) -> int: @property def shape(self): + if self._shape is not None: + return self._shape return self.left.shape def take(self, indices, allow_fill=False, fill_value=None, axis=None, **kwargs): diff --git a/pandas/tests/extension/test_interval.py b/pandas/tests/extension/test_interval.py index 4fdcf930d224f..9fa942bd0a50e 100644 --- a/pandas/tests/extension/test_interval.py +++ b/pandas/tests/extension/test_interval.py @@ -143,7 +143,10 @@ def test_non_scalar_raises(self, data_missing): class TestReshaping(BaseInterval, base.BaseReshapingTests): - pass + + @pytest.mark.xfail(reason="setitem incorrectly makes copy, see GH#27147") + def test_ravel(self, data): + super().test_ravel(data) class TestSetitem(BaseInterval, base.BaseSetitemTests): From eb0645d223337f76be0f01df3fc0f0af25d08ba8 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 8 Jul 2019 15:00:13 -0700 Subject: [PATCH 11/21] blackify+isort --- pandas/core/arrays/base.py | 2 +- pandas/tests/extension/test_interval.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 6a94ade9f2c58..c04a701f56066 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -22,7 +22,7 @@ from pandas._typing import ArrayLike from pandas.core import ops -from pandas.core.arrays._reshaping import tuplify_shape, can_safe_ravel +from pandas.core.arrays._reshaping import can_safe_ravel, tuplify_shape from pandas.core.sorting import nargsort _not_implemented_message = "{} does not implement {}." diff --git a/pandas/tests/extension/test_interval.py b/pandas/tests/extension/test_interval.py index 9fa942bd0a50e..505b81e88b88e 100644 --- a/pandas/tests/extension/test_interval.py +++ b/pandas/tests/extension/test_interval.py @@ -143,7 +143,6 @@ def test_non_scalar_raises(self, data_missing): class TestReshaping(BaseInterval, base.BaseReshapingTests): - @pytest.mark.xfail(reason="setitem incorrectly makes copy, see GH#27147") def test_ravel(self, data): super().test_ravel(data) From 25409330101be09349f29471882c198139ad94a4 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 8 Jul 2019 16:44:20 -0700 Subject: [PATCH 12/21] property read-write --- pandas/core/arrays/interval.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 1aef4a64cff6c..509e6858cc601 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -762,6 +762,10 @@ def shape(self): return self._shape return self.left.shape + @shape.setter + def shape(self, value): + self._shape = value + def take(self, indices, allow_fill=False, fill_value=None, axis=None, **kwargs): """ Take elements from the IntervalArray. From 5b60fb57a3d1f6eb861e31b3a5957a45b6ff19ac Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 8 Jul 2019 17:45:44 -0700 Subject: [PATCH 13/21] add docstring --- pandas/core/arrays/base.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index c04a701f56066..19540945762c1 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -941,6 +941,9 @@ def _formatting_values(self) -> np.ndarray: # ------------------------------------------------------------------------ def reshape(self, *shape): + """ + Return a view on this array with the given shape. + """ # numpy accepts either a single tuple or an expanded tuple shape = tuplify_shape(self.size, shape) result = self.view() From 92a0a5643c897d1ae64c55e19756043b64e9d877 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 22 Jul 2019 08:47:46 -0700 Subject: [PATCH 14/21] implement base class view --- pandas/core/arrays/base.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 31159c16e6e90..355754841b442 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -909,7 +909,9 @@ def view(self, dtype=None) -> ABCExtensionArray: - The only case that *must* be implemented is with dtype=None, giving a view with the same dtype as self. """ - raise AbstractMethodError(self) + if dtype is not None: + raise NotImplementedError(dtype) + return self[:] # ------------------------------------------------------------------------ # Printing From 91639dd703b1c9998749731bc5101f18c5c68cdd Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 22 Jul 2019 09:19:42 -0700 Subject: [PATCH 15/21] use base view --- pandas/core/arrays/base.py | 1 - pandas/core/arrays/integer.py | 5 ----- pandas/core/arrays/interval.py | 26 ++----------------------- pandas/core/arrays/numpy_.py | 5 ----- pandas/core/arrays/sparse.py | 5 ----- pandas/tests/extension/arrow/bool.py | 5 ----- pandas/tests/extension/decimal/array.py | 5 ----- pandas/tests/extension/json/array.py | 8 +++----- 8 files changed, 5 insertions(+), 55 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 355754841b442..20483e28c5a00 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -87,7 +87,6 @@ class ExtensionArray: * isna * take * copy - * view * _concat_same_type A default repr displaying the type, (truncated) data, length, diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 2bba6fa4e4223..8b7137f0954a5 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -448,11 +448,6 @@ def copy(self): mask = mask.copy() return type(self)(data, mask, copy=False) - def view(self, dtype=None): - if dtype is not None: - raise NotImplementedError - return type(self)(self._data, self._mask, copy=False) - def __setitem__(self, key, value): _is_scalar = is_scalar(value) if _is_scalar: diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 4d42f6546600a..dc0fb51016cd4 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -733,40 +733,18 @@ def copy(self): # TODO: Could skip verify_integrity here. return type(self).from_arrays(left, right, closed=closed) - def view(self, dtype=None): - if dtype is not None: - raise NotImplementedError - return type(self)._simple_new( - self._left, - self._right, - self.closed, - copy=False, - dtype=None, - verify_integrity=False, - ) - def isna(self): return isna(self.left) @property - def nbytes(self) -> int: + def nbytes(self): return self.left.nbytes + self.right.nbytes @property - def size(self) -> int: + def size(self): # Avoid materializing self.values return self.left.size - @property - def shape(self): - if self._shape is not None: - return self._shape - return self.left.shape - - @shape.setter - def shape(self, value): - self._shape = value - def take(self, indices, allow_fill=False, fill_value=None, axis=None, **kwargs): """ Take elements from the IntervalArray. diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 1fb1bc4840d80..258f00230b74d 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -291,11 +291,6 @@ def take(self, indices, allow_fill=False, fill_value=None): def copy(self): return type(self)(self._ndarray.copy()) - def view(self, dtype=None): - if dtype is not None: - raise NotImplementedError - return type(self)(self._ndarray) - def _values_for_argsort(self): return self._ndarray diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py index fd9d05232cc8a..2fe9d55e7c562 100644 --- a/pandas/core/arrays/sparse.py +++ b/pandas/core/arrays/sparse.py @@ -1274,11 +1274,6 @@ def copy(self): values = self.sp_values.copy() return self._simple_new(values, self.sp_index, self.dtype) - def view(self, dtype=None): - if dtype is not None: - raise NotImplementedError - return self._simple_new(self.sp_values, self.sp_index, self.dtype) - @classmethod def _concat_same_type(cls, to_concat): fill_values = [x.fill_value for x in to_concat] diff --git a/pandas/tests/extension/arrow/bool.py b/pandas/tests/extension/arrow/bool.py index 227ca3cde6663..c06802d55276a 100644 --- a/pandas/tests/extension/arrow/bool.py +++ b/pandas/tests/extension/arrow/bool.py @@ -119,11 +119,6 @@ def take(self, indices, allow_fill=False, fill_value=None): def copy(self): return type(self)(copy.copy(self._data)) - def view(self, dtype=None): - if dtype is not None: - raise NotImplementedError - return type(self)(self._data) - @classmethod def _concat_same_type(cls, to_concat): chunks = list(itertools.chain.from_iterable(x._data.chunks for x in to_concat)) diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 768a31bfa7d6f..e4fd1c21b58db 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -124,11 +124,6 @@ def take(self, indexer, allow_fill=False, fill_value=None): def copy(self): return type(self)(self._data.copy()) - def view(self, dtype=None): - if dtype is not None: - raise NotImplementedError - return type(self)(self._data, context=self.dtype.context) - def astype(self, dtype, copy=True): if isinstance(dtype, type(self.dtype)): return type(self)(self._data, context=dtype.context) diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 2b1a6d862c3b2..73d3ae7fb9ec8 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -81,6 +81,9 @@ def __getitem__(self, item): elif isinstance(item, abc.Iterable): # fancy indexing return type(self)([self.data[i] for i in item]) + elif isinstance(item, slice) and item == slice(None): + # make sure we get a view + return type(self)(self.data) else: # slice return type(self)(self.data[item]) @@ -148,11 +151,6 @@ def take(self, indexer, allow_fill=False, fill_value=None): def copy(self): return type(self)(self.data[:]) - def view(self, dtype=None): - if dtype is not None: - raise NotImplementedError - return type(self)(self.data) - def astype(self, dtype, copy=True): # NumPy has issues when all the dicts are the same length. # np.array([UserDict(...), UserDict(...)]) fails, From 34cc9e9f155f7b0f6a2f038a7808817c874a1128 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 22 Jul 2019 10:45:49 -0700 Subject: [PATCH 16/21] patch take, getitem --- pandas/core/arrays/_reshaping.py | 92 +++++++++++++++++++++++++++++++ pandas/core/arrays/categorical.py | 4 +- 2 files changed, 94 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/_reshaping.py b/pandas/core/arrays/_reshaping.py index 55c4817b164c0..866a31f9750b0 100644 --- a/pandas/core/arrays/_reshaping.py +++ b/pandas/core/arrays/_reshaping.py @@ -44,9 +44,101 @@ def copy(self): cls.copy = copy + orig_getitem = cls.__getitem__ + + def __getitem__(self, key): + if self.ndim == 1: + return orig_getitem(self, key) + + key = expand_key(key, self.shape) + if is_integer(key[0]): + assert key[0] in [0, -1] + result = orig_getitem(self, key[1]) + return result + + if isinstance(key[0], slice): + if slice_contains_zero(key[0]): + result = orig_getitem(self, key[1]) + result._shape = (1, result.size) + return result + + raise NotImplementedError(key) + # TODO: ellipses? + raise NotImplementedError(key) + + cls.__getitem__ = __getitem__ + + orig_take = cls.take + + # kwargs for compat with Interval + # allow_fill=None instead of False is for compat with Categorical + def take(self, indices, allow_fill=None, fill_value=None, axis=0, **kwargs): + if self.ndim == 1 and axis == 0: + return orig_take(self, indices, allow_fill=allow_fill, + fill_value=fill_value, **kwargs) + + if self.ndim != 2 or self.shape[0] != 1: + raise NotImplementedError + if axis not in [0, 1]: + raise ValueError(axis) + if kwargs: + raise ValueError('kwargs should not be passed in the 2D case, ' + 'are only included for compat with Interval') + + if axis == 1: + result = orig_take(self, indices, allow_fill=allow_fill, + fill_value=fill_value) + result._shape = (1, result.size) + return result + + # For axis == 0, because we only support shape (1, N) + # there are only limited indices we can accept + if len(indices) != 1: + # TODO: we could probably support zero-len here + raise NotImplementedError + + def take_item(n): + if n == -1: + seq = [fill_value] * self.shape[1] + return type(self)._from_sequence(seq) + else: + return self[n, :] + + arrs = [take_item(n) for n in indices] + result = type(self)._concat_same_type(arrs) + result.shape = (len(indices), self.shape[1]) + return result + + cls.take = take + return cls +def slice_contains_zero(slc: slice) -> bool: + if slc == slice(None): + return True + if slc == slice(0, None): + return True + if slc == slice(0, 1): + return True + raise NotImplementedError(slc) + + +def expand_key(key, shape): + ndim = len(shape) + if ndim != 2 or shape[0] != 1: + raise NotImplementedError + if not isinstance(key, tuple): + key = (key, slice(None)) + if len(key) != 2: + raise ValueError(key) + + if is_integer(key[0]) and key[0] not in [0, -1]: + raise ValueError(key) + + return key + + def can_safe_ravel(shape: Tuple[int, ...]) -> bool: """ Check if an array with the given shape can be ravelled unambiguously diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 2a4c1b4a104c9..30bb9cd0983f0 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1944,7 +1944,7 @@ def take_nd(self, indexer, allow_fill=None, fill_value=None): indexer = np.asarray(indexer, dtype=np.intp) if allow_fill is None: if (indexer < 0).any(): - warn(_take_msg, FutureWarning, stacklevel=2) + warn(_take_msg, FutureWarning, stacklevel=3) allow_fill = True dtype = self.dtype @@ -1956,7 +1956,7 @@ def take_nd(self, indexer, allow_fill=None, fill_value=None): if fill_value in self.categories: fill_value = self.categories.get_loc(fill_value) else: - msg = "'fill_value' ('{}') is not in this Categorical's " "categories." + msg = "'fill_value' ('{}') is not in this Categorical's categories." raise TypeError(msg.format(fill_value)) codes = take(self._codes, indexer, allow_fill=allow_fill, fill_value=fill_value) From 444f9f758efc636aa538b9a961075ee50aafa4bd Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 22 Jul 2019 12:54:40 -0700 Subject: [PATCH 17/21] blackify --- pandas/core/arrays/_reshaping.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/pandas/core/arrays/_reshaping.py b/pandas/core/arrays/_reshaping.py index 866a31f9750b0..837c817f67468 100644 --- a/pandas/core/arrays/_reshaping.py +++ b/pandas/core/arrays/_reshaping.py @@ -74,20 +74,24 @@ def __getitem__(self, key): # allow_fill=None instead of False is for compat with Categorical def take(self, indices, allow_fill=None, fill_value=None, axis=0, **kwargs): if self.ndim == 1 and axis == 0: - return orig_take(self, indices, allow_fill=allow_fill, - fill_value=fill_value, **kwargs) + return orig_take( + self, indices, allow_fill=allow_fill, fill_value=fill_value, **kwargs + ) if self.ndim != 2 or self.shape[0] != 1: raise NotImplementedError if axis not in [0, 1]: raise ValueError(axis) if kwargs: - raise ValueError('kwargs should not be passed in the 2D case, ' - 'are only included for compat with Interval') + raise ValueError( + "kwargs should not be passed in the 2D case, " + "are only included for compat with Interval" + ) if axis == 1: - result = orig_take(self, indices, allow_fill=allow_fill, - fill_value=fill_value) + result = orig_take( + self, indices, allow_fill=allow_fill, fill_value=fill_value + ) result._shape = (1, result.size) return result From 7c15b74ce8d5e3f809e8ead07cc5dfddd9c174df Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 22 Jul 2019 12:54:58 -0700 Subject: [PATCH 18/21] isort fixup --- pandas/core/arrays/integer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 8b7137f0954a5..bf4fd9fc0f97e 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -24,8 +24,8 @@ from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries from pandas.core.dtypes.missing import isna, notna -from pandas.core.algorithms import take from pandas.core import nanops, ops +from pandas.core.algorithms import take from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin, implement_2d from pandas.core.tools.numeric import to_numeric From 768d75d5163b71cfd18de753e19ca420ace56b00 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 22 Jul 2019 13:32:48 -0700 Subject: [PATCH 19/21] patch iter --- pandas/core/arrays/_reshaping.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pandas/core/arrays/_reshaping.py b/pandas/core/arrays/_reshaping.py index 837c817f67468..4936e3bb339f9 100644 --- a/pandas/core/arrays/_reshaping.py +++ b/pandas/core/arrays/_reshaping.py @@ -115,6 +115,18 @@ def take_item(n): cls.take = take + orig_iter = cls.__iter__ + + def __iter__(self): + if self.ndim == 1: + for obj in orig_iter(self): + yield obj + else: + for n in range(self.shape[0]): + yield self[n] + + cls.__iter__ = __iter__ + return cls From 3b7b2b2d3597cd9d62eea028e77fa1c8d13b390b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 22 Jul 2019 14:49:57 -0700 Subject: [PATCH 20/21] slice handling cleanup --- pandas/core/arrays/_reshaping.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/core/arrays/_reshaping.py b/pandas/core/arrays/_reshaping.py index 4936e3bb339f9..931e59678db7e 100644 --- a/pandas/core/arrays/_reshaping.py +++ b/pandas/core/arrays/_reshaping.py @@ -137,6 +137,10 @@ def slice_contains_zero(slc: slice) -> bool: return True if slc == slice(0, 1): return True + if slc.start == slc.stop: + # Note: order matters here, since we _dont_ want this to catch + # the slice(None) case. + return False raise NotImplementedError(slc) From fc331b8caebf7f05c18f2280bd027da21973d977 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 9 Aug 2019 08:43:01 -0700 Subject: [PATCH 21/21] dummy to force CI