From a2217e0018141ac5724d1e1ea8dcfeadbc3ff30f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 5 Dec 2019 14:20:55 -0600 Subject: [PATCH 01/13] BUG: preserve EA dtype in transpose --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/core/generic.py | 33 +++++++++++++++++++++--- pandas/tests/extension/base/reshaping.py | 14 ++++++++++ pandas/tests/extension/test_numpy.py | 4 +++ 4 files changed, 48 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index f3b058c099c14..4143ad12ed17f 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -775,6 +775,7 @@ Reshaping - Bug where :meth:`DataFrame.equals` returned True incorrectly in some cases when two DataFrames had the same columns in different orders (:issue:`28839`) - Bug in :meth:`DataFrame.replace` that caused non-numeric replacer's dtype not respected (:issue:`26632`) - Bug in :func:`melt` where supplying mixed strings and numeric values for ``id_vars`` or ``value_vars`` would incorrectly raise a ``ValueError`` (:issue:`29718`) +- Dtypes are now preserved when transposing a ``DataFrame`` where each column is the same extension dtyep (:issue:``) - Bug in :func:`merge_asof` merging on a tz-aware ``left_index`` and ``right_on`` a tz-aware column (:issue:`29864`) - diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e19bf9c1c39ea..8072c5a984e86 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -721,12 +721,37 @@ def transpose(self, *args, **kwargs): new_axes = self._construct_axes_dict_from( self, [self._get_axis(x) for x in axes_names] ) - new_values = self.values.transpose(axes_numbers) - if kwargs.pop("copy", None) or (len(args) and args[-1]): - new_values = new_values.copy() + + if ( + self._is_homogeneous_type + and len(self._data.blocks) + and is_extension_array_dtype(self._data.blocks[0].dtype) + ): + kwargs.pop("copy", None) # by definition, we're copying + dtype = self._data.blocks[0].dtype 
+ arr_type = dtype.construct_array_type() + + # Slow, but unavoidable with 1D EAs. + new_values = [] + for i in range(len(self)): + new_values.append( + arr_type._from_sequence( + [block.values[i] for block in self._data.blocks], dtype=dtype + ) + ) + columns = new_axes.pop("columns") + new_values = dict(zip(columns, new_values)) + result = self._constructor(new_values, **new_axes) + + else: + new_values = self.values.transpose(axes_numbers) + if kwargs.pop("copy", None) or (len(args) and args[-1]): + new_values = new_values.copy() + + result = self._constructor(new_values, **new_axes) nv.validate_transpose(tuple(), kwargs) - return self._constructor(new_values, **new_axes).__finalize__(self) + return result.__finalize__(self) def swapaxes(self, axis1, axis2, copy=True): """ diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py index 90e607343297d..bb9b21b66dbb5 100644 --- a/pandas/tests/extension/base/reshaping.py +++ b/pandas/tests/extension/base/reshaping.py @@ -295,3 +295,17 @@ def test_ravel(self, data): # Check that we have a view, not a copy result[0] = result[1] assert data[0] == data[1] + + def test_transpose(self, data): + df = pd.DataFrame({"A": data[:4], "B": data[:4]}, index=["a", "b", "c", "d"]) + result = df.T + expected = pd.DataFrame( + { + "a": type(data)._from_sequence([data[0]] * 2, dtype=data.dtype), + "b": type(data)._from_sequence([data[1]] * 2, dtype=data.dtype), + "c": type(data)._from_sequence([data[2]] * 2, dtype=data.dtype), + "d": type(data)._from_sequence([data[3]] * 2, dtype=data.dtype), + }, + index=["A", "B"], + ) + self.assert_frame_equal(result, expected) diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index 221cf0787d839..95ba2fc8f00f2 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -330,6 +330,10 @@ def test_merge_on_extension_array_duplicates(self, data): # Fails creating expected 
super().test_merge_on_extension_array_duplicates(data) + @skip_nested + def test_transpose(self, data): + super().test_transpose(data) + class TestSetitem(BaseNumPyTests, base.BaseSetitemTests): @skip_nested From 4fb44c535ac25623760569f43a990d036952c610 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 5 Dec 2019 15:32:50 -0600 Subject: [PATCH 02/13] fix typo --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 4143ad12ed17f..3dba0abaf7297 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -775,7 +775,7 @@ Reshaping - Bug where :meth:`DataFrame.equals` returned True incorrectly in some cases when two DataFrames had the same columns in different orders (:issue:`28839`) - Bug in :meth:`DataFrame.replace` that caused non-numeric replacer's dtype not respected (:issue:`26632`) - Bug in :func:`melt` where supplying mixed strings and numeric values for ``id_vars`` or ``value_vars`` would incorrectly raise a ``ValueError`` (:issue:`29718`) -- Dtypes are now preserved when transposing a ``DataFrame`` where each column is the same extension dtyep (:issue:``) +- Dtypes are now preserved when transposing a ``DataFrame`` where each column is the same extension dtype (:issue:`30091`) - Bug in :func:`merge_asof` merging on a tz-aware ``left_index`` and ``right_on`` a tz-aware column (:issue:`29864`) - From e18a42607acc0f4040d87fdef1cf4741a9078508 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 5 Dec 2019 15:32:58 -0600 Subject: [PATCH 03/13] remove xpass --- pandas/tests/arithmetic/conftest.py | 19 ------------------- pandas/tests/arithmetic/test_period.py | 12 +++++------- 2 files changed, 5 insertions(+), 26 deletions(-) diff --git a/pandas/tests/arithmetic/conftest.py b/pandas/tests/arithmetic/conftest.py index 1f8fdfd671856..64588af3e3053 100644 --- a/pandas/tests/arithmetic/conftest.py +++ 
b/pandas/tests/arithmetic/conftest.py @@ -235,25 +235,6 @@ def box_df_fail(request): return request.param -@pytest.fixture( - params=[ - (pd.Index, False), - (pd.Series, False), - (pd.DataFrame, False), - pytest.param((pd.DataFrame, True), marks=pytest.mark.xfail), - (tm.to_array, False), - ], - ids=id_func, -) -def box_transpose_fail(request): - """ - Fixture similar to `box` but testing both transpose cases for DataFrame, - with the tranpose=True case xfailed. - """ - # GH#23620 - return request.param - - @pytest.fixture(params=[pd.Index, pd.Series, pd.DataFrame, tm.to_array], ids=id_func) def box_with_array(request): """ diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index ed693d873efb8..ef95402c6215b 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -755,10 +755,10 @@ def test_pi_sub_isub_offset(self): rng -= pd.offsets.MonthEnd(5) tm.assert_index_equal(rng, expected) - def test_pi_add_offset_n_gt1(self, box_transpose_fail): + @pytest.mark.parametrize("transpose", [True, False]) + def test_pi_add_offset_n_gt1(self, box, transpose): # GH#23215 # add offset to PeriodIndex with freq.n > 1 - box, transpose = box_transpose_fail per = pd.Period("2016-01", freq="2M") pi = pd.PeriodIndex([per]) @@ -984,10 +984,9 @@ def test_pi_add_sub_timedeltalike_freq_mismatch_monthly(self, mismatched_freq): with pytest.raises(IncompatibleFrequency, match=msg): rng -= other - def test_parr_add_sub_td64_nat(self, box_transpose_fail): + @pytest.mark.parametrize("transpose", [True, False]) + def test_parr_add_sub_td64_nat(self, box, transpose): # GH#23320 special handling for timedelta64("NaT") - box, transpose = box_transpose_fail - pi = pd.period_range("1994-04-01", periods=9, freq="19D") other = np.timedelta64("NaT") expected = pd.PeriodIndex(["NaT"] * 9, freq="19D") @@ -1011,10 +1010,9 @@ def test_parr_add_sub_td64_nat(self, box_transpose_fail): TimedeltaArray._from_sequence(["NaT"] * 9), 
], ) - def test_parr_add_sub_tdt64_nat_array(self, box_df_fail, other): + def test_parr_add_sub_tdt64_nat_array(self, box, other): # FIXME: DataFrame fails because when when operating column-wise # timedelta64 entries become NaT and are treated like datetimes - box = box_df_fail pi = pd.period_range("1994-04-01", periods=9, freq="19D") expected = pd.PeriodIndex(["NaT"] * 9, freq="19D") From 9be80b7436479eef98931811deaf22ea1d3d93a8 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 6 Dec 2019 11:44:39 -0600 Subject: [PATCH 04/13] simplify --- pandas/core/frame.py | 31 +++++++++++++-- pandas/core/generic.py | 69 ---------------------------------- pandas/core/reshape/reshape.py | 4 ++ 3 files changed, 32 insertions(+), 72 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index fde3d1657b4f2..0278f900d0c8c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2501,7 +2501,7 @@ def memory_usage(self, index=True, deep=False): ) return result - def transpose(self, *args, **kwargs): + def transpose(self, *args, copy=False, **kwargs): """ Transpose index and columns. 
@@ -2593,8 +2593,33 @@ def transpose(self, *args, **kwargs): 1 object dtype: object """ - nv.validate_transpose(args, dict()) - return super().transpose(1, 0, **kwargs) + if args == () or args == (None,): + args = (1, 0) + else: + raise ValueError( + "the 'axes' parameter is not supported in pandas.DataFrame.transpose" + ) + # construct the args + + if self._is_homogeneous_type and is_extension_array_dtype(self.iloc[:, 0]): + dtype = self.dtypes.iloc[0] + arr_type = dtype.construct_array_type() + values = self.values + + new_values = [arr_type._from_sequence(row, dtype=dtype) for row in values] + result = self._constructor( + dict(zip(self.index, new_values)), index=self.columns + ) + + else: + new_values = self.values.transpose(args) + if copy: + new_values = new_values.copy() + result = self._constructor( + new_values, index=self.columns, columns=self.index + ) + + return result.__finalize__(self) T = property(transpose) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8072c5a984e86..808eff45e88a4 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -684,75 +684,6 @@ def _set_axis(self, axis, labels): self._data.set_axis(axis, labels) self._clear_item_cache() - def transpose(self, *args, **kwargs): - """ - Permute the dimensions of the %(klass)s - - Parameters - ---------- - args : %(args_transpose)s - copy : bool, default False - Make a copy of the underlying data. Mixed-dtype data will - always result in a copy - **kwargs - Additional keyword arguments will be passed to the function. 
- - Returns - ------- - y : same as input - - Examples - -------- - >>> p.transpose(2, 0, 1) - >>> p.transpose(2, 0, 1, copy=True) - """ - - # construct the args - axes, kwargs = self._construct_axes_from_arguments( - args, kwargs, require_all=True - ) - axes_names = tuple(self._get_axis_name(axes[a]) for a in self._AXIS_ORDERS) - axes_numbers = tuple(self._get_axis_number(axes[a]) for a in self._AXIS_ORDERS) - - # we must have unique axes - if len(axes) != len(set(axes)): - raise ValueError(f"Must specify {self._AXIS_LEN} unique axes") - - new_axes = self._construct_axes_dict_from( - self, [self._get_axis(x) for x in axes_names] - ) - - if ( - self._is_homogeneous_type - and len(self._data.blocks) - and is_extension_array_dtype(self._data.blocks[0].dtype) - ): - kwargs.pop("copy", None) # by definition, we're copying - dtype = self._data.blocks[0].dtype - arr_type = dtype.construct_array_type() - - # Slow, but unavoidable with 1D EAs. - new_values = [] - for i in range(len(self)): - new_values.append( - arr_type._from_sequence( - [block.values[i] for block in self._data.blocks], dtype=dtype - ) - ) - columns = new_axes.pop("columns") - new_values = dict(zip(columns, new_values)) - result = self._constructor(new_values, **new_axes) - - else: - new_values = self.values.transpose(axes_numbers) - if kwargs.pop("copy", None) or (len(args) and args[-1]): - new_values = new_values.copy() - - result = self._constructor(new_values, **new_axes) - - nv.validate_transpose(tuple(), kwargs) - return result.__finalize__(self) - def swapaxes(self, axis1, axis2, copy=True): """ Interchange axes and swap values axes appropriately. 
diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index a8dcc995e48da..85d4cc33d1a1a 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -491,6 +491,10 @@ def _unstack_extension_series(series, level, fill_value): return concat(out, axis="columns", copy=False, keys=result.columns) +def _transpose_extension_arrays(df): + pass + + def stack(frame, level=-1, dropna=True): """ Convert DataFrame to Series with multi-level Index. Columns become the From bfdfccf2304255c95c7d8169ba18ae2d026c444b Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 6 Dec 2019 12:04:38 -0600 Subject: [PATCH 05/13] simplify --- pandas/core/frame.py | 1 + pandas/tests/extension/base/reshaping.py | 2 ++ pandas/tests/extension/json/test_json.py | 4 ++++ 3 files changed, 7 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 10aa32335ddcc..66faab9df421a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2595,6 +2595,7 @@ def transpose(self, *args, copy=False, **kwargs): 1 object dtype: object """ + nv.validate_transpose(args, dict()) if args == () or args == (None,): args = (1, 0) else: diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py index bb9b21b66dbb5..c38ed020afb95 100644 --- a/pandas/tests/extension/base/reshaping.py +++ b/pandas/tests/extension/base/reshaping.py @@ -309,3 +309,5 @@ def test_transpose(self, data): index=["A", "B"], ) self.assert_frame_equal(result, expected) + + self.assert_frame_equal(np.transpose(np.transpose(expected)), expected) diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index 7e027a65eec3a..51f913b741a81 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -162,6 +162,10 @@ def test_unstack(self, data, index): # this matches otherwise return super().test_unstack(data, index) + @pytest.mark.skip(reason="Inconsistent 
sizes.") + def test_transpose(self, data): + super().test_transpose(data) + class TestGetitem(BaseJSON, base.BaseGetitemTests): pass From 10d81bd018d2008b5bdf774b89696b09a01cec71 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 6 Dec 2019 12:06:09 -0600 Subject: [PATCH 06/13] steal tests --- pandas/tests/frame/test_operators.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index f3e61dffb500d..4ca1ee50e97b3 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -891,6 +891,22 @@ def test_no_warning(self, all_arithmetic_operators): class TestTranspose: + @pytest.mark.parametrize( + "ser", + [ + pd.date_range("2016-04-05 04:30", periods=3, tz="UTC"), + pd.period_range("1994", freq="A", periods=3), + pd.period_range("1969", freq="9s", periods=1), + pd.date_range("2016-04-05 04:30", periods=3).astype("category"), + pd.date_range("2016-04-05 04:30", periods=3, tz="UTC").astype("category"), + ], + ) + def test_transpose_retains_extension_dtype(self, ser): + # case with more than 1 column, must have same dtype + df = pd.DataFrame({"a": ser, "b": ser}) + result = df.T + assert (result.dtypes == ser.dtype).all() + def test_transpose_tzaware_1col_single_tz(self): # GH#26825 dti = pd.date_range("2016-04-05 04:30", periods=3, tz="UTC") From 132472d46748e8bf52a955e471267942515283fa Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 6 Dec 2019 12:11:28 -0600 Subject: [PATCH 07/13] update docs --- pandas/core/frame.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 66faab9df421a..5c373ec2b5fab 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2503,7 +2503,7 @@ def memory_usage(self, index=True, deep=False): ) return result - def transpose(self, *args, copy=False, **kwargs): + def transpose(self, *args, copy=False): """ 
Transpose index and columns. @@ -2513,9 +2513,14 @@ def transpose(self, *args, copy=False, **kwargs): Parameters ---------- - *args, **kwargs - Additional arguments and keywords have no effect but might be - accepted for compatibility with numpy. + *args : tuple, optional + Accepted for compatibility with NumPy. + copy : bool, default False + Whether to copy the data after transposing, even for DataFrames + with a single dtype. + + Note that a copy is always required for mixed dtype DataFrames, + or for DataFrames with any extension types. Returns ------- @@ -2596,12 +2601,6 @@ def transpose(self, *args, copy=False, **kwargs): dtype: object """ nv.validate_transpose(args, dict()) - if args == () or args == (None,): - args = (1, 0) - else: - raise ValueError( - "the 'axes' parameter is not supported in pandas.DataFrame.transpose" - ) # construct the args if self._is_homogeneous_type and is_extension_array_dtype(self.iloc[:, 0]): @@ -2615,7 +2614,7 @@ def transpose(self, *args, copy=False, **kwargs): ) else: - new_values = self.values.transpose(args) + new_values = self.values.T if copy: new_values = new_values.copy() result = self._constructor( From 6aae8d69fa26ed1aa657127daae61ad75fd2ada1 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 6 Dec 2019 13:17:59 -0600 Subject: [PATCH 08/13] fixup --- pandas/core/frame.py | 8 +++++--- pandas/tests/extension/base/reshaping.py | 4 ++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5c373ec2b5fab..ea68f41ca348f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2503,7 +2503,7 @@ def memory_usage(self, index=True, deep=False): ) return result - def transpose(self, *args, copy=False): + def transpose(self, *args, copy: bool = False): """ Transpose index and columns. 
@@ -2603,8 +2603,10 @@ def transpose(self, *args, copy=False): nv.validate_transpose(args, dict()) # construct the args - if self._is_homogeneous_type and is_extension_array_dtype(self.iloc[:, 0]): - dtype = self.dtypes.iloc[0] + dtypes = list(self.dtypes) + if self._is_homogeneous_type and is_extension_array_dtype(dtypes[0]): + # is_homogeneous_type implies that we have at least one column. + dtype = dtypes[0] arr_type = dtype.construct_array_type() values = self.values diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py index c38ed020afb95..89c9ed3674a66 100644 --- a/pandas/tests/extension/base/reshaping.py +++ b/pandas/tests/extension/base/reshaping.py @@ -309,5 +309,5 @@ def test_transpose(self, data): index=["A", "B"], ) self.assert_frame_equal(result, expected) - - self.assert_frame_equal(np.transpose(np.transpose(expected)), expected) + self.assert_frame_equal(np.transpose(np.transpose(df)), df) + self.assert_frame_equal(np.transpose(np.transpose(df[["A"]])), df[["A"]]) From 9d703a29bfe483db8ba6583568a256ee44e56eed Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 6 Dec 2019 15:33:48 -0600 Subject: [PATCH 09/13] empty --- pandas/core/frame.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 648711bc95f7c..44ab039e7853b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2595,8 +2595,7 @@ def transpose(self, *args, copy: bool = False): # construct the args dtypes = list(self.dtypes) - if self._is_homogeneous_type and is_extension_array_dtype(dtypes[0]): - # is_homogeneous_type implies that we have at least one column. 
+ if self._is_homogeneous_type and dtypes and is_extension_array_dtype(dtypes[0]): dtype = dtypes[0] arr_type = dtype.construct_array_type() values = self.values From feecee8dc60d661cd26e19c3b4e46e85b40fb774 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 12 Dec 2019 08:02:54 -0600 Subject: [PATCH 10/13] box --- pandas/core/frame.py | 1 + pandas/core/reshape/reshape.py | 4 ---- pandas/tests/arithmetic/test_period.py | 18 +++++++++--------- 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2dfbbac3f4492..889cfd43ea582 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2595,6 +2595,7 @@ def transpose(self, *args, copy: bool = False): dtypes = list(self.dtypes) if self._is_homogeneous_type and dtypes and is_extension_array_dtype(dtypes[0]): + # We have EAs with the same dtype. We can preserve that dtype in transpose. dtype = dtypes[0] arr_type = dtype.construct_array_type() values = self.values diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 85d4cc33d1a1a..a8dcc995e48da 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -491,10 +491,6 @@ def _unstack_extension_series(series, level, fill_value): return concat(out, axis="columns", copy=False, keys=result.columns) -def _transpose_extension_arrays(df): - pass - - def stack(frame, level=-1, dropna=True): """ Convert DataFrame to Series with multi-level Index. 
Columns become the diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index ef95402c6215b..f9fc766c29f8f 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -756,7 +756,7 @@ def test_pi_sub_isub_offset(self): tm.assert_index_equal(rng, expected) @pytest.mark.parametrize("transpose", [True, False]) - def test_pi_add_offset_n_gt1(self, box, transpose): + def test_pi_add_offset_n_gt1(self, box_with_array, transpose): # GH#23215 # add offset to PeriodIndex with freq.n > 1 @@ -765,8 +765,8 @@ def test_pi_add_offset_n_gt1(self, box, transpose): expected = pd.PeriodIndex(["2016-03"], freq="2M") - pi = tm.box_expected(pi, box, transpose=transpose) - expected = tm.box_expected(expected, box, transpose=transpose) + pi = tm.box_expected(pi, box_with_array, transpose=transpose) + expected = tm.box_expected(expected, box_with_array, transpose=transpose) result = pi + per.freq tm.assert_equal(result, expected) @@ -985,14 +985,14 @@ def test_pi_add_sub_timedeltalike_freq_mismatch_monthly(self, mismatched_freq): rng -= other @pytest.mark.parametrize("transpose", [True, False]) - def test_parr_add_sub_td64_nat(self, box, transpose): + def test_parr_add_sub_td64_nat(self, box_with_array, transpose): # GH#23320 special handling for timedelta64("NaT") pi = pd.period_range("1994-04-01", periods=9, freq="19D") other = np.timedelta64("NaT") expected = pd.PeriodIndex(["NaT"] * 9, freq="19D") - obj = tm.box_expected(pi, box, transpose=transpose) - expected = tm.box_expected(expected, box, transpose=transpose) + obj = tm.box_expected(pi, box_with_array, transpose=transpose) + expected = tm.box_expected(expected, box_with_array, transpose=transpose) result = obj + other tm.assert_equal(result, expected) @@ -1010,15 +1010,15 @@ def test_parr_add_sub_td64_nat(self, box, transpose): TimedeltaArray._from_sequence(["NaT"] * 9), ], ) - def test_parr_add_sub_tdt64_nat_array(self, box, other): + def 
test_parr_add_sub_tdt64_nat_array(self, box_with_array, other): # FIXME: DataFrame fails because when when operating column-wise # timedelta64 entries become NaT and are treated like datetimes pi = pd.period_range("1994-04-01", periods=9, freq="19D") expected = pd.PeriodIndex(["NaT"] * 9, freq="19D") - obj = tm.box_expected(pi, box) - expected = tm.box_expected(expected, box) + obj = tm.box_expected(pi, box_with_array) + expected = tm.box_expected(expected, box_with_array) result = obj + other tm.assert_equal(result, expected) From 9abd6c279b5002595370cc51f892309f718f41aa Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 20 Dec 2019 06:49:43 -0600 Subject: [PATCH 11/13] update test. --- pandas/tests/arithmetic/test_period.py | 3 --- pandas/tests/extension/json/test_json.py | 2 +- pandas/tests/frame/test_operators.py | 9 ++++++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index f9fc766c29f8f..621c759cb550f 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -1011,9 +1011,6 @@ def test_parr_add_sub_td64_nat(self, box_with_array, transpose): ], ) def test_parr_add_sub_tdt64_nat_array(self, box_with_array, other): - # FIXME: DataFrame fails because when when operating column-wise - # timedelta64 entries become NaT and are treated like datetimes - pi = pd.period_range("1994-04-01", periods=9, freq="19D") expected = pd.PeriodIndex(["NaT"] * 9, freq="19D") diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index 51f913b741a81..476dc4f1e6fa1 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -162,7 +162,7 @@ def test_unstack(self, data, index): # this matches otherwise return super().test_unstack(data, index) - @pytest.mark.skip(reason="Inconsistent sizes.") + @pytest.mark.xfail(reason="Inconsistent sizes.") def 
test_transpose(self, data): super().test_transpose(data) diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 4ca1ee50e97b3..137a284931725 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -897,15 +897,18 @@ class TestTranspose: pd.date_range("2016-04-05 04:30", periods=3, tz="UTC"), pd.period_range("1994", freq="A", periods=3), pd.period_range("1969", freq="9s", periods=1), - pd.date_range("2016-04-05 04:30", periods=3).astype("category"), + pytest.param( + pd.date_range("2016-04-05 04:30", periods=3).astype("category"), + marks=pytest.mark.xfail(reason="buggy"), + ), pd.date_range("2016-04-05 04:30", periods=3, tz="UTC").astype("category"), ], ) def test_transpose_retains_extension_dtype(self, ser): # case with more than 1 column, must have same dtype df = pd.DataFrame({"a": ser, "b": ser}) - result = df.T - assert (result.dtypes == ser.dtype).all() + result = df.T.T + tm.assert_frame_equal(result, df) def test_transpose_tzaware_1col_single_tz(self): # GH#26825 From f6b3c37ed9f54cd217972d4720164dce79d6ef4f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 20 Dec 2019 08:13:06 -0600 Subject: [PATCH 12/13] filter --- pandas/tests/frame/test_operators.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 137a284931725..5416a33c2a222 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -1,5 +1,6 @@ from decimal import Decimal import operator +import warnings import numpy as np import pytest @@ -908,7 +909,14 @@ def test_transpose_retains_extension_dtype(self, ser): # case with more than 1 column, must have same dtype df = pd.DataFrame({"a": ser, "b": ser}) result = df.T.T - tm.assert_frame_equal(result, df) + # For Categorical[datetime64[ns, tz]], we have a warning that the + # dtype will change to preserve tz in the future. 
We don't care, + # since happening to both sides here. + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", "Converting timezone-aware", FutureWarning + ) + tm.assert_frame_equal(result, df) def test_transpose_tzaware_1col_single_tz(self): # GH#26825 From 6d9daa8b676b6556dfa5c57058b156220afb312a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 26 Dec 2019 17:11:04 -0800 Subject: [PATCH 13/13] fixup unused import --- pandas/tests/frame/test_operators.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index aa0c8e4ad640b..a4f1c0688b144 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -1,6 +1,5 @@ from decimal import Decimal import operator -import warnings import numpy as np import pytest