pandas-dev · jreback · Dec 27, 2019 · Dec 5, 2019 · Dec 5, 2019 · Dec 5, 2019
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
@@ -816,6 +816,7 @@ Reshaping
 - Bug where :meth:`DataFrame.equals` returned True incorrectly in some cases when two DataFrames had the same columns in different orders (:issue:`28839`)
 - Bug in :meth:`DataFrame.replace` that caused non-numeric replacer's dtype not respected (:issue:`26632`)
 - Bug in :func:`melt` where supplying mixed strings and numeric values for ``id_vars`` or ``value_vars`` would incorrectly raise a ``ValueError`` (:issue:`29718`)
+- Dtypes are now preserved when transposing a ``DataFrame`` in which every column has the same extension dtype (:issue:`30091`)
 - Bug in :func:`merge_asof` merging on a tz-aware ``left_index`` and ``right_on`` a tz-aware column (:issue:`29864`)
 -
 

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -2503,7 +2503,7 @@ def memory_usage(self, index=True, deep=False):
             )
         return result
 
-    def transpose(self, *args, **kwargs):
+    def transpose(self, *args, copy=False):
         """
         Transpose index and columns.
 
@@ -2513,9 +2513,14 @@ def transpose(self, *args, **kwargs):
 
         Parameters
         ----------
-        *args, **kwargs
-            Additional arguments and keywords have no effect but might be
-            accepted for compatibility with numpy.
+        *args : tuple, optional
+            Accepted for compatibility with NumPy.
+        copy : bool, default False
+            Whether to copy the data after transposing, even for DataFrames
+            with a single dtype.
+
+            Note that a copy is always required for mixed dtype DataFrames,
+            or for DataFrames with any extension types.
 
         Returns
         -------
@@ -2596,7 +2601,27 @@ def transpose(self, *args, **kwargs):
         dtype: object
         """
         nv.validate_transpose(args, dict())
-        return super().transpose(1, 0, **kwargs)
+        # An empty frame has no dtypes, so guard before looking at the first one.
+        dtypes = list(self.dtypes)
+        if self._is_homogeneous_type and dtypes and is_extension_array_dtype(dtypes[0]):
+            dtype = dtypes[0]
+            arr_type = dtype.construct_array_type()
+            values = self.values
+            new_values = [arr_type._from_sequence(row, dtype=dtype) for row in values]
+            # Key by position and relabel afterwards: building the dict from the
+            # index labels would silently merge rows that share a label.
+            result = self._constructor(dict(enumerate(new_values)), index=self.columns)
+            result.columns = self.index
+        else:
+            # ``copy`` only matters here; the extension path always builds new arrays.
+            new_values = self.values.T
+            if copy:
+                new_values = new_values.copy()
+            result = self._constructor(
+                new_values, index=self.columns, columns=self.index
+            )
+
+        return result.__finalize__(self)
 
     T = property(transpose)
 

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -644,50 +644,6 @@ def _set_axis(self, axis, labels):
         self._data.set_axis(axis, labels)
         self._clear_item_cache()
 
-    def transpose(self, *args, **kwargs):
-        """
-        Permute the dimensions of the %(klass)s
-
-        Parameters
-        ----------
-        args : %(args_transpose)s
-        copy : bool, default False
-            Make a copy of the underlying data. Mixed-dtype data will
-            always result in a copy
-        **kwargs
-            Additional keyword arguments will be passed to the function.
-
-        Returns
-        -------
-        y : same as input
-
-        Examples
-        --------
-        >>> p.transpose(2, 0, 1)
-        >>> p.transpose(2, 0, 1, copy=True)
-        """
-
-        # construct the args
-        axes, kwargs = self._construct_axes_from_arguments(
-            args, kwargs, require_all=True
-        )
-        axes_names = tuple(self._get_axis_name(axes[a]) for a in self._AXIS_ORDERS)
-        axes_numbers = tuple(self._get_axis_number(axes[a]) for a in self._AXIS_ORDERS)
-
-        # we must have unique axes
-        if len(axes) != len(set(axes)):
-            raise ValueError(f"Must specify {self._AXIS_LEN} unique axes")
-
-        new_axes = self._construct_axes_dict_from(
-            self, [self._get_axis(x) for x in axes_names]
-        )
-        new_values = self.values.transpose(axes_numbers)
-        if kwargs.pop("copy", None) or (len(args) and args[-1]):
-            new_values = new_values.copy()
-
-        nv.validate_transpose(tuple(), kwargs)
-        return self._constructor(new_values, **new_axes).__finalize__(self)
-
     def swapaxes(self, axis1, axis2, copy=True):
         """
         Interchange axes and swap values axes appropriately.

diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
@@ -491,6 +491,10 @@ def _unstack_extension_series(series, level, fill_value):
     return concat(out, axis="columns", copy=False, keys=result.columns)
 
 
+def _transpose_extension_arrays(df):
+    pass  # placeholder: no body yet, and nothing else in this patch calls it
+
+
 def stack(frame, level=-1, dropna=True):
     """
     Convert DataFrame to Series with multi-level Index. Columns become the

diff --git a/pandas/tests/arithmetic/conftest.py b/pandas/tests/arithmetic/conftest.py
@@ -235,25 +235,6 @@ def box_df_fail(request):
     return request.param
 
 
-@pytest.fixture(
-    params=[
-        (pd.Index, False),
-        (pd.Series, False),
-        (pd.DataFrame, False),
-        pytest.param((pd.DataFrame, True), marks=pytest.mark.xfail),
-        (tm.to_array, False),
-    ],
-    ids=id_func,
-)
-def box_transpose_fail(request):
-    """
-    Fixture similar to `box` but testing both transpose cases for DataFrame,
-    with the tranpose=True case xfailed.
-    """
-    # GH#23620
-    return request.param
-
-
 @pytest.fixture(params=[pd.Index, pd.Series, pd.DataFrame, tm.to_array], ids=id_func)
 def box_with_array(request):
     """

diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py
@@ -755,10 +755,10 @@ def test_pi_sub_isub_offset(self):
         rng -= pd.offsets.MonthEnd(5)
         tm.assert_index_equal(rng, expected)
 
-    def test_pi_add_offset_n_gt1(self, box_transpose_fail):
+    @pytest.mark.parametrize("transpose", [True, False])
+    def test_pi_add_offset_n_gt1(self, box, transpose):
         # GH#23215
         # add offset to PeriodIndex with freq.n > 1
-        box, transpose = box_transpose_fail
 
         per = pd.Period("2016-01", freq="2M")
         pi = pd.PeriodIndex([per])
@@ -984,10 +984,9 @@ def test_pi_add_sub_timedeltalike_freq_mismatch_monthly(self, mismatched_freq):
         with pytest.raises(IncompatibleFrequency, match=msg):
             rng -= other
 
-    def test_parr_add_sub_td64_nat(self, box_transpose_fail):
+    @pytest.mark.parametrize("transpose", [True, False])
+    def test_parr_add_sub_td64_nat(self, box, transpose):
         # GH#23320 special handling for timedelta64("NaT")
-        box, transpose = box_transpose_fail
-
         pi = pd.period_range("1994-04-01", periods=9, freq="19D")
         other = np.timedelta64("NaT")
         expected = pd.PeriodIndex(["NaT"] * 9, freq="19D")
@@ -1011,10 +1010,9 @@ def test_parr_add_sub_td64_nat(self, box_transpose_fail):
             TimedeltaArray._from_sequence(["NaT"] * 9),
         ],
     )
-    def test_parr_add_sub_tdt64_nat_array(self, box_df_fail, other):
+    def test_parr_add_sub_tdt64_nat_array(self, box, other):
         # FIXME: DataFrame fails because when when operating column-wise
         #  timedelta64 entries become NaT and are treated like datetimes
-        box = box_df_fail
 
         pi = pd.period_range("1994-04-01", periods=9, freq="19D")
         expected = pd.PeriodIndex(["NaT"] * 9, freq="19D")

diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py
@@ -295,3 +295,19 @@ def test_ravel(self, data):
         # Check that we have a view, not a copy
         result[0] = result[1]
         assert data[0] == data[1]
+
+    def test_transpose(self, data):
+        labels = ["a", "b", "c", "d"]
+        frame = pd.DataFrame({"A": data[:4], "B": data[:4]}, index=labels)
+        transposed = frame.T
+        array_cls = type(data)
+        expected = pd.DataFrame(
+            {
+                label: array_cls._from_sequence([data[pos]] * 2, dtype=data.dtype)
+                for pos, label in enumerate(labels)
+            },
+            index=["A", "B"],
+        )
+        self.assert_frame_equal(transposed, expected)
+        # Transposing twice through NumPy must hand back the same frame.
+        self.assert_frame_equal(np.transpose(np.transpose(expected)), expected)
diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py
@@ -162,6 +162,10 @@ def test_unstack(self, data, index):
         # this matches otherwise
         return super().test_unstack(data, index)
 
+    @pytest.mark.skip(reason="Inconsistent sizes.")
+    def test_transpose(self, data):
+        super().test_transpose(data)  # override exists only to carry the skip marker
+
 
 class TestGetitem(BaseJSON, base.BaseGetitemTests):
     pass

diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py
@@ -330,6 +330,10 @@ def test_merge_on_extension_array_duplicates(self, data):
         # Fails creating expected
         super().test_merge_on_extension_array_duplicates(data)
 
+    @skip_nested
+    def test_transpose(self, data):
+        super().test_transpose(data)  # override exists only to apply ``skip_nested``
+
 
 class TestSetitem(BaseNumPyTests, base.BaseSetitemTests):
     @skip_nested

diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py
@@ -891,6 +891,22 @@ def test_no_warning(self, all_arithmetic_operators):
 
 
 class TestTranspose:
+    @pytest.mark.parametrize(
+        "ser",
+        [
+            pd.date_range("2016-04-05 04:30", periods=3, tz="UTC"),
+            pd.period_range("1994", freq="A", periods=3),
+            pd.period_range("1969", freq="9s", periods=1),
+            pd.date_range("2016-04-05 04:30", periods=3).astype("category"),
+            pd.date_range("2016-04-05 04:30", periods=3, tz="UTC").astype("category"),
+        ],
+    )
+    def test_transpose_retains_extension_dtype(self, ser):
+        # GH#30091 two columns sharing one extension dtype; .T must keep that dtype
+        df = pd.DataFrame({"a": ser, "b": ser})
+        result = df.T
+        assert (result.dtypes == ser.dtype).all()
+
     def test_transpose_tzaware_1col_single_tz(self):
         # GH#26825
         dti = pd.date_range("2016-04-05 04:30", periods=3, tz="UTC")