From ee0902a832b7fa3e5821ada176566301791e09ec Mon Sep 17 00:00:00 2001 From: ZKaoChi <1953542921@qq.com> Date: Sat, 23 Nov 2024 04:20:39 +0800 Subject: [PATCH] BUG: Convert output type in Excel for MultiIndex with period levels (#60182) --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/io/formats/excel.py | 8 ++++++ pandas/tests/io/excel/test_style.py | 26 ++++++++++++++++++ pandas/tests/io/excel/test_writers.py | 38 +++++++++++++++++++++++++++ 4 files changed, 73 insertions(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 120ee978292d6..1d55fc3ed7b84 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -690,6 +690,7 @@ I/O - Bug in :meth:`DataFrame.from_records` where ``columns`` parameter with numpy structured array was not reordering and filtering out the columns (:issue:`59717`) - Bug in :meth:`DataFrame.to_dict` raises unnecessary ``UserWarning`` when columns are not unique and ``orient='tight'``. (:issue:`58281`) - Bug in :meth:`DataFrame.to_excel` when writing empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`) +- Bug in :meth:`DataFrame.to_excel` where the :class:`MultiIndex` index with a period level was not a date (:issue:`60099`) - Bug in :meth:`DataFrame.to_stata` when writing :class:`DataFrame` and ``byteorder=`big```. (:issue:`58969`) - Bug in :meth:`DataFrame.to_stata` when writing more than 32,000 value labels. (:issue:`60107`) - Bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`) diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index 6a3e215de3f96..5fde6577e9f95 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -37,6 +37,7 @@ DataFrame, Index, MultiIndex, + Period, PeriodIndex, ) import pandas.core.common as com @@ -803,6 +804,9 @@ def _format_hierarchical_rows(self) -> Iterable[ExcelCell]: allow_fill=levels._can_hold_na, fill_value=levels._na_value, ) + # GH#60099 + if isinstance(values[0], Period): + values = values.to_timestamp() for i, span_val in spans.items(): mergestart, mergeend = None, None @@ -827,6 +831,10 @@ def _format_hierarchical_rows(self) -> Iterable[ExcelCell]: # Format hierarchical rows with non-merged values. for indexcolvals in zip(*self.df.index): for idx, indexcolval in enumerate(indexcolvals): + # GH#60099 + if isinstance(indexcolval, Period): + indexcolval = indexcolval.to_timestamp() + yield CssExcelCell( row=self.rowcounter + idx, col=gcolidx, diff --git a/pandas/tests/io/excel/test_style.py b/pandas/tests/io/excel/test_style.py index f70e65e34c584..71ef1201e523f 100644 --- a/pandas/tests/io/excel/test_style.py +++ b/pandas/tests/io/excel/test_style.py @@ -9,6 +9,9 @@ from pandas import ( DataFrame, + MultiIndex, + Timestamp, + period_range, read_excel, ) import pandas._testing as tm @@ -333,3 +336,26 @@ def test_styler_to_s3(s3_public_bucket, s3so): f"s3://{mock_bucket_name}/{target_file}", index_col=0, storage_options=s3so ) tm.assert_frame_equal(result, df) + + +@pytest.mark.parametrize("merge_cells", [True, False, "columns"]) +def test_format_hierarchical_rows_periodindex(merge_cells): + # GH#60099 + df = DataFrame( + {"A": [1, 2]}, + index=MultiIndex.from_arrays( + [ + period_range(start="2006-10-06", end="2006-10-07", freq="D"), + ["X", "Y"], + ], + names=["date", "category"], + ), + ) + formatter = ExcelFormatter(df, merge_cells=merge_cells) + formatted_cells = formatter._format_hierarchical_rows() + + for cell in formatted_cells: + if cell.row != 0 and cell.col == 0: + assert isinstance( + cell.val, Timestamp + ), "Period should be converted to Timestamp" diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 19fe9855dbb85..18948de72200a 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -23,6 +23,7 @@ MultiIndex, date_range, option_context, + period_range, ) import pandas._testing as tm @@ -335,6 +336,43 @@ def test_multiindex_interval_datetimes(self, tmp_excel): ) tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("merge_cells", [True, False, "columns"]) + def test_excel_round_trip_with_periodindex(self, tmp_excel, merge_cells): + # GH#60099 + df = DataFrame( + {"A": [1, 2]}, + index=MultiIndex.from_arrays( + [ + period_range(start="2006-10-06", end="2006-10-07", freq="D"), + ["X", "Y"], + ], + names=["date", "category"], + ), + ) + df.to_excel(tmp_excel, merge_cells=merge_cells) + result = pd.read_excel(tmp_excel, index_col=[0, 1]) + expected = DataFrame( + {"A": [1, 2]}, + MultiIndex.from_arrays( + [ + [ + pd.to_datetime("2006-10-06 00:00:00"), + pd.to_datetime("2006-10-07 00:00:00"), + ], + ["X", "Y"], + ], + names=["date", "category"], + ), + ) + time_format = ( + "datetime64[s]" if tmp_excel.endswith(".ods") else "datetime64[us]" + ) + expected.index = expected.index.set_levels( + expected.index.levels[0].astype(time_format), level=0 + ) + + tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize( "engine,ext",