Skip to content

Commit

Permalink
BUG: Convert output type in Excel for MultiIndex with period levels (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
ZKaoChi authored Nov 22, 2024
1 parent eaa8b47 commit ee0902a
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 0 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -690,6 +690,7 @@ I/O
- Bug in :meth:`DataFrame.from_records` where ``columns`` parameter with numpy structured array was not reordering and filtering out the columns (:issue:`59717`)
- Bug in :meth:`DataFrame.to_dict` raising an unnecessary ``UserWarning`` when columns are not unique and ``orient='tight'``. (:issue:`58281`)
- Bug in :meth:`DataFrame.to_excel` when writing empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`)
- Bug in :meth:`DataFrame.to_excel` where a :class:`MultiIndex` index with a period level was not written as a date (:issue:`60099`)
- Bug in :meth:`DataFrame.to_stata` when writing :class:`DataFrame` and ``byteorder="big"``. (:issue:`58969`)
- Bug in :meth:`DataFrame.to_stata` when writing more than 32,000 value labels. (:issue:`60107`)
- Bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
Expand Down
8 changes: 8 additions & 0 deletions pandas/io/formats/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
DataFrame,
Index,
MultiIndex,
Period,
PeriodIndex,
)
import pandas.core.common as com
Expand Down Expand Up @@ -803,6 +804,9 @@ def _format_hierarchical_rows(self) -> Iterable[ExcelCell]:
allow_fill=levels._can_hold_na,
fill_value=levels._na_value,
)
# GH#60099
if isinstance(values[0], Period):
values = values.to_timestamp()

for i, span_val in spans.items():
mergestart, mergeend = None, None
Expand All @@ -827,6 +831,10 @@ def _format_hierarchical_rows(self) -> Iterable[ExcelCell]:
# Format hierarchical rows with non-merged values.
for indexcolvals in zip(*self.df.index):
for idx, indexcolval in enumerate(indexcolvals):
# GH#60099
if isinstance(indexcolval, Period):
indexcolval = indexcolval.to_timestamp()

yield CssExcelCell(
row=self.rowcounter + idx,
col=gcolidx,
Expand Down
26 changes: 26 additions & 0 deletions pandas/tests/io/excel/test_style.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@

from pandas import (
DataFrame,
MultiIndex,
Timestamp,
period_range,
read_excel,
)
import pandas._testing as tm
Expand Down Expand Up @@ -333,3 +336,26 @@ def test_styler_to_s3(s3_public_bucket, s3so):
f"s3://{mock_bucket_name}/{target_file}", index_col=0, storage_options=s3so
)
tm.assert_frame_equal(result, df)


@pytest.mark.parametrize("merge_cells", [True, False, "columns"])
def test_format_hierarchical_rows_periodindex(merge_cells):
    """Check that a period level of a row MultiIndex is emitted as Timestamps.

    Builds a two-row frame whose first index level is a daily ``period_range``
    and inspects the cells yielded by
    ``ExcelFormatter._format_hierarchical_rows`` for every ``merge_cells``
    mode under test.
    """
    # GH#60099
    df = DataFrame(
        {"A": [1, 2]},
        index=MultiIndex.from_arrays(
            [
                period_range(start="2006-10-06", end="2006-10-07", freq="D"),
                ["X", "Y"],
            ],
            names=["date", "category"],
        ),
    )
    formatter = ExcelFormatter(df, merge_cells=merge_cells)

    # Only column 0 (the "date" level) is of interest; row 0 is excluded
    # (presumably the index-name header row -- confirm against the formatter).
    period_cells = [
        cell
        for cell in formatter._format_hierarchical_rows()
        if cell.row != 0 and cell.col == 0
    ]

    # Guard against a vacuous pass: if the formatter stopped emitting cells
    # for the period level, the type check below would never run.
    assert period_cells, "No cells were generated for the period index level"

    for cell in period_cells:
        assert isinstance(
            cell.val, Timestamp
        ), "Period should be converted to Timestamp"
38 changes: 38 additions & 0 deletions pandas/tests/io/excel/test_writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
MultiIndex,
date_range,
option_context,
period_range,
)
import pandas._testing as tm

Expand Down Expand Up @@ -335,6 +336,43 @@ def test_multiindex_interval_datetimes(self, tmp_excel):
)
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("merge_cells", [True, False, "columns"])
def test_excel_round_trip_with_periodindex(self, tmp_excel, merge_cells):
    """Round-trip a frame with a period MultiIndex level through Excel.

    The frame is written with ``to_excel`` and read back with both index
    columns; the period level is expected to come back as datetimes, with
    second resolution for ``.ods`` files and microsecond resolution otherwise.
    """
    # GH#60099
    period_level = period_range(start="2006-10-06", end="2006-10-07", freq="D")
    df = DataFrame(
        {"A": [1, 2]},
        index=MultiIndex.from_arrays(
            [period_level, ["X", "Y"]],
            names=["date", "category"],
        ),
    )
    df.to_excel(tmp_excel, merge_cells=merge_cells)
    result = pd.read_excel(tmp_excel, index_col=[0, 1])

    # The expected datetime resolution depends on the file extension.
    if tmp_excel.endswith(".ods"):
        time_format = "datetime64[s]"
    else:
        time_format = "datetime64[us]"

    date_strings = ("2006-10-06 00:00:00", "2006-10-07 00:00:00")
    expected_index = MultiIndex.from_arrays(
        [
            [pd.to_datetime(stamp) for stamp in date_strings],
            ["X", "Y"],
        ],
        names=["date", "category"],
    )
    # Cast only the date level to the resolution expected for this file type.
    expected_index = expected_index.set_levels(
        expected_index.levels[0].astype(time_format), level=0
    )
    expected = DataFrame({"A": [1, 2]}, expected_index)

    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
"engine,ext",
Expand Down

0 comments on commit ee0902a

Please sign in to comment.