Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: complex Series/DataFrame display all complex nans as nan+0j #53844

Merged
merged 3 commits into from
Jun 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 39 additions & 7 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1492,6 +1492,35 @@ def format_with_na_rep(values: ArrayLike, formatter: Callable, na_rep: str):
).reshape(values.shape)
return formatted

def format_complex_with_na_rep(
    values: ArrayLike, formatter: Callable, na_rep: str
):
    """
    Format complex values, substituting ``na_rep`` for each missing component.

    The real and imaginary parts are checked for missing values
    independently, so an entry renders as one of ``xxx+yyyj`` (neither
    part missing), ``xxx+<na_rep>j``, ``<na_rep>[+/-]yyyj`` or
    ``<na_rep>+<na_rep>j``.

    Parameters
    ----------
    values : ArrayLike
        Complex values to format.
    formatter : Callable
        Called on a whole element when neither part is missing, otherwise
        on the single non-missing real or imaginary component.
    na_rep : str
        Text that stands in for a missing component.

    Returns
    -------
    np.ndarray
        Array of formatted strings with the same shape as ``values``.
    """
    re_parts = np.real(values).ravel()  # type: ignore[arg-type]
    im_parts = np.imag(values).ravel()  # type: ignore[arg-type]

    def _render(full, re_part, im_part, re_missing, im_missing):
        # Render a single element; ``formatter`` is only invoked on the
        # pieces that are actually present.
        if re_missing and im_missing:  # nan+nanj
            return f"{na_rep}+{na_rep}j"
        if im_missing:  # xxx+nanj
            return f"{formatter(re_part)}+{na_rep}j"
        if re_missing:  # nan[+/-]xxxj
            # The formatted imaginary part may start with a "-" or with
            # padding; after stripping, a "+" is supplied unless a "-"
            # is already there.
            im_text = formatter(im_part).strip()
            sign = "" if im_text.startswith("-") else "+"
            return f"{na_rep}{sign}{im_text}j"
        return formatter(full)

    rendered = [
        _render(*row)
        for row in zip(
            values.ravel(),
            re_parts,
            im_parts,
            isna(re_parts),
            isna(im_parts),
        )
    ]
    return np.array(rendered).reshape(values.shape)

if self.formatter is not None:
return format_with_na_rep(self.values, self.formatter, self.na_rep)

Expand All @@ -1512,11 +1541,12 @@ def format_values_with(float_format):
# need to distinguish complex and float NaNs (GH #53762)
values = self.values
is_complex = is_complex_dtype(values)
if is_complex:
na_rep = f"{na_rep}+{0:.{self.digits}f}j"

# separate the wheat from the chaff
values = format_with_na_rep(values, formatter, na_rep)
if is_complex:
values = format_complex_with_na_rep(values, formatter, na_rep)
else:
values = format_with_na_rep(values, formatter, na_rep)

if self.fixed_width:
if is_complex:
Expand Down Expand Up @@ -1917,7 +1947,7 @@ def _trim_zeros_complex(str_complexes: np.ndarray, decimal: str = ".") -> list[s
real_part, imag_part = [], []
for x in str_complexes:
# Complex numbers are represented as "(-)xxx(+/-)xxxj"
# The split will give [maybe "-", "xxx", "+/-", "xxx", "j", ""]
# The split will give [{"", "-"}, "xxx", "+/-", "xxx", "j", ""]
# Therefore, the imaginary part is the 4th and 3rd last elements,
# and the real part is everything before the imaginary part
trimmed = re.split(r"([j+-])", x)
Expand All @@ -1929,11 +1959,13 @@ def _trim_zeros_complex(str_complexes: np.ndarray, decimal: str = ".") -> list[s
# in the array
n = len(str_complexes)
padded_parts = _trim_zeros_float(real_part + imag_part, decimal)
padded_length = max(len(part) for part in padded_parts) - 1
padded = [
padded_parts[i] # real part (including - or space, possibly "NaN")
+ padded_parts[i + n] # imaginary part (including + or -)
real_pt # real part, possibly NaN
+ imag_pt[0] # +/-
+ f"{imag_pt[1:]:>{padded_length}}" # complex part (no sign), possibly nan
+ "j"
for i in range(n)
for real_pt, imag_pt in zip(padded_parts[:n], padded_parts[n:])
]
return padded

Expand Down
14 changes: 13 additions & 1 deletion pandas/tests/io/formats/test_printing.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,11 +218,23 @@ def test_multiindex_long_element():
([-2, complex("nan"), -1], ["-2.0+0.0j", " NaN+0.0j", "-1.0+0.0j"]),
([-1.23j, complex("nan"), -1], ["-0.00-1.23j", " NaN+0.00j", "-1.00+0.00j"]),
([1.23j, complex("nan"), 1.23], [" 0.00+1.23j", " NaN+0.00j", " 1.23+0.00j"]),
(
[-1.23j, complex(np.nan, np.nan), 1],
["-0.00-1.23j", " NaN+ NaNj", " 1.00+0.00j"],
Charlie-XIAO marked this conversation as resolved.
Show resolved Hide resolved
),
(
[-1.23j, complex(1.2, np.nan), 1],
["-0.00-1.23j", " 1.20+ NaNj", " 1.00+0.00j"],
),
(
[-1.23j, complex(np.nan, -1.2), 1],
["-0.00-1.23j", " NaN-1.20j", " 1.00+0.00j"],
),
],
)
@pytest.mark.parametrize("as_frame", [True, False])
def test_ser_df_with_complex_nans(data, output, as_frame):
# GH#53762
# GH#53762, GH#53841
obj = pd.Series(data)
if as_frame:
obj = obj.to_frame(name="val")
Expand Down