Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Series.tolist to return None in place of pd.NA #51056

Closed
Wants to merge 11 commits (the author and the source/target branch names were lost in this page capture)
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1245,6 +1245,7 @@ I/O
- Fixed memory leak which stemmed from the initialization of the internal JSON module (:issue:`49222`)
- Fixed issue where :func:`json_normalize` would incorrectly remove leading characters from column names that matched the ``sep`` argument (:issue:`49861`)
- Bug in :func:`read_csv` unnecessarily overflowing for extension array dtype when containing ``NA`` (:issue:`32134`)
- Bug in :meth:`Series.tolist` not converting ``NA`` to ``None`` (:issue:`29738`)
- Bug in :meth:`DataFrame.to_dict` not converting ``NA`` to ``None`` (:issue:`50795`)
- Bug in :meth:`DataFrame.to_json` where it would segfault when failing to encode a string (:issue:`50307`)
- Bug in :func:`read_xml` where file-like objects failed when iterparse is used (:issue:`50641`)
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -838,6 +838,10 @@ def take(
indices_array[indices_array < 0] += len(self._data)
return type(self)(self._data.take(indices_array))

@doc(ExtensionArray.tolist)
def tolist(self) -> list:
return self._data.to_pylist()

@doc(ExtensionArray.to_numpy)
def to_numpy(
self,
Expand Down
8 changes: 5 additions & 3 deletions pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,11 +435,13 @@ def to_numpy(
return data

@doc(ExtensionArray.tolist)
def tolist(self):
def tolist(self) -> list:
if self.ndim > 1:
return [x.tolist() for x in self]
dtype = None if self._hasna else self._data.dtype
return self.to_numpy(dtype=dtype).tolist()
if self._hasna:
# pd.NA -> None (python native types)
return self.to_numpy(na_value=None).tolist()
return self._data.tolist()

@overload
def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray:
Expand Down
15 changes: 9 additions & 6 deletions pandas/core/arrays/string_.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,12 +223,6 @@ class BaseStringArray(ExtensionArray):
Mixin class for StringArray, ArrowStringArray.
"""

@doc(ExtensionArray.tolist)
def tolist(self):
if self.ndim > 1:
return [x.tolist() for x in self]
return list(self.to_numpy())


class StringArray(BaseStringArray, PandasArray):
"""
Expand Down Expand Up @@ -514,6 +508,15 @@ def searchsorted(
)
return super().searchsorted(value=value, side=side, sorter=sorter)

@doc(ExtensionArray.tolist)
def tolist(self) -> list:
if self.ndim > 1:
return [x.tolist() for x in self]
if self._hasna:
# pd.NA -> None (python native types)
return self.to_numpy(na_value=None).tolist()
return self._ndarray.tolist()

def _cmp_method(self, other, op):
from pandas.arrays import BooleanArray

Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,12 @@ def test_astype_str(self, data, request):
)
super().test_astype_str(data)

def test_tolist(self, data):
result = pd.Series(data).tolist()
expected = [None if pd.isna(v) else v for v in data]
assert isinstance(result, list)
assert result == expected


class TestConstructors(base.BaseConstructorsTests):
def test_from_dtype(self, data, request):
Expand Down Expand Up @@ -782,6 +788,12 @@ class TestBaseInterface(base.BaseInterfaceTests):
def test_view(self, data):
super().test_view(data)

def test_tolist(self, data):
result = pd.Series(data).tolist()
expected = [None if pd.isna(v) else v for v in data]
assert isinstance(result, list)
assert result == expected


class TestBaseMissing(base.BaseMissingTests):
def test_dropna_array(self, data_missing):
Expand Down
12 changes: 10 additions & 2 deletions pandas/tests/extension/test_boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,11 @@ class TestDtype(base.BaseDtypeTests):


class TestInterface(base.BaseInterfaceTests):
pass
def test_tolist(self, data):
result = pd.Series(data).tolist()
expected = [None if pd.isna(v) else v for v in data]
assert isinstance(result, list)
assert result == expected


class TestConstructors(base.BaseConstructorsTests):
Expand Down Expand Up @@ -229,7 +233,11 @@ def test_argmin_argmax(self, data_for_sorting, data_missing_for_sorting):


class TestCasting(base.BaseCastingTests):
pass
def test_tolist(self, data):
result = pd.Series(data).tolist()
expected = [None if pd.isna(v) else v for v in data]
assert isinstance(result, list)
assert result == expected


class TestGroupby(base.BaseGroupbyTests):
Expand Down
12 changes: 10 additions & 2 deletions pandas/tests/extension/test_floating.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,11 @@ def _compare_other(self, s, data, op, other):


class TestInterface(base.BaseInterfaceTests):
pass
def test_tolist(self, data):
result = pd.Series(data).tolist()
expected = [None if pd.isna(v) else v for v in data]
assert isinstance(result, list)
assert result == expected


class TestConstructors(base.BaseConstructorsTests):
Expand Down Expand Up @@ -177,7 +181,11 @@ class TestMethods(base.BaseMethodsTests):


class TestCasting(base.BaseCastingTests):
pass
def test_tolist(self, data):
result = pd.Series(data).tolist()
expected = [None if pd.isna(v) else v for v in data]
assert isinstance(result, list)
assert result == expected


class TestGroupby(base.BaseGroupbyTests):
Expand Down
12 changes: 10 additions & 2 deletions pandas/tests/extension/test_integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,11 @@ def _compare_other(self, s, data, op, other):


class TestInterface(base.BaseInterfaceTests):
pass
def test_tolist(self, data):
result = pd.Series(data).tolist()
expected = [None if pd.isna(v) else v for v in data]
assert isinstance(result, list)
assert result == expected


class TestConstructors(base.BaseConstructorsTests):
Expand Down Expand Up @@ -205,7 +209,11 @@ class TestMethods(base.BaseMethodsTests):


class TestCasting(base.BaseCastingTests):
pass
def test_tolist(self, data):
result = pd.Series(data).tolist()
expected = [None if pd.isna(v) else v for v in data]
assert isinstance(result, list)
assert result == expected


class TestGroupby(base.BaseGroupbyTests):
Expand Down