Skip to content

Commit

Permalink
PERF: ArrowExtensionArray.to_numpy(dtype=object) (#51227)
Browse files (browse the repository at this point in the history)
* PERF: ArrowExtensionArray.to_numpy(dtype=object)

* gh refs
  • Loading branch information
lukemanley authored Feb 9, 2023
1 parent 12faa2e commit 7ffc0ad
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 6 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.0.0.rst
Original file line number | Diff line number | Diff line change
Expand Up @@ -1073,7 +1073,7 @@ Performance improvements
- Performance improvement in :meth:`~arrays.ArrowExtensionArray.factorize` (:issue:`49177`)
- Performance improvement in :meth:`~arrays.ArrowExtensionArray.__setitem__` (:issue:`50248`, :issue:`50632`)
- Performance improvement in :class:`~arrays.ArrowExtensionArray` comparison methods when array contains NA (:issue:`50524`)
- Performance improvement in :meth:`~arrays.ArrowExtensionArray.to_numpy` (:issue:`49973`)
- Performance improvement in :meth:`~arrays.ArrowExtensionArray.to_numpy` (:issue:`49973`, :issue:`51227`)
- Performance improvement when parsing strings to :class:`BooleanDtype` (:issue:`50613`)
- Performance improvement in :meth:`DataFrame.join` when joining on a subset of a :class:`MultiIndex` (:issue:`48611`)
- Performance improvement for :meth:`MultiIndex.intersection` (:issue:`48604`)
Expand Down
10 changes: 5 additions & 5 deletions pandas/core/arrays/arrow/array.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -842,12 +842,12 @@ def to_numpy(
na_value = self.dtype.na_value

pa_type = self._data.type
if (
is_object_dtype(dtype)
or pa.types.is_timestamp(pa_type)
or pa.types.is_duration(pa_type)
):
if pa.types.is_timestamp(pa_type) or pa.types.is_duration(pa_type):
result = np.array(list(self), dtype=dtype)
elif is_object_dtype(dtype) and self._hasna:
result = np.empty(len(self), dtype=object)
mask = ~self.isna()
result[mask] = np.asarray(self[mask]._data)
else:
result = np.asarray(self._data, dtype=dtype)
if copy or self._hasna:
Expand Down
10 changes: 10 additions & 0 deletions pandas/tests/extension/test_arrow.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -1525,6 +1525,16 @@ def test_to_numpy_with_defaults(data):
tm.assert_numpy_array_equal(result, expected)


def test_to_numpy_int_with_na():
    """Check that ``to_numpy`` keeps ints as ints when a null is present.

    GH51227: converting an ``int64[pyarrow]`` array that contains a missing
    value should yield an object ndarray holding a Python ``int`` and
    ``pd.NA``, not values converted to float.
    """
    values = [1, None]
    arrow_arr = pd.array(values, dtype="int64[pyarrow]")
    converted = arrow_arr.to_numpy()
    # The non-null element must come back as a Python int, not a float.
    assert isinstance(converted[0], int)
    tm.assert_numpy_array_equal(
        converted,
        np.array([1, pd.NA], dtype=object),
    )


def test_setitem_null_slice(data):
# GH50248
orig = data.copy()
Expand Down

0 comments on commit 7ffc0ad

Please sign in to comment.