From 7ffc0ad1e803513f7a24e351c198fc0308c1cc9f Mon Sep 17 00:00:00 2001
From: Luke Manley <lukemanley@gmail.com>
Date: Thu, 9 Feb 2023 12:33:43 -0500
Subject: [PATCH] PERF: ArrowExtensionArray.to_numpy(dtype=object) (#51227)

* PERF: ArrowExtensionArray.to_numpy(dtype=object)

* gh refs
---
 doc/source/whatsnew/v2.0.0.rst       |  2 +-
 pandas/core/arrays/arrow/array.py    | 10 +++++-----
 pandas/tests/extension/test_arrow.py | 10 ++++++++++
 3 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index 3aed031bec00b..3c9c249e0f6ea 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -1073,7 +1073,7 @@ Performance improvements
 - Performance improvement in :meth:`~arrays.ArrowExtensionArray.factorize` (:issue:`49177`)
 - Performance improvement in :meth:`~arrays.ArrowExtensionArray.__setitem__` (:issue:`50248`, :issue:`50632`)
 - Performance improvement in :class:`~arrays.ArrowExtensionArray` comparison methods when array contains NA (:issue:`50524`)
-- Performance improvement in :meth:`~arrays.ArrowExtensionArray.to_numpy` (:issue:`49973`)
+- Performance improvement in :meth:`~arrays.ArrowExtensionArray.to_numpy` (:issue:`49973`, :issue:`51227`)
 - Performance improvement when parsing strings to :class:`BooleanDtype` (:issue:`50613`)
 - Performance improvement in :meth:`DataFrame.join` when joining on a subset of a :class:`MultiIndex` (:issue:`48611`)
 - Performance improvement for :meth:`MultiIndex.intersection` (:issue:`48604`)
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index ad10d82c0ca3c..5a72f59cad890 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -842,12 +842,12 @@ def to_numpy(
             na_value = self.dtype.na_value
 
         pa_type = self._data.type
-        if (
-            is_object_dtype(dtype)
-            or pa.types.is_timestamp(pa_type)
-            or pa.types.is_duration(pa_type)
-        ):
+        if pa.types.is_timestamp(pa_type) or pa.types.is_duration(pa_type):
             result = np.array(list(self), dtype=dtype)
+        elif is_object_dtype(dtype) and self._hasna:
+            result = np.empty(len(self), dtype=object)
+            mask = ~self.isna()
+            result[mask] = np.asarray(self[mask]._data)
         else:
             result = np.asarray(self._data, dtype=dtype)
             if copy or self._hasna:
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 51edae326417a..0d3f3e9e9e48c 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -1525,6 +1525,16 @@ def test_to_numpy_with_defaults(data):
     tm.assert_numpy_array_equal(result, expected)
 
 
+def test_to_numpy_int_with_na():
+    # GH51227: ints must stay ints (not be cast to float) when the array has an NA
+    data = [1, None]
+    arr = pd.array(data, dtype="int64[pyarrow]")
+    result = arr.to_numpy()
+    expected = np.array([1, pd.NA], dtype=object)
+    assert isinstance(result[0], int)
+    tm.assert_numpy_array_equal(result, expected)
+
+
 def test_setitem_null_slice(data):
     # GH50248
     orig = data.copy()