diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index 909e2454b7cb8..ce37fd9d8e623 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -602,6 +602,7 @@ Other API changes
   methods to get a full slice (for example ``df.loc[:]`` or ``df[:]``) (:issue:`49469`)
 - Disallow computing ``cumprod`` for :class:`Timedelta` object; previously this returned incorrect values (:issue:`50246`)
 - Loading a JSON file with duplicate columns using ``read_json(orient='split')`` renames columns to avoid duplicates, as :func:`read_csv` and the other readers do (:issue:`50370`)
+- The levels of the index of the :class:`Series` returned from ``Series.sparse.from_coo`` now always have dtype ``int32``. Previously they had dtype ``int64`` (:issue:`50926`)
 - :func:`to_datetime` with ``unit`` of either "Y" or "M" will now raise if a sequence contains a non-round ``float`` value, matching the ``Timestamp`` behavior (:issue:`50301`)
 -
 
diff --git a/pandas/core/arrays/sparse/scipy_sparse.py b/pandas/core/arrays/sparse/scipy_sparse.py
index 3ef5ba5835d79..8e3ba57cbaba4 100644
--- a/pandas/core/arrays/sparse/scipy_sparse.py
+++ b/pandas/core/arrays/sparse/scipy_sparse.py
@@ -203,9 +203,6 @@ def coo_to_sparse_series(
     ser = ser.sort_index()
     ser = ser.astype(SparseDtype(ser.dtype))
     if dense_index:
-        # is there a better constructor method to use here?
-        i = range(A.shape[0])
-        j = range(A.shape[1])
-        ind = MultiIndex.from_product([i, j])
+        ind = MultiIndex.from_product([A.row, A.col])
         ser = ser.reindex(ind)
     return ser
diff --git a/pandas/tests/arrays/sparse/test_accessor.py b/pandas/tests/arrays/sparse/test_accessor.py
index 9ac0d9d0401ed..7d6a9e18a26c6 100644
--- a/pandas/tests/arrays/sparse/test_accessor.py
+++ b/pandas/tests/arrays/sparse/test_accessor.py
@@ -218,14 +218,11 @@ def test_series_from_coo(self, dtype, dense_index):
         A = scipy.sparse.eye(3, format="coo", dtype=dtype)
         result = pd.Series.sparse.from_coo(A, dense_index=dense_index)
 
-        # TODO: GH49560: scipy.sparse.eye always has A.row and A.col dtype as int32.
-        # fix index_dtype to follow scipy.sparse convention (always int32)?
-        index_dtype = np.int64 if dense_index else np.int32
         index = pd.MultiIndex.from_tuples(
             [
-                np.array([0, 0], dtype=index_dtype),
-                np.array([1, 1], dtype=index_dtype),
-                np.array([2, 2], dtype=index_dtype),
+                np.array([0, 0], dtype=np.int32),
+                np.array([1, 1], dtype=np.int32),
+                np.array([2, 2], dtype=np.int32),
             ],
         )
         expected = pd.Series(SparseArray(np.array([1, 1, 1], dtype=dtype)), index=index)