From f4aacbbbf02ff5b9c6defb670ce0dbdc6fa5eb7a Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sat, 21 Jan 2023 20:12:28 +0000 Subject: [PATCH 1/2] API: Harmonize dtype for index levels for Series.sparse.from_coo --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/core/arrays/sparse/scipy_sparse.py | 5 +---- pandas/tests/arrays/sparse/test_accessor.py | 9 +++------ 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 0ceda331de790..e6261c2b104c8 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -600,6 +600,7 @@ Other API changes methods to get a full slice (for example ``df.loc[:]`` or ``df[:]``) (:issue:`49469`) - Disallow computing ``cumprod`` for :class:`Timedelta` object; previously this returned incorrect values (:issue:`50246`) - Loading a JSON file with duplicate columns using ``read_json(orient='split')`` renames columns to avoid duplicates, as :func:`read_csv` and the other readers do (:issue:`50370`) +- The levels of the index of the :class:`Series` returned from ``Series.sparse.from_coo`` now always have dtype ``int32``. Previously they had dtype ``int64`` (:issue:`xxxxx`) - :func:`to_datetime` with ``unit`` of either "Y" or "M" will now raise if a sequence contains a non-round ``float`` value, matching the ``Timestamp`` behavior (:issue:`50301`) - diff --git a/pandas/core/arrays/sparse/scipy_sparse.py b/pandas/core/arrays/sparse/scipy_sparse.py index 3ef5ba5835d79..8e3ba57cbaba4 100644 --- a/pandas/core/arrays/sparse/scipy_sparse.py +++ b/pandas/core/arrays/sparse/scipy_sparse.py @@ -203,9 +203,6 @@ def coo_to_sparse_series( ser = ser.sort_index() ser = ser.astype(SparseDtype(ser.dtype)) if dense_index: - # is there a better constructor method to use here? 
- i = range(A.shape[0]) - j = range(A.shape[1]) - ind = MultiIndex.from_product([i, j]) + ind = MultiIndex.from_product([A.row, A.col]) ser = ser.reindex(ind) return ser diff --git a/pandas/tests/arrays/sparse/test_accessor.py b/pandas/tests/arrays/sparse/test_accessor.py index 9ac0d9d0401ed..7d6a9e18a26c6 100644 --- a/pandas/tests/arrays/sparse/test_accessor.py +++ b/pandas/tests/arrays/sparse/test_accessor.py @@ -218,14 +218,11 @@ def test_series_from_coo(self, dtype, dense_index): A = scipy.sparse.eye(3, format="coo", dtype=dtype) result = pd.Series.sparse.from_coo(A, dense_index=dense_index) - # TODO: GH49560: scipy.sparse.eye always has A.row and A.col dtype as int32. - # fix index_dtype to follow scipy.sparse convention (always int32)? - index_dtype = np.int64 if dense_index else np.int32 index = pd.MultiIndex.from_tuples( [ - np.array([0, 0], dtype=index_dtype), - np.array([1, 1], dtype=index_dtype), - np.array([2, 2], dtype=index_dtype), + np.array([0, 0], dtype=np.int32), + np.array([1, 1], dtype=np.int32), + np.array([2, 2], dtype=np.int32), ], ) expected = pd.Series(SparseArray(np.array([1, 1, 1], dtype=dtype)), index=index) From 7a9206a2938b1007aa341c1c697d957cbfac99ef Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sat, 21 Jan 2023 20:18:38 +0000 Subject: [PATCH 2/2] add gh number --- doc/source/whatsnew/v2.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index e6261c2b104c8..0ea8a5db4afb3 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -600,7 +600,7 @@ Other API changes methods to get a full slice (for example ``df.loc[:]`` or ``df[:]``) (:issue:`49469`) - Disallow computing ``cumprod`` for :class:`Timedelta` object; previously this returned incorrect values (:issue:`50246`) - Loading a JSON file with duplicate columns using ``read_json(orient='split')`` renames columns to avoid duplicates, as :func:`read_csv` and the other 
readers do (:issue:`50370`) -- The levels of the index of the :class:`Series` returned from ``Series.sparse.from_coo`` now always have dtype ``int32``. Previously they had dtype ``int64`` (:issue:`xxxxx`) +- The levels of the index of the :class:`Series` returned from ``Series.sparse.from_coo`` now always have dtype ``int32``. Previously they had dtype ``int64`` when ``dense_index=True`` was passed (:issue:`50926`) - :func:`to_datetime` with ``unit`` of either "Y" or "M" will now raise if a sequence contains a non-round ``float`` value, matching the ``Timestamp`` behavior (:issue:`50301`) -