From 10dffd1452b50af101f26b331cbfabd3ea217434 Mon Sep 17 00:00:00 2001 From: Stephen Childs Date: Sat, 3 Mar 2018 13:19:34 -0500 Subject: [PATCH 1/4] BUG: Identify SparseDataFrame as sparse The is_sparse function checks to see if an array-like is sparse by checking to see if it is an instance of ABCSparseArray or ABCSparseSeries. This commit adds ABCSparseDataFrame to that list -- so it can detect that a DataFrame (which is an array-like object) is sparse. Added a test for this. --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/dtypes/common.py | 6 ++++-- pandas/tests/dtypes/test_common.py | 1 + 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index bffdf5b7918ca..f352deb4686d2 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -987,6 +987,7 @@ Sparse - Bug in :class:`SparseDataFrame.to_csv` causing exception (:issue:`19384`) - Bug in :class:`SparseSeries.memory_usage` which caused segfault by accessing non sparse elements (:issue:`19368`) - Bug in constructing a ``SparseArray``: if ``data`` is a scalar and ``index`` is defined it will coerce to ``float64`` regardless of scalar's dtype. (:issue:`19163`) +- Bug in :func:`is_sparse` which would report a ``SparseDataFrame`` as not sparse. 
Reshaping ^^^^^^^^^ diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 197b35de88896..c6334cc20b171 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -12,7 +12,8 @@ ExtensionDtype) from .generic import (ABCCategorical, ABCPeriodIndex, ABCDatetimeIndex, ABCSeries, - ABCSparseArray, ABCSparseSeries, ABCCategoricalIndex, + ABCSparseArray, ABCSparseSeries, ABCSparseDataFrame, + ABCCategoricalIndex, ABCIndexClass, ABCDateOffset) from .inference import is_string_like, is_list_like from .inference import * # noqa @@ -149,7 +150,8 @@ def is_sparse(arr): False """ - return isinstance(arr, (ABCSparseArray, ABCSparseSeries)) + return isinstance(arr, (ABCSparseArray, ABCSparseSeries, + ABCSparseDataFrame)) def is_scipy_sparse(arr): diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index bfec229d32b22..3244651ab86f0 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -139,6 +139,7 @@ def test_is_object(): def test_is_sparse(check_scipy): assert com.is_sparse(pd.SparseArray([1, 2, 3])) assert com.is_sparse(pd.SparseSeries([1, 2, 3])) + assert com.is_sparse(pd.SparseDataFrame([1, 2, 3])) assert not com.is_sparse(np.array([1, 2, 3])) From 4857b0b4a61850a6f2d4bf5bea59aea2dd380162 Mon Sep 17 00:00:00 2001 From: Stephen Childs Date: Mon, 5 Mar 2018 19:02:07 -0500 Subject: [PATCH 2/4] Revert "BUG: Identify SparseDataFrame as sparse" This reverts commit 10dffd1452b50af101f26b331cbfabd3ea217434. The previous commit's change was not necessary. Will add a docstring to clarify the behaviour of the method. 
--- doc/source/whatsnew/v0.23.0.txt | 1 - pandas/core/dtypes/common.py | 6 ++---- pandas/tests/dtypes/test_common.py | 1 - 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index f352deb4686d2..bffdf5b7918ca 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -987,7 +987,6 @@ Sparse - Bug in :class:`SparseDataFrame.to_csv` causing exception (:issue:`19384`) - Bug in :class:`SparseSeries.memory_usage` which caused segfault by accessing non sparse elements (:issue:`19368`) - Bug in constructing a ``SparseArray``: if ``data`` is a scalar and ``index`` is defined it will coerce to ``float64`` regardless of scalar's dtype. (:issue:`19163`) -- Bug in :func:`is_sparse` which would report a ``SparseDataFrame`` as not sparse. Reshaping ^^^^^^^^^ diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index c6334cc20b171..197b35de88896 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -12,8 +12,7 @@ ExtensionDtype) from .generic import (ABCCategorical, ABCPeriodIndex, ABCDatetimeIndex, ABCSeries, - ABCSparseArray, ABCSparseSeries, ABCSparseDataFrame, - ABCCategoricalIndex, + ABCSparseArray, ABCSparseSeries, ABCCategoricalIndex, ABCIndexClass, ABCDateOffset) from .inference import is_string_like, is_list_like from .inference import * # noqa @@ -150,8 +149,7 @@ def is_sparse(arr): False """ - return isinstance(arr, (ABCSparseArray, ABCSparseSeries, - ABCSparseDataFrame)) + return isinstance(arr, (ABCSparseArray, ABCSparseSeries)) def is_scipy_sparse(arr): diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index 3244651ab86f0..bfec229d32b22 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -139,7 +139,6 @@ def test_is_object(): def test_is_sparse(check_scipy): assert com.is_sparse(pd.SparseArray([1, 2, 3])) assert com.is_sparse(pd.SparseSeries([1, 
2, 3])) - assert com.is_sparse(pd.SparseDataFrame([1, 2, 3])) assert not com.is_sparse(np.array([1, 2, 3])) From 7eb5dec8736a1202f6f98941c0e2de3f6d2c1840 Mon Sep 17 00:00:00 2001 From: Stephen Childs Date: Sat, 3 Mar 2018 10:32:59 -0500 Subject: [PATCH 3/4] DOC: Revise is_sparse docstring Clean up the docstring for is_sparse so it conforms to the documentation style guide. Add additional examples and clarify that is_sparse expects a 1-dimensional array-like. --- pandas/core/dtypes/common.py | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 197b35de88896..3cf171557bf66 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -120,17 +120,26 @@ def is_object_dtype(arr_or_dtype): def is_sparse(arr): - """ - Check whether an array-like is a pandas sparse array. + """Check whether an array-like is a pandas sparse array. + + Check that the one-dimensional array-like is a pandas sparse array. + Returns True if it is a pandas sparse array, not another type of + sparse array. Parameters ---------- - arr : array-like - The array-like to check. + arr : array-like (1-D) + Array-like to check. Returns ------- - boolean : Whether or not the array-like is a pandas sparse array. + boolean + Whether or not the array-like is a pandas sparse array. + + See Also + -------- + DataFrame.to_sparse : Convert DataFrame to a SparseDataFrame. + Series.to_sparse : Convert Series to SparseSeries. Examples -------- @@ -147,8 +156,18 @@ def is_sparse(arr): >>> from scipy.sparse import bsr_matrix >>> is_sparse(bsr_matrix([1, 2, 3])) False - """ + This function checks that 1 dimensional arrays are sparse. + It will not identify that a `SparseDataFrame` as sparse. 
+ + >>> df = pd.SparseDataFrame([389., 24., 80.5, np.nan], + columns=['max_speed'], + index=['falcon', 'parrot', 'lion', 'monkey']) + >>> is_sparse(df) + False + >>> is_sparse(df.max_speed) + True + """ return isinstance(arr, (ABCSparseArray, ABCSparseSeries)) From 541fa1cbe101d74535c42146e5fae156b6a6c6f9 Mon Sep 17 00:00:00 2001 From: Stephen Childs Date: Thu, 8 Mar 2018 17:44:56 -0500 Subject: [PATCH 4/4] DOC: Adjust is_sparse docstring. Responding to pull request comments. --- pandas/core/dtypes/common.py | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 3cf171557bf66..1c762c8461fae 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -120,7 +120,8 @@ def is_object_dtype(arr_or_dtype): def is_sparse(arr): - """Check whether an array-like is a pandas sparse array. + """ + Check whether an array-like is a 1-D pandas sparse array. Check that the one-dimensional array-like is a pandas sparse array. Returns True if it is a pandas sparse array, not another type of @@ -128,37 +129,43 @@ def is_sparse(arr): Parameters ---------- - arr : array-like (1-D) + arr : array-like Array-like to check. Returns ------- - boolean + bool Whether or not the array-like is a pandas sparse array. See Also -------- DataFrame.to_sparse : Convert DataFrame to a SparseDataFrame. Series.to_sparse : Convert Series to SparseSeries. + Series.to_dense : Return dense representation of a Series. Examples -------- - >>> is_sparse(np.array([1, 2, 3])) - False - >>> is_sparse(pd.SparseArray([1, 2, 3])) + Returns `True` if the parameter is a 1-D pandas sparse array. + + >>> is_sparse(pd.SparseArray([0, 0, 1, 0])) True - >>> is_sparse(pd.SparseSeries([1, 2, 3])) + >>> is_sparse(pd.SparseSeries([0, 0, 1, 0])) True - This function checks only for pandas sparse array instances, so - sparse arrays from other libraries will return False. 
+ Returns `False` if the parameter is not sparse. + + >>> is_sparse(np.array([0, 0, 1, 0])) + False + >>> is_sparse(pd.Series([0, 1, 0, 0])) + False + + Returns `False` if the parameter is not a pandas sparse array. >>> from scipy.sparse import bsr_matrix - >>> is_sparse(bsr_matrix([1, 2, 3])) + >>> is_sparse(bsr_matrix([0, 1, 0, 0])) False - This function checks that 1 dimensional arrays are sparse. - It will not identify that a `SparseDataFrame` as sparse. + Returns `False` if the parameter has more than one dimension. >>> df = pd.SparseDataFrame([389., 24., 80.5, np.nan], columns=['max_speed'],