diff --git a/doc/source/whatsnew/v2.3.0.rst b/doc/source/whatsnew/v2.3.0.rst index b107a5d3ba100..86f376042f967 100644 --- a/doc/source/whatsnew/v2.3.0.rst +++ b/doc/source/whatsnew/v2.3.0.rst @@ -36,7 +36,6 @@ Other enhancements when using ``np.array()`` or ``np.asarray()`` on pandas objects) has been updated to work correctly with NumPy >= 2 (:issue:`57739`) - The :meth:`~Series.sum` reduction is now implemented for ``StringDtype`` columns (:issue:`59853`) -- .. --------------------------------------------------------------------------- .. _whatsnew_230.notable_bug_fixes: diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 005818b0779e6..302a4f3c0b417 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -65,6 +65,7 @@ Other enhancements - Support passing a :class:`Iterable[Hashable]` input to :meth:`DataFrame.drop_duplicates` (:issue:`59237`) - Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`) - Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`) +- Fixed bug where the median percentile is included in :meth:`~Series.describe` when an empty list is passed as ``percentiles`` (:issue:`60550`) .. --------------------------------------------------------------------------- .. 
_whatsnew_300.notable_bug_fixes: diff --git a/pandas/core/methods/describe.py b/pandas/core/methods/describe.py index 17d4d38c97f33..7291e748dfec7 100644 --- a/pandas/core/methods/describe.py +++ b/pandas/core/methods/describe.py @@ -351,13 +351,13 @@ def _refine_percentiles( # explicit conversion of `percentiles` to list percentiles = list(percentiles) + # percentiles are removed if a user explicitly passes an empty list + if len(percentiles) == 0: + return np.array([]) + # get them all to be in [0, 1] validate_percentile(percentiles) - # median should always be included - if 0.5 not in percentiles: - percentiles.append(0.5) - percentiles = np.asarray(percentiles) # sort and check for duplicates diff --git a/pandas/tests/frame/methods/test_describe.py b/pandas/tests/frame/methods/test_describe.py index e9206e86b7b08..8418e9db95d42 100644 --- a/pandas/tests/frame/methods/test_describe.py +++ b/pandas/tests/frame/methods/test_describe.py @@ -413,3 +413,34 @@ def test_describe_exclude_pa_dtype(self): dtype=pd.ArrowDtype(pa.float64()), ) tm.assert_frame_equal(result, expected) + + def test_refine_percentiles(self): + # GH#60550 + df = DataFrame({"a" : np.arange(0, 10, 1)}) + + # the default behavior is to return [0.25, 0.5, 0.75] + result = df.describe() + expected = DataFrame( + {"a" : [10, df.a.mean(), df.a.std(), 0, 2.25, 4.5, 6.75, 9]}, + index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"] + ) + + tm.assert_frame_equal(result, expected) + + # no percentiles if a user explicitly passes an empty list + result = df.describe(percentiles=[]) + expected = DataFrame( + {"a" : [10, df.a.mean(), df.a.std(), 0, 9]}, + index=["count", "mean", "std", "min", "max"] + ) + + tm.assert_frame_equal(result, expected) + + # if a list is passed, only the requested percentiles are reported (the median is not added implicitly) + result = df.describe(percentiles=[0.2]) + expected = DataFrame( + {"a" : [10, df.a.mean(), df.a.std(), 0, 1.8, 9]}, + index=["count", "mean", "std", "min", "20%", "max"] + ) + + 
tm.assert_frame_equal(result, expected)