From 05709efc7a19a727ed9d1508df9a16a2c4f4db33 Mon Sep 17 00:00:00 2001 From: E33605 Date: Fri, 13 Dec 2024 12:19:37 +0530 Subject: [PATCH 1/7] ENH: removed median percentile to be always included in describe - fixes #60550 - median percentile is default when a blank list of percentiles is passed --- pandas/core/methods/describe.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/methods/describe.py b/pandas/core/methods/describe.py index 17d4d38c97f33..f8dbd669058d3 100644 --- a/pandas/core/methods/describe.py +++ b/pandas/core/methods/describe.py @@ -351,13 +351,13 @@ def _refine_percentiles( # explicit conversion of `percentiles` to list percentiles = list(percentiles) + # median should be included only if blank list is passed + if len(percentiles) == 0: + percentiles.append(0.5) + # get them all to be in [0, 1] validate_percentile(percentiles) - # median should always be included - if 0.5 not in percentiles: - percentiles.append(0.5) - percentiles = np.asarray(percentiles) # sort and check for duplicates From 5c745f32577fd025eac8a6e1be6e7fbec86f55af Mon Sep 17 00:00:00 2001 From: ZenithClown Date: Sat, 14 Dec 2024 15:35:04 +0530 Subject: [PATCH 2/7] skip validation for blank list, return median percentile --- pandas/core/methods/describe.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/methods/describe.py b/pandas/core/methods/describe.py index f8dbd669058d3..0f03b00ddac8c 100644 --- a/pandas/core/methods/describe.py +++ b/pandas/core/methods/describe.py @@ -351,9 +351,9 @@ def _refine_percentiles( # explicit conversion of `percentiles` to list percentiles = list(percentiles) - # median should be included only if blank list is passed + # median should be included only if blank iterable is passed if len(percentiles) == 0: - percentiles.append(0.5) + return np.array([0.5]) # get them all to be in [0, 1] validate_percentile(percentiles) From 7264ee6edcf7d1388c063b594c5ee81ddcbb433e Mon Sep 17 00:00:00 2001 From: ZenithClown Date: Sat, 14 Dec 2024 15:42:20 +0530 Subject: [PATCH 3/7] add enhancement changelog for gh#60550 --- doc/source/whatsnew/v2.3.0.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.3.0.rst b/doc/source/whatsnew/v2.3.0.rst index b107a5d3ba100..e37368ce7e79b 100644 --- a/doc/source/whatsnew/v2.3.0.rst +++ b/doc/source/whatsnew/v2.3.0.rst @@ -36,7 +36,8 @@ Other enhancements when using ``np.array()`` or ``np.asarray()`` on pandas objects) has been updated to work correctly with NumPy >= 2 (:issue:`57739`) - The :meth:`~Series.sum` reduction is now implemented for ``StringDtype`` columns (:issue:`59853`) -- +- Median percentile is only included in :meth:`~Series.describe` when a blank + list is passed (:issue:`60550`). .. --------------------------------------------------------------------------- .. _whatsnew_230.notable_bug_fixes: From 3e0868438ce60b93cdeb878a3cc66bdd91106ee1 Mon Sep 17 00:00:00 2001 From: ZenithClown Date: Sat, 14 Dec 2024 18:47:40 +0530 Subject: [PATCH 4/7] DOC: add the default median behavior in function docstring --- pandas/core/generic.py | 3 ++- pandas/core/methods/describe.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d1aa20501b060..bb003a1d11f1b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10795,7 +10795,8 @@ def describe( The percentiles to include in the output. All should fall between 0 and 1. The default is ``[.25, .5, .75]``, which returns the 25th, 50th, and - 75th percentiles. + 75th percentiles. If a blank list is passed, then returns + only the 50th percentile value. include : 'all', list-like of dtypes or None (default), optional A white list of data types to include in the result. Ignored for ``Series``. Here are the options: diff --git a/pandas/core/methods/describe.py b/pandas/core/methods/describe.py index 0f03b00ddac8c..1587fc0a4add0 100644 --- a/pandas/core/methods/describe.py +++ b/pandas/core/methods/describe.py @@ -74,7 +74,8 @@ def describe_ndframe( percentiles : list-like of numbers, optional The percentiles to include in the output. All should fall between 0 and 1. The default is ``[.25, .5, .75]``, which returns the 25th, 50th, and - 75th percentiles. + 75th percentiles.If a blank list is passed, then returns only the + 50th percentile value. Returns ------- From c1c187911e1fe0d0b69fbed8485a584aa2a60b65 Mon Sep 17 00:00:00 2001 From: E33605 Date: Wed, 18 Dec 2024 11:02:30 +0530 Subject: [PATCH 5/7] add test cases for percentile refine for describe function --- .../tests/reductions/test_describe_ndframe.py | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 pandas/tests/reductions/test_describe_ndframe.py diff --git a/pandas/tests/reductions/test_describe_ndframe.py b/pandas/tests/reductions/test_describe_ndframe.py new file mode 100644 index 0000000000000..9bb45f3ae68ca --- /dev/null +++ b/pandas/tests/reductions/test_describe_ndframe.py @@ -0,0 +1,28 @@ +# -*- encoding: utf-8 -*- + +""" +We test the describe_ndframe function. +""" + +import pytest +import numpy as np + +from pandas.core.methods.describe import _refine_percentiles + +def test_refine_percentiles(): + """ + Check the performance of the _refine_percentiles when multiple + values are passed. + """ + + # by default 0.25, 0.50, 0.75 is returned + # or, when None is passed return behavior is the same + assert _refine_percentiles() == np.array([0.25, 0.5, 0.75]) + assert _refine_percentiles(percentiles = None) == np.array([0.25, 0.5, 0.75]) + + # when any value is passed, then the function should return + percentiles_ = [0.3, 0.6] + assert _refine_percentiles(percentiles_) == np.array(percentiles_) + + # when a blank list is passed, then should return only 0.5 + assert _refine_percentiles(percentiles = []) == np.array([0.5]) From dd875dadd55bee63946e3c611ae1fbc11eac2021 Mon Sep 17 00:00:00 2001 From: E33605 Date: Wed, 18 Dec 2024 12:03:55 +0530 Subject: [PATCH 6/7] use pytest mark parametrize to fix input and expected values --- .../tests/reductions/test_describe_ndframe.py | 23 ++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/pandas/tests/reductions/test_describe_ndframe.py b/pandas/tests/reductions/test_describe_ndframe.py index 9bb45f3ae68ca..ec252fd4316f5 100644 --- a/pandas/tests/reductions/test_describe_ndframe.py +++ b/pandas/tests/reductions/test_describe_ndframe.py @@ -4,25 +4,22 @@ We test the describe_ndframe function. """ -import pytest import numpy as np +import pytest from pandas.core.methods.describe import _refine_percentiles -def test_refine_percentiles(): +@pytest.mark.parametrize( + "percentiles_, expected", [ + (None, np.array([0.25, 0.5, 0.75])), + ([], np.array([0.5])), + ([0.3, 0.6], np.array([0.3, 0.6])), + ] +) +def test_refine_percentiles(percentiles_, expected): """ Check the performance of the _refine_percentiles when multiple values are passed. """ - # by default 0.25, 0.50, 0.75 is returned - # or, when None is passed return behavior is the same - assert _refine_percentiles() == np.array([0.25, 0.5, 0.75]) - assert _refine_percentiles(percentiles = None) == np.array([0.25, 0.5, 0.75]) - - # when any value is passed, then the function should return - percentiles_ = [0.3, 0.6] - assert _refine_percentiles(percentiles_) == np.array(percentiles_) - - # when a blank list is passed, then should return only 0.5 - assert _refine_percentiles(percentiles = []) == np.array([0.5]) + assert np.array_equal(_refine_percentiles(percentiles_), expected) From 98a014331b7097d8c76fc2b88f64636d73ea1410 Mon Sep 17 00:00:00 2001 From: Debmalya Pramanik Date: Tue, 24 Dec 2024 11:30:58 +0530 Subject: [PATCH 7/7] Fix typo in docstring, add space Co-authored-by: Asish Mahapatra --- pandas/core/methods/describe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/methods/describe.py b/pandas/core/methods/describe.py index 1587fc0a4add0..fa53af4c2bac9 100644 --- a/pandas/core/methods/describe.py +++ b/pandas/core/methods/describe.py @@ -74,7 +74,7 @@ def describe_ndframe( percentiles : list-like of numbers, optional The percentiles to include in the output. All should fall between 0 and 1. The default is ``[.25, .5, .75]``, which returns the 25th, 50th, and - 75th percentiles.If a blank list is passed, then returns only the + 75th percentiles. If a blank list is passed, then returns only the 50th percentile value. Returns