From 1dfcf1177f91456c29f3d94c8cf383cb94793672 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Thu, 6 Jul 2023 12:57:33 -0700
Subject: [PATCH] BUG: ignoring sort in DTA.factorize (#53992)

---
 pandas/core/arrays/datetimelike.py    | 10 +++++++++-
 pandas/tests/arrays/test_datetimes.py | 13 +++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index 86849aa41e3e1..40cd59340f942 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -2211,7 +2211,15 @@ def factorize(
                 codes = codes[::-1]
                 uniques = uniques[::-1]
             return codes, uniques
-        # FIXME: shouldn't get here; we are ignoring sort
+
+        if sort:
+            # algorithms.factorize only passes sort=True here when freq is
+            # not None, so this should not be reached.
+            raise NotImplementedError(
+                f"The 'sort' keyword in {type(self).__name__}.factorize is "
+                "ignored unless arr.freq is not None. To factorize with sort, "
+                "call pd.factorize(obj, sort=True) instead."
+            )
         return super().factorize(use_na_sentinel=use_na_sentinel)
 
     @classmethod
diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py
index 2acc7bdc0d902..1fe1d4efbefd7 100644
--- a/pandas/tests/arrays/test_datetimes.py
+++ b/pandas/tests/arrays/test_datetimes.py
@@ -745,3 +745,16 @@ def test_iter_zoneinfo_fold(self, tz):
         right2 = dta.astype(object)[2]
         assert str(left) == str(right2)
         assert left.utcoffset() == right2.utcoffset()
+
+
+def test_factorize_sort_without_freq():
+    dta = DatetimeArray._from_sequence([0, 2, 1])
+
+    msg = r"call pd.factorize\(obj, sort=True\) instead"
+    with pytest.raises(NotImplementedError, match=msg):
+        dta.factorize(sort=True)
+
+    # Do TimedeltaArray while we're here
+    tda = dta - dta[0]
+    with pytest.raises(NotImplementedError, match=msg):
+        tda.factorize(sort=True)