def _factorize_with_other_for_merge(
    self, other: Self, sort: bool = False
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp], int]:
    """
    Factorize this array together with ``other`` to get codes for a merge.

    This hook allows extension array authors to implement efficient
    factorizations for merge operations.

    Parameters
    ----------
    other : ExtensionArray
        Array with the same dtype as ``self``.
    sort : bool, default False
        Whether to sort the result.

    Returns
    -------
    tuple
        Codes for left and right and the number of unique elements.
    """
    # ``self`` supplies the left keys and ``other`` the right keys; the
    # second item returned by ``_values_for_factorize`` is not used here.
    left_keys, _ = self._values_for_factorize()
    right_keys, _ = other._values_for_factorize()
    return factorize_arrays(left_keys, right_keys, sort)