From a2ceb52a9b3f8a3bb1ec6ad9729acca3ff1f6707 Mon Sep 17 00:00:00 2001 From: partev Date: Mon, 25 Nov 2024 13:36:08 -0500 Subject: [PATCH 1/9] fix issue #60410 (#60412) --- doc/source/user_guide/window.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/user_guide/window.rst b/doc/source/user_guide/window.rst index e25c4c2441920..0581951d5bfad 100644 --- a/doc/source/user_guide/window.rst +++ b/doc/source/user_guide/window.rst @@ -567,9 +567,9 @@ One must have :math:`0 < \alpha \leq 1`, and while it is possible to pass \alpha = \begin{cases} - \frac{2}{s + 1}, & \text{for span}\ s \geq 1\\ - \frac{1}{1 + c}, & \text{for center of mass}\ c \geq 0\\ - 1 - \exp^{\frac{\log 0.5}{h}}, & \text{for half-life}\ h > 0 + \frac{2}{s + 1}, & \text{for span}\ s \geq 1\\ + \frac{1}{1 + c}, & \text{for center of mass}\ c \geq 0\\ + 1 - e^{\frac{\log 0.5}{h}}, & \text{for half-life}\ h > 0 \end{cases} One must specify precisely one of **span**, **center of mass**, **half-life** From e78df6f8f2ed2ca892e4caff61d8edfdfce2e981 Mon Sep 17 00:00:00 2001 From: Tuhin Sharma Date: Tue, 26 Nov 2024 00:09:31 +0530 Subject: [PATCH 2/9] DOC: fix SA01 for pandas.errors.UnsortedIndexError (#60404) --- ci/code_checks.sh | 1 - pandas/errors/__init__.py | 5 +++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 772793702f8b8..2a8b5f15d95f3 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -119,7 +119,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.errors.PerformanceWarning SA01" \ -i "pandas.errors.PossibleDataLossError SA01" \ -i "pandas.errors.UndefinedVariableError PR01,SA01" \ - -i "pandas.errors.UnsortedIndexError SA01" \ -i "pandas.errors.ValueLabelTypeMismatch SA01" \ -i "pandas.infer_freq SA01" \ -i "pandas.io.json.build_table_schema PR07,RT03,SA01" \ diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 68bd70603abae..d6d2fd82858ed 100644 --- 
a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -100,6 +100,11 @@ class UnsortedIndexError(KeyError): Subclass of `KeyError`. + See Also + -------- + DataFrame.sort_index : Sort a DataFrame by its index. + DataFrame.set_index : Set the DataFrame index using existing columns. + Examples -------- >>> df = pd.DataFrame( From cbd90ba5c403dc5449ac3b3a821ddc442c5ddc7d Mon Sep 17 00:00:00 2001 From: lfffkh <167774581+lfffkh@users.noreply.github.com> Date: Tue, 26 Nov 2024 02:40:37 +0800 Subject: [PATCH 3/9] Fix BUG: Cannot shift Intervals that are not closed='right' (the default) (#60407) first --- pandas/core/arrays/interval.py | 4 +++- pandas/tests/frame/methods/test_shift.py | 9 +++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index f47ef095a8409..bbbf1d9ca60bd 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -1055,7 +1055,9 @@ def shift(self, periods: int = 1, fill_value: object = None) -> IntervalArray: from pandas import Index fill_value = Index(self._left, copy=False)._na_value - empty = IntervalArray.from_breaks([fill_value] * (empty_len + 1)) + empty = IntervalArray.from_breaks( + [fill_value] * (empty_len + 1), closed=self.closed + ) else: empty = self._from_sequence([fill_value] * empty_len, dtype=self.dtype) diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py index a0f96ff111444..b52240c208493 100644 --- a/pandas/tests/frame/methods/test_shift.py +++ b/pandas/tests/frame/methods/test_shift.py @@ -757,3 +757,12 @@ def test_shift_with_offsets_freq_empty(self): df_shifted = DataFrame(index=shifted_dates) result = df.shift(freq=offset) tm.assert_frame_equal(result, df_shifted) + + def test_series_shift_interval_preserves_closed(self): + # GH#60389: use closed="left"; the default "right" never triggered the bug + ser = Series( + [pd.Interval(1, 2, closed="left"), pd.Interval(2, 3, closed="left")] + ) + result = ser.shift(1) + expected =
Series([np.nan, pd.Interval(1, 2, closed="left")]) + tm.assert_series_equal(result, expected) From bca4b1c0ccb3fe5a74bb945d01bc372a90cc0e11 Mon Sep 17 00:00:00 2001 From: Tuhin Sharma Date: Tue, 26 Nov 2024 00:11:18 +0530 Subject: [PATCH 4/9] DOC: fix SA01,ES01 for pandas.errors.PossibleDataLossError (#60403) --- ci/code_checks.sh | 1 - pandas/errors/__init__.py | 9 +++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 2a8b5f15d95f3..03c6b8dc077b9 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -117,7 +117,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.errors.NumbaUtilError SA01" \ -i "pandas.errors.OutOfBoundsTimedelta SA01" \ -i "pandas.errors.PerformanceWarning SA01" \ - -i "pandas.errors.PossibleDataLossError SA01" \ -i "pandas.errors.UndefinedVariableError PR01,SA01" \ -i "pandas.errors.ValueLabelTypeMismatch SA01" \ -i "pandas.infer_freq SA01" \ diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index d6d2fd82858ed..5642b0d33b4f7 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -638,6 +638,15 @@ class PossibleDataLossError(Exception): """ Exception raised when trying to open a HDFStore file when already opened. + This error is triggered when there is a potential risk of data loss due to + conflicting operations on an HDFStore file. It serves to prevent unintended + overwrites or data corruption by enforcing exclusive access to the file. + + See Also + -------- + HDFStore : Dict-like IO interface for storing pandas objects in PyTables. + HDFStore.open : Open an HDFStore file in the specified mode.
+ Examples -------- >>> store = pd.HDFStore("my-store", "a") # doctest: +SKIP From 582740b3c0a1ef211b490abbbd94c192b0367af5 Mon Sep 17 00:00:00 2001 From: Tuhin Sharma Date: Tue, 26 Nov 2024 00:11:50 +0530 Subject: [PATCH 5/9] DOC: fix SA01 for pandas.errors.OutOfBoundsTimedelta (#60402) --- ci/code_checks.sh | 1 - pandas/_libs/tslibs/np_datetime.pyx | 4 ++++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 03c6b8dc077b9..2817d84bad7b8 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -115,7 +115,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.errors.NullFrequencyError SA01" \ -i "pandas.errors.NumExprClobberingError SA01" \ -i "pandas.errors.NumbaUtilError SA01" \ - -i "pandas.errors.OutOfBoundsTimedelta SA01" \ -i "pandas.errors.PerformanceWarning SA01" \ -i "pandas.errors.UndefinedVariableError PR01,SA01" \ -i "pandas.errors.ValueLabelTypeMismatch SA01" \ diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index 193556b2697a9..1b7f04fe17238 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -201,6 +201,10 @@ class OutOfBoundsTimedelta(ValueError): Representation should be within a timedelta64[ns]. + See Also + -------- + date_range : Return a fixed frequency DatetimeIndex. 
+ Examples -------- >>> pd.date_range(start="1/1/1700", freq="B", periods=100000) From 9fab4eb5fb0132731a360fdd8ea3b31d95de187f Mon Sep 17 00:00:00 2001 From: Tuhin Sharma Date: Tue, 26 Nov 2024 00:12:23 +0530 Subject: [PATCH 6/9] DOC: fix SA01,ES01 for pandas.errors.DuplicateLabelError (#60399) --- ci/code_checks.sh | 1 - pandas/errors/__init__.py | 13 +++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 2817d84bad7b8..8bafcb8944e14 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -109,7 +109,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.core.resample.Resampler.std SA01" \ -i "pandas.core.resample.Resampler.transform PR01,RT03,SA01" \ -i "pandas.core.resample.Resampler.var SA01" \ - -i "pandas.errors.DuplicateLabelError SA01" \ -i "pandas.errors.IntCastingNaNError SA01" \ -i "pandas.errors.InvalidIndexError SA01" \ -i "pandas.errors.NullFrequencyError SA01" \ diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 5642b0d33b4f7..70e523688c644 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -393,6 +393,19 @@ class DuplicateLabelError(ValueError): """ Error raised when an operation would introduce duplicate labels. + This error is typically encountered when performing operations on objects + with `allows_duplicate_labels=False` and the operation would result in + duplicate labels in the index. Duplicate labels can lead to ambiguities + in indexing and reduce data integrity. + + See Also + -------- + Series.set_flags : Return a new ``Series`` object with updated flags. + DataFrame.set_flags : Return a new ``DataFrame`` object with updated flags. + Series.reindex : Conform ``Series`` object to new index with optional filling logic. + DataFrame.reindex : Conform ``DataFrame`` object to new index with optional filling + logic. 
+ Examples -------- >>> s = pd.Series([0, 1, 2], index=["a", "b", "c"]).set_flags( From 00c2207cbe8e429d11db5973794b604041cd74b2 Mon Sep 17 00:00:00 2001 From: Tuhin Sharma Date: Tue, 26 Nov 2024 00:12:55 +0530 Subject: [PATCH 7/9] DOC: fix SA01,ES01 for pandas.errors.InvalidIndexError (#60400) --- ci/code_checks.sh | 1 - pandas/errors/__init__.py | 10 ++++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 8bafcb8944e14..58b0d26f7e2f3 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -110,7 +110,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.core.resample.Resampler.transform PR01,RT03,SA01" \ -i "pandas.core.resample.Resampler.var SA01" \ -i "pandas.errors.IntCastingNaNError SA01" \ - -i "pandas.errors.InvalidIndexError SA01" \ -i "pandas.errors.NullFrequencyError SA01" \ -i "pandas.errors.NumExprClobberingError SA01" \ -i "pandas.errors.NumbaUtilError SA01" \ diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 70e523688c644..814feadfb06e4 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -425,6 +425,16 @@ class InvalidIndexError(Exception): """ Exception raised when attempting to use an invalid index key. + This exception is triggered when a user attempts to access or manipulate + data in a pandas DataFrame or Series using an index key that is not valid + for the given object. This may occur in cases such as using a malformed + slice, a mismatched key for a ``MultiIndex``, or attempting to access an index + element that does not exist. + + See Also + -------- + MultiIndex : A multi-level, or hierarchical, index object for pandas objects. 
+ Examples -------- >>> idx = pd.MultiIndex.from_product([["x", "y"], [0, 1]]) From 39dcbb4a06beaee7dd584a28958db72b9bba7531 Mon Sep 17 00:00:00 2001 From: Tuhin Sharma Date: Tue, 26 Nov 2024 00:20:15 +0530 Subject: [PATCH 8/9] DOC: fix SA01 for pandas.errors.NumExprClobberingError (#60401) --- ci/code_checks.sh | 1 - pandas/errors/__init__.py | 5 +++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 58b0d26f7e2f3..246a907c5052c 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -111,7 +111,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.core.resample.Resampler.var SA01" \ -i "pandas.errors.IntCastingNaNError SA01" \ -i "pandas.errors.NullFrequencyError SA01" \ - -i "pandas.errors.NumExprClobberingError SA01" \ -i "pandas.errors.NumbaUtilError SA01" \ -i "pandas.errors.PerformanceWarning SA01" \ -i "pandas.errors.UndefinedVariableError PR01,SA01" \ diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 814feadfb06e4..70d839d817114 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -538,6 +538,11 @@ class NumExprClobberingError(NameError): to 'numexpr'. 'numexpr' is the default engine value for these methods if the numexpr package is installed. + See Also + -------- + eval : Evaluate a Python expression as a string using various backends. + DataFrame.query : Query the columns of a DataFrame with a boolean expression. 
+ Examples -------- >>> df = pd.DataFrame({"abs": [1, 1, 1]}) From 0b6cece3acda1ae6e4f582d8276851b02aeac1ea Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 25 Nov 2024 11:35:37 -0800 Subject: [PATCH 9/9] TST: Avoid hashing np.timedelta64 without unit (#60416) --- pandas/tests/test_algos.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 3d1177c23c612..611b92eb022d6 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1254,7 +1254,7 @@ def test_value_counts_nat(self): result_dt = algos.value_counts_internal(dt) tm.assert_series_equal(result_dt, exp_dt) - exp_td = Series({np.timedelta64(10000): 1}, name="count") + exp_td = Series([1], index=[np.timedelta64(10000)], name="count") result_td = algos.value_counts_internal(td) tm.assert_series_equal(result_td, exp_td)