Merge remote-tracking branch 'upstream/main' into cow_blokcs_rest

# Conflicts:
#	pandas/core/internals/blocks.py
#	pandas/core/internals/managers.py
phofl · Feb 5, 2024 · 69722e8
2 parents 5b9021d + 77b4824
commit 69722e8
Show file tree

Hide file tree

Showing 43 changed files with 188 additions and 553 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -299,13 +299,6 @@ repos:
         files: ^pandas/core/
         exclude: ^pandas/core/api\.py$
         types: [python]
-    -   id: use-io-common-urlopen
-        name: Use pandas.io.common.urlopen instead of urllib.request.urlopen
-        language: python
-        entry: python scripts/use_io_common_urlopen.py
-        files: ^pandas/
-        exclude: ^pandas/tests/
-        types: [python]
     -   id: no-bool-in-core-generic
         name: Use bool_t instead of bool in pandas/core/generic.py
         entry: python scripts/no_bool_in_generic.py

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
@@ -71,26 +71,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
 
     MSG='Partially validate docstrings (PR02)' ;  echo $MSG
     $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=PR02 --ignore_functions \
-        pandas.CategoricalIndex.rename_categories\
-        pandas.CategoricalIndex.reorder_categories\
-        pandas.CategoricalIndex.add_categories\
-        pandas.CategoricalIndex.remove_categories\
-        pandas.CategoricalIndex.set_categories\
-        pandas.IntervalIndex.set_closed\
-        pandas.IntervalIndex.contains\
-        pandas.IntervalIndex.overlaps\
-        pandas.IntervalIndex.to_tuples\
-        pandas.DatetimeIndex.round\
-        pandas.DatetimeIndex.floor\
-        pandas.DatetimeIndex.ceil\
-        pandas.DatetimeIndex.month_name\
-        pandas.DatetimeIndex.day_name\
-        pandas.DatetimeIndex.to_period\
-        pandas.DatetimeIndex.std\
-        pandas.TimedeltaIndex.round\
-        pandas.TimedeltaIndex.floor\
-        pandas.TimedeltaIndex.ceil\
-        pandas.PeriodIndex.strftime\
         pandas.Series.dt.to_period\
         pandas.Series.dt.tz_localize\
         pandas.Series.dt.tz_convert\
@@ -156,7 +136,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         pandas.tseries.offsets.Milli\
         pandas.tseries.offsets.Micro\
         pandas.tseries.offsets.Nano\
-        pandas.set_option\
         pandas.Timestamp.max\
         pandas.Timestamp.min\
         pandas.Timestamp.resolution\

diff --git a/pandas/_config/config.py b/pandas/_config/config.py
@@ -326,30 +326,37 @@ def __doc__(self) -> str:  # type: ignore[override]
 """
 
 _set_option_tmpl = """
-set_option(pat, value)
+set_option(*args, **kwargs)
 
-Sets the value of the specified option.
+Sets the value of the specified option or options.
 
 Available options:
 
 {opts_list}
 
 Parameters
 ----------
-pat : str
-    Regexp which should match a single option.
-    Note: partial matches are supported for convenience, but unless you use the
-    full option name (e.g. x.y.z.option_name), your code may break in future
-    versions if new options with similar names are introduced.
-value : object
-    New value of option.
+*args : str | object
+    Arguments provided in pairs, which will be interpreted as (pattern, value)
+    pairs.
+    pattern: str
+    Regexp which should match a single option
+    value: object
+    New value of option
+    Note: partial pattern matches are supported for convenience, but unless you
+    use the full option name (e.g. x.y.z.option_name), your code may break in
+    future versions if new options with similar names are introduced.
+**kwargs : str
+    Keyword arguments are not currently supported.
 
 Returns
 -------
 None
 
 Raises
 ------
+ValueError if odd numbers of non-keyword arguments are provided
+TypeError if keyword arguments are provided
 OptionError if no such option exists
 
 Notes

diff --git a/pandas/_typing.py b/pandas/_typing.py
@@ -189,6 +189,7 @@ def __reversed__(self) -> Iterator[_T_co]:
 # passed in, a DataFrame is always returned.
 NDFrameT = TypeVar("NDFrameT", bound="NDFrame")
 
+IndexT = TypeVar("IndexT", bound="Index")
 NumpyIndexT = TypeVar("NumpyIndexT", np.ndarray, "Index")
 
 AxisInt = int

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -10740,7 +10740,7 @@ def _shift_with_freq(self, periods: int, axis: int, freq) -> Self:
                     f"does not match PeriodIndex freq "
                     f"{freq_to_period_freqstr(orig_freq.n, orig_freq.name)}"
                 )
-            new_ax = index.shift(periods)
+            new_ax: Index = index.shift(periods)
         else:
             new_ax = index.shift(periods, freq)
 

diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py
@@ -5,6 +5,7 @@
 
 from typing import (
     TYPE_CHECKING,
+    NoReturn,
     cast,
 )
 import warnings
@@ -108,7 +109,7 @@ def _delegate_property_get(self, name: str):
         # return the result as a Series
         return Series(result, index=index, name=self.name).__finalize__(self._parent)
 
-    def _delegate_property_set(self, name: str, value, *args, **kwargs):
+    def _delegate_property_set(self, name: str, value, *args, **kwargs) -> NoReturn:
         raise ValueError(
             "modifications to a property of a datetimelike object are not supported. "
             "Change values on the original."
@@ -483,7 +484,7 @@ def to_pytimedelta(self) -> np.ndarray:
         return self._get_values().to_pytimedelta()
 
     @property
-    def components(self):
+    def components(self) -> DataFrame:
         """
         Return a Dataframe of the components of the Timedeltas.
 

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -55,6 +55,7 @@
     F,
     IgnoreRaise,
     IndexLabel,
+    IndexT,
     JoinHow,
     Level,
     NaPosition,
@@ -2027,7 +2028,7 @@ def sortlevel(
         ascending: bool | list[bool] = True,
         sort_remaining=None,
         na_position: NaPosition = "first",
-    ):
+    ) -> tuple[Self, np.ndarray]:
         """
         For internal compatibility with the Index API.
 
@@ -4432,7 +4433,7 @@ def _wrap_reindex_result(self, target, indexer, preserve_names: bool):
         target = self._maybe_preserve_names(target, preserve_names)
         return target
 
-    def _maybe_preserve_names(self, target: Index, preserve_names: bool):
+    def _maybe_preserve_names(self, target: IndexT, preserve_names: bool) -> IndexT:
         if preserve_names and target.nlevels == 1 and target.name != self.name:
             target = target.copy(deep=False)
             target.name = self.name
@@ -5987,7 +5988,7 @@ def sort(self, *args, **kwargs):
         """
         raise TypeError("cannot sort an Index object in-place, use sort_values instead")
 
-    def shift(self, periods: int = 1, freq=None):
+    def shift(self, periods: int = 1, freq=None) -> Self:
         """
         Shift index by desired number of time frequency increments.
 

diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
@@ -273,7 +273,7 @@ def _can_partial_date_slice(self, reso: Resolution) -> bool:
     def _parsed_string_to_bounds(self, reso: Resolution, parsed):
         raise NotImplementedError
 
-    def _parse_with_reso(self, label: str):
+    def _parse_with_reso(self, label: str) -> tuple[datetime, Resolution]:
         # overridden by TimedeltaIndex
         try:
             if self.freq is None or hasattr(self.freq, "rule_code"):
@@ -295,7 +295,7 @@ def _parse_with_reso(self, label: str):
         reso = Resolution.from_attrname(reso_str)
         return parsed, reso
 
-    def _get_string_slice(self, key: str):
+    def _get_string_slice(self, key: str) -> slice | npt.NDArray[np.intp]:
         # overridden by TimedeltaIndex
         parsed, reso = self._parse_with_reso(key)
         try:

diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
@@ -518,7 +518,9 @@ def snap(self, freq: Frequency = "S") -> DatetimeIndex:
     # --------------------------------------------------------------------
     # Indexing Methods
 
-    def _parsed_string_to_bounds(self, reso: Resolution, parsed: dt.datetime):
+    def _parsed_string_to_bounds(
+        self, reso: Resolution, parsed: dt.datetime
+    ) -> tuple[Timestamp, Timestamp]:
         """
         Calculate datetime bounds for parsed time string and its resolution.
 
@@ -555,7 +557,7 @@ def _parsed_string_to_bounds(self, reso: Resolution, parsed: dt.datetime):
         #  which localizes parsed.
         return start, end
 
-    def _parse_with_reso(self, label: str):
+    def _parse_with_reso(self, label: str) -> tuple[Timestamp, Resolution]:
         parsed, reso = super()._parse_with_reso(label)
 
         parsed = Timestamp(parsed)

diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py
@@ -3,6 +3,7 @@
 """
 from __future__ import annotations
 
+from inspect import signature
 from typing import (
     TYPE_CHECKING,
     Callable,
@@ -104,6 +105,7 @@ def method(self, *args, **kwargs):  # type: ignore[misc]
         # error: "property" has no attribute "__name__"
         method.__name__ = name  # type: ignore[attr-defined]
         method.__doc__ = attr.__doc__
+        method.__signature__ = signature(attr)  # type: ignore[attr-defined]
     return method
 
 

diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
@@ -37,6 +37,7 @@
     F,
     IgnoreRaise,
     IndexLabel,
+    IndexT,
     Scalar,
     Self,
     Shape,
@@ -2727,7 +2728,7 @@ def _wrap_reindex_result(self, target, indexer, preserve_names: bool):
         target = self._maybe_preserve_names(target, preserve_names)
         return target
 
-    def _maybe_preserve_names(self, target: Index, preserve_names: bool) -> Index:
+    def _maybe_preserve_names(self, target: IndexT, preserve_names: bool) -> IndexT:
         if (
             preserve_names
             and target.nlevels == self.nlevels

diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py
@@ -876,7 +876,7 @@ def _difference(self, other, sort=None):
 
     def symmetric_difference(
         self, other, result_name: Hashable | None = None, sort=None
-    ):
+    ) -> Index:
         if not isinstance(other, RangeIndex) or sort is not None:
             return super().symmetric_difference(other, result_name, sort)
 

diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py
@@ -32,6 +32,7 @@
 from pandas.core.indexes.extension import inherit_names
 
 if TYPE_CHECKING:
+    from pandas._libs import NaTType
     from pandas._typing import DtypeObj
 
 
@@ -245,7 +246,10 @@ def get_loc(self, key):
 
         return Index.get_loc(self, key)
 
-    def _parse_with_reso(self, label: str):
+    # error: Return type "tuple[Timedelta | NaTType, None]" of "_parse_with_reso"
+    # incompatible with return type "tuple[datetime, Resolution]" in supertype
+    # "DatetimeIndexOpsMixin"
+    def _parse_with_reso(self, label: str) -> tuple[Timedelta | NaTType, None]:  # type: ignore[override]
         # the "with_reso" is a no-op for TimedeltaIndex
         parsed = Timedelta(label)
         return parsed, None

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -819,7 +819,7 @@ def replace(
         if isinstance(values, Categorical):
             # TODO: avoid special-casing
             # GH49404
-            blk = self._maybe_copy(inplace)
+            blk = self._maybe_copy_cow_check(inplace)
             values = cast(Categorical, blk.values)
             values._replace(to_replace=to_replace, value=value, inplace=True)
             return [blk]
@@ -841,7 +841,7 @@ def replace(
         elif self._can_hold_element(value):
             # TODO(CoW): Maybe split here as well into columns where mask has True
             # and rest?
-            blk = self._maybe_copy(inplace)
+            blk = self._maybe_copy_cow_check(inplace)
             putmask_inplace(blk.values, mask, value)
 
             if not (self.is_object and value is None):
@@ -928,7 +928,7 @@ def _replace_regex(
 
         rx = re.compile(to_replace)
 
-        block = self._maybe_copy(inplace)
+        block = self._maybe_copy_cow_check(inplace)
 
         replace_regex(block.values, rx, value, mask)
 
@@ -964,7 +964,7 @@ def replace_list(
         if isinstance(values, Categorical):
             # TODO: avoid special-casing
             # GH49404
-            blk = self._maybe_copy(inplace)
+            blk = self._maybe_copy_cow_check(inplace)
             values = cast(Categorical, blk.values)
             values._replace(to_replace=src_list, value=dest_list, inplace=True)
             return [blk]

diff --git a/pandas/io/common.py b/pandas/io/common.py
@@ -286,7 +286,7 @@ def urlopen(*args, **kwargs):
     """
     import urllib.request
 
-    return urllib.request.urlopen(*args, **kwargs)
+    return urllib.request.urlopen(*args, **kwargs)  # noqa: TID251
 
 
 def is_fsspec_url(url: FilePath | BaseBuffer) -> bool:

diff --git a/pandas/tests/frame/indexing/test_getitem.py b/pandas/tests/frame/indexing/test_getitem.py
@@ -391,18 +391,13 @@ def test_getitem_empty_frame_with_boolean(self):
         df2 = df[df > 0]
         tm.assert_frame_equal(df, df2)
 
-    def test_getitem_returns_view_when_column_is_unique_in_df(
-        self, using_copy_on_write
-    ):
+    def test_getitem_returns_view_when_column_is_unique_in_df(self):
         # GH#45316
         df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "a", "b"])
         df_orig = df.copy()
         view = df["b"]
         view.loc[:] = 100
-        if using_copy_on_write:
-            expected = df_orig
-        else:
-            expected = DataFrame([[1, 2, 100], [4, 5, 100]], columns=["a", "a", "b"])
+        expected = df_orig
         tm.assert_frame_equal(df, expected)
 
     def test_getitem_frozenset_unique_in_column(self):