Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into cow_blokcs_rest
Browse files (browse the repository at this point in the history)
# Conflicts:
#	pandas/core/internals/blocks.py
#	pandas/core/internals/managers.py
  • Loading branch information
phofl committed Feb 5, 2024
2 parents 5b9021d + 77b4824 commit 69722e8
Show file tree
Hide file tree
Showing 43 changed files with 188 additions and 553 deletions.
7 changes: 0 additions & 7 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -299,13 +299,6 @@ repos:
files: ^pandas/core/
exclude: ^pandas/core/api\.py$
types: [python]
- id: use-io-common-urlopen
name: Use pandas.io.common.urlopen instead of urllib.request.urlopen
language: python
entry: python scripts/use_io_common_urlopen.py
files: ^pandas/
exclude: ^pandas/tests/
types: [python]
- id: no-bool-in-core-generic
name: Use bool_t instead of bool in pandas/core/generic.py
entry: python scripts/no_bool_in_generic.py
Expand Down
21 changes: 0 additions & 21 deletions ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -71,26 +71,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then

MSG='Partially validate docstrings (PR02)' ; echo $MSG
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=PR02 --ignore_functions \
pandas.CategoricalIndex.rename_categories\
pandas.CategoricalIndex.reorder_categories\
pandas.CategoricalIndex.add_categories\
pandas.CategoricalIndex.remove_categories\
pandas.CategoricalIndex.set_categories\
pandas.IntervalIndex.set_closed\
pandas.IntervalIndex.contains\
pandas.IntervalIndex.overlaps\
pandas.IntervalIndex.to_tuples\
pandas.DatetimeIndex.round\
pandas.DatetimeIndex.floor\
pandas.DatetimeIndex.ceil\
pandas.DatetimeIndex.month_name\
pandas.DatetimeIndex.day_name\
pandas.DatetimeIndex.to_period\
pandas.DatetimeIndex.std\
pandas.TimedeltaIndex.round\
pandas.TimedeltaIndex.floor\
pandas.TimedeltaIndex.ceil\
pandas.PeriodIndex.strftime\
pandas.Series.dt.to_period\
pandas.Series.dt.tz_localize\
pandas.Series.dt.tz_convert\
Expand Down Expand Up @@ -156,7 +136,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
pandas.tseries.offsets.Milli\
pandas.tseries.offsets.Micro\
pandas.tseries.offsets.Nano\
pandas.set_option\
pandas.Timestamp.max\
pandas.Timestamp.min\
pandas.Timestamp.resolution\
Expand Down
25 changes: 16 additions & 9 deletions pandas/_config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,30 +326,37 @@ def __doc__(self) -> str: # type: ignore[override]
"""

_set_option_tmpl = """
set_option(pat, value)
set_option(*args, **kwargs)
Sets the value of the specified option.
Sets the value of the specified option or options.
Available options:
{opts_list}
Parameters
----------
pat : str
Regexp which should match a single option.
Note: partial matches are supported for convenience, but unless you use the
full option name (e.g. x.y.z.option_name), your code may break in future
versions if new options with similar names are introduced.
value : object
New value of option.
*args : str | object
Arguments provided in pairs, which will be interpreted as (pattern, value)
pairs.
pattern: str
Regexp which should match a single option
value: object
New value of option
Note: partial pattern matches are supported for convenience, but unless you
use the full option name (e.g. x.y.z.option_name), your code may break in
future versions if new options with similar names are introduced.
**kwargs : str
Keyword arguments are not currently supported.
Returns
-------
None
Raises
------
ValueError if an odd number of non-keyword arguments is provided
TypeError if keyword arguments are provided
OptionError if no such option exists
Notes
Expand Down
1 change: 1 addition & 0 deletions pandas/_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@ def __reversed__(self) -> Iterator[_T_co]:
# passed in, a DataFrame is always returned.
NDFrameT = TypeVar("NDFrameT", bound="NDFrame")

IndexT = TypeVar("IndexT", bound="Index")
NumpyIndexT = TypeVar("NumpyIndexT", np.ndarray, "Index")

AxisInt = int
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -10740,7 +10740,7 @@ def _shift_with_freq(self, periods: int, axis: int, freq) -> Self:
f"does not match PeriodIndex freq "
f"{freq_to_period_freqstr(orig_freq.n, orig_freq.name)}"
)
new_ax = index.shift(periods)
new_ax: Index = index.shift(periods)
else:
new_ax = index.shift(periods, freq)

Expand Down
5 changes: 3 additions & 2 deletions pandas/core/indexes/accessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from typing import (
TYPE_CHECKING,
NoReturn,
cast,
)
import warnings
Expand Down Expand Up @@ -108,7 +109,7 @@ def _delegate_property_get(self, name: str):
# return the result as a Series
return Series(result, index=index, name=self.name).__finalize__(self._parent)

def _delegate_property_set(self, name: str, value, *args, **kwargs):
def _delegate_property_set(self, name: str, value, *args, **kwargs) -> NoReturn:
raise ValueError(
"modifications to a property of a datetimelike object are not supported. "
"Change values on the original."
Expand Down Expand Up @@ -483,7 +484,7 @@ def to_pytimedelta(self) -> np.ndarray:
return self._get_values().to_pytimedelta()

@property
def components(self):
def components(self) -> DataFrame:
"""
Return a DataFrame of the components of the Timedeltas.
Expand Down
7 changes: 4 additions & 3 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
F,
IgnoreRaise,
IndexLabel,
IndexT,
JoinHow,
Level,
NaPosition,
Expand Down Expand Up @@ -2027,7 +2028,7 @@ def sortlevel(
ascending: bool | list[bool] = True,
sort_remaining=None,
na_position: NaPosition = "first",
):
) -> tuple[Self, np.ndarray]:
"""
For internal compatibility with the Index API.
Expand Down Expand Up @@ -4432,7 +4433,7 @@ def _wrap_reindex_result(self, target, indexer, preserve_names: bool):
target = self._maybe_preserve_names(target, preserve_names)
return target

def _maybe_preserve_names(self, target: Index, preserve_names: bool):
def _maybe_preserve_names(self, target: IndexT, preserve_names: bool) -> IndexT:
if preserve_names and target.nlevels == 1 and target.name != self.name:
target = target.copy(deep=False)
target.name = self.name
Expand Down Expand Up @@ -5987,7 +5988,7 @@ def sort(self, *args, **kwargs):
"""
raise TypeError("cannot sort an Index object in-place, use sort_values instead")

def shift(self, periods: int = 1, freq=None):
def shift(self, periods: int = 1, freq=None) -> Self:
"""
Shift index by desired number of time frequency increments.
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ def _can_partial_date_slice(self, reso: Resolution) -> bool:
def _parsed_string_to_bounds(self, reso: Resolution, parsed):
raise NotImplementedError

def _parse_with_reso(self, label: str):
def _parse_with_reso(self, label: str) -> tuple[datetime, Resolution]:
# overridden by TimedeltaIndex
try:
if self.freq is None or hasattr(self.freq, "rule_code"):
Expand All @@ -295,7 +295,7 @@ def _parse_with_reso(self, label: str):
reso = Resolution.from_attrname(reso_str)
return parsed, reso

def _get_string_slice(self, key: str):
def _get_string_slice(self, key: str) -> slice | npt.NDArray[np.intp]:
# overridden by TimedeltaIndex
parsed, reso = self._parse_with_reso(key)
try:
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -518,7 +518,9 @@ def snap(self, freq: Frequency = "S") -> DatetimeIndex:
# --------------------------------------------------------------------
# Indexing Methods

def _parsed_string_to_bounds(self, reso: Resolution, parsed: dt.datetime):
def _parsed_string_to_bounds(
self, reso: Resolution, parsed: dt.datetime
) -> tuple[Timestamp, Timestamp]:
"""
Calculate datetime bounds for parsed time string and its resolution.
Expand Down Expand Up @@ -555,7 +557,7 @@ def _parsed_string_to_bounds(self, reso: Resolution, parsed: dt.datetime):
# which localizes parsed.
return start, end

def _parse_with_reso(self, label: str):
def _parse_with_reso(self, label: str) -> tuple[Timestamp, Resolution]:
parsed, reso = super()._parse_with_reso(label)

parsed = Timestamp(parsed)
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/indexes/extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""
from __future__ import annotations

from inspect import signature
from typing import (
TYPE_CHECKING,
Callable,
Expand Down Expand Up @@ -104,6 +105,7 @@ def method(self, *args, **kwargs): # type: ignore[misc]
# error: "property" has no attribute "__name__"
method.__name__ = name # type: ignore[attr-defined]
method.__doc__ = attr.__doc__
method.__signature__ = signature(attr) # type: ignore[attr-defined]
return method


Expand Down
3 changes: 2 additions & 1 deletion pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
F,
IgnoreRaise,
IndexLabel,
IndexT,
Scalar,
Self,
Shape,
Expand Down Expand Up @@ -2727,7 +2728,7 @@ def _wrap_reindex_result(self, target, indexer, preserve_names: bool):
target = self._maybe_preserve_names(target, preserve_names)
return target

def _maybe_preserve_names(self, target: Index, preserve_names: bool) -> Index:
def _maybe_preserve_names(self, target: IndexT, preserve_names: bool) -> IndexT:
if (
preserve_names
and target.nlevels == self.nlevels
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/range.py
Original file line number Diff line number Diff line change
Expand Up @@ -876,7 +876,7 @@ def _difference(self, other, sort=None):

def symmetric_difference(
self, other, result_name: Hashable | None = None, sort=None
):
) -> Index:
if not isinstance(other, RangeIndex) or sort is not None:
return super().symmetric_difference(other, result_name, sort)

Expand Down
6 changes: 5 additions & 1 deletion pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from pandas.core.indexes.extension import inherit_names

if TYPE_CHECKING:
from pandas._libs import NaTType
from pandas._typing import DtypeObj


Expand Down Expand Up @@ -245,7 +246,10 @@ def get_loc(self, key):

return Index.get_loc(self, key)

def _parse_with_reso(self, label: str):
# error: Return type "tuple[Timedelta | NaTType, None]" of "_parse_with_reso"
# incompatible with return type "tuple[datetime, Resolution]" in supertype
# "DatetimeIndexOpsMixin"
def _parse_with_reso(self, label: str) -> tuple[Timedelta | NaTType, None]: # type: ignore[override]
# the "with_reso" is a no-op for TimedeltaIndex
parsed = Timedelta(label)
return parsed, None
Expand Down
8 changes: 4 additions & 4 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -819,7 +819,7 @@ def replace(
if isinstance(values, Categorical):
# TODO: avoid special-casing
# GH49404
blk = self._maybe_copy(inplace)
blk = self._maybe_copy_cow_check(inplace)
values = cast(Categorical, blk.values)
values._replace(to_replace=to_replace, value=value, inplace=True)
return [blk]
Expand All @@ -841,7 +841,7 @@ def replace(
elif self._can_hold_element(value):
# TODO(CoW): Maybe split here as well into columns where mask has True
# and rest?
blk = self._maybe_copy(inplace)
blk = self._maybe_copy_cow_check(inplace)
putmask_inplace(blk.values, mask, value)

if not (self.is_object and value is None):
Expand Down Expand Up @@ -928,7 +928,7 @@ def _replace_regex(

rx = re.compile(to_replace)

block = self._maybe_copy(inplace)
block = self._maybe_copy_cow_check(inplace)

replace_regex(block.values, rx, value, mask)

Expand Down Expand Up @@ -964,7 +964,7 @@ def replace_list(
if isinstance(values, Categorical):
# TODO: avoid special-casing
# GH49404
blk = self._maybe_copy(inplace)
blk = self._maybe_copy_cow_check(inplace)
values = cast(Categorical, blk.values)
values._replace(to_replace=src_list, value=dest_list, inplace=True)
return [blk]
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ def urlopen(*args, **kwargs):
"""
import urllib.request

return urllib.request.urlopen(*args, **kwargs)
return urllib.request.urlopen(*args, **kwargs) # noqa: TID251


def is_fsspec_url(url: FilePath | BaseBuffer) -> bool:
Expand Down
9 changes: 2 additions & 7 deletions pandas/tests/frame/indexing/test_getitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,18 +391,13 @@ def test_getitem_empty_frame_with_boolean(self):
df2 = df[df > 0]
tm.assert_frame_equal(df, df2)

def test_getitem_returns_view_when_column_is_unique_in_df(
self, using_copy_on_write
):
def test_getitem_returns_view_when_column_is_unique_in_df(self):
# GH#45316
df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "a", "b"])
df_orig = df.copy()
view = df["b"]
view.loc[:] = 100
if using_copy_on_write:
expected = df_orig
else:
expected = DataFrame([[1, 2, 100], [4, 5, 100]], columns=["a", "a", "b"])
expected = df_orig
tm.assert_frame_equal(df, expected)

def test_getitem_frozenset_unique_in_column(self):
Expand Down
Loading

0 comments on commit 69722e8

Please sign in to comment.