Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: allowing subclass-specific keywords in pd.Index.__new__ #38597

Merged
merged 10 commits into from
Dec 22, 2020
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ Other API changes

Deprecations
~~~~~~~~~~~~

- Deprecated allowing subclass-specific keyword arguments in the :class:`Index` constructor; use the specific subclass directly instead (:issue:`14093`, :issue:`21311`, :issue:`22315`, :issue:`26974`)
-
-

Expand Down
9 changes: 9 additions & 0 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,15 @@ def __new__(
cls, data=None, dtype=None, copy=False, name=None, tupleize_cols=True, **kwargs
) -> "Index":

if kwargs:
warnings.warn(
"Passing keywords other than 'data', 'dtype', 'copy', 'name', "
"'tupleize_cols' is deprecated and will raise TypeError in a "
"future version. Use the specific Index subclass directly instead",
FutureWarning,
stacklevel=2,
)

from pandas.core.indexes.range import RangeIndex

name = maybe_extract_name(name, data, cls)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def __array_wrap__(self, result, context=None):
if not is_period_dtype(self.dtype) and attrs["freq"]:
# no need to infer if freq is None
attrs["freq"] = "infer"
return Index(result, **attrs)
return type(self)(result, **attrs)

# ------------------------------------------------------------------------

Expand Down
8 changes: 6 additions & 2 deletions pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -1966,6 +1966,10 @@ def _asfreq_compat(index, freq):
new_index: Index
if isinstance(index, PeriodIndex):
new_index = index.asfreq(freq=freq)
else:
new_index = Index([], dtype=index.dtype, freq=freq, name=index.name)
elif isinstance(index, DatetimeIndex):
new_index = DatetimeIndex([], dtype=index.dtype, freq=freq, name=index.name)
elif isinstance(index, TimedeltaIndex):
new_index = TimedeltaIndex([], dtype=index.dtype, freq=freq, name=index.name)
jreback marked this conversation as resolved.
Show resolved Hide resolved
else: # pragma: no cover
raise TypeError(type(index))
return new_index
63 changes: 39 additions & 24 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from typing import (
TYPE_CHECKING,
Any,
Callable,
Dict,
List,
Optional,
Expand Down Expand Up @@ -2045,15 +2046,19 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
if self.freq is not None:
kwargs["freq"] = _ensure_decoded(self.freq)

factory: Union[Type[Index], Type[DatetimeIndex]] = Index
if is_datetime64_dtype(values.dtype) or is_datetime64tz_dtype(values.dtype):
factory = DatetimeIndex

# making an Index instance could throw a number of different errors
try:
new_pd_index = Index(values, **kwargs)
new_pd_index = factory(values, **kwargs)
except ValueError:
# if the output freq is different than what we recorded,
# it should be None (see also 'doc example part 2')
if "freq" in kwargs:
kwargs["freq"] = None
new_pd_index = Index(values, **kwargs)
new_pd_index = factory(values, **kwargs)

new_pd_index = _set_tz(new_pd_index, self.tz)
return new_pd_index, new_pd_index
Expand Down Expand Up @@ -2736,8 +2741,14 @@ def _alias_to_class(self, alias):
return alias
return self._reverse_index_map.get(alias, Index)

def _get_index_factory(self, klass):
if klass == DatetimeIndex:
def _get_index_factory(self, attrs):
index_class = self._alias_to_class(
_ensure_decoded(getattr(attrs, "index_class", ""))
)

factory: Callable

if index_class == DatetimeIndex:

def f(values, freq=None, tz=None):
# data are already in UTC, localize and convert if tz present
Expand All @@ -2747,16 +2758,34 @@ def f(values, freq=None, tz=None):
result = result.tz_localize("UTC").tz_convert(tz)
return result

return f
elif klass == PeriodIndex:
factory = f
elif index_class == PeriodIndex:

def f(values, freq=None, tz=None):
parr = PeriodArray._simple_new(values, freq=freq)
return PeriodIndex._simple_new(parr, name=None)

return f
factory = f
else:
factory = index_class

kwargs = {}
if "freq" in attrs:
kwargs["freq"] = attrs["freq"]
if index_class is Index:
# DTI/PI would be gotten by _alias_to_class
factory = TimedeltaIndex

if "tz" in attrs:
if isinstance(attrs["tz"], bytes):
# created by python2
kwargs["tz"] = attrs["tz"].decode("utf-8")
else:
# created by python3
kwargs["tz"] = attrs["tz"]
assert index_class is DatetimeIndex # just checking

return klass
return factory, kwargs

def validate_read(self, columns, where):
"""
Expand Down Expand Up @@ -2928,22 +2957,8 @@ def read_index_node(
name = _ensure_str(node._v_attrs.name)
name = _ensure_decoded(name)

index_class = self._alias_to_class(
_ensure_decoded(getattr(node._v_attrs, "index_class", ""))
)
factory = self._get_index_factory(index_class)

kwargs = {}
if "freq" in node._v_attrs:
kwargs["freq"] = node._v_attrs["freq"]

if "tz" in node._v_attrs:
if isinstance(node._v_attrs["tz"], bytes):
# created by python2
kwargs["tz"] = node._v_attrs["tz"].decode("utf-8")
else:
# created by python3
kwargs["tz"] = node._v_attrs["tz"]
attrs = node._v_attrs
factory, kwargs = self._get_index_factory(attrs)

if kind == "date":
index = factory(
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/groupby/test_grouping.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,8 @@ def test_grouper_creation_bug(self):
)
result = s.groupby(pd.Grouper(level="three", freq="M")).sum()
expected = Series(
[28], index=Index([Timestamp("2013-01-31")], freq="M", name="three")
[28],
index=pd.DatetimeIndex([Timestamp("2013-01-31")], freq="M", name="three"),
)
tm.assert_series_equal(result, expected)

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/groupby/test_quantile.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ def test_columns_groupby_quantile():
[9.6, 8.4, 10.6, 9.4],
],
index=list("XYZ"),
columns=Index(
columns=pd.MultiIndex.from_tuples(
[("A", 0.8), ("A", 0.2), ("B", 0.8), ("B", 0.2)], names=["col", None]
),
)
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/indexes/base_class/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import pytest

from pandas import Index, MultiIndex
import pandas._testing as tm


class TestIndexConstructor:
Expand Down Expand Up @@ -29,7 +30,8 @@ def test_construction_list_mixed_tuples(self, index_vals):
def test_constructor_wrong_kwargs(self):
# GH #19348
with pytest.raises(TypeError, match="Unexpected keyword arguments {'foo'}"):
Index([], foo="bar")
with tm.assert_produces_warning(FutureWarning):
Index([], foo="bar")
jreback marked this conversation as resolved.
Show resolved Hide resolved

@pytest.mark.xfail(reason="see GH#21311: Index doesn't enforce dtype argument")
def test_constructor_cast(self):
Expand Down
8 changes: 6 additions & 2 deletions pandas/tests/indexes/categorical/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,10 +129,14 @@ def test_construction_with_categorical_dtype(self):
CategoricalIndex(data, categories=cats, dtype=dtype)

with pytest.raises(ValueError, match=msg):
Index(data, categories=cats, dtype=dtype)
with tm.assert_produces_warning(FutureWarning):
# passing subclass-specific kwargs to pd.Index
Index(data, categories=cats, dtype=dtype)

with pytest.raises(ValueError, match=msg):
CategoricalIndex(data, ordered=ordered, dtype=dtype)

with pytest.raises(ValueError, match=msg):
Index(data, ordered=ordered, dtype=dtype)
with tm.assert_produces_warning(FutureWarning):
# passing subclass-specific kwargs to pd.Index
Index(data, ordered=ordered, dtype=dtype)
26 changes: 15 additions & 11 deletions pandas/tests/indexes/datetimes/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,9 @@ def test_construction_index_with_mixed_timezones_with_NaT(self):
assert result.tz is None

# all NaT with tz
result = Index([pd.NaT, pd.NaT], tz="Asia/Tokyo", name="idx")
with tm.assert_produces_warning(FutureWarning):
# subclass-specific kwargs to pd.Index
result = Index([pd.NaT, pd.NaT], tz="Asia/Tokyo", name="idx")
exp = DatetimeIndex([pd.NaT, pd.NaT], tz="Asia/Tokyo", name="idx")

tm.assert_index_equal(result, exp, exact=True)
Expand Down Expand Up @@ -462,16 +464,18 @@ def test_construction_dti_with_mixed_timezones(self):
with pytest.raises(ValueError, match=msg):
# passing tz should result in DatetimeIndex, then mismatch raises
# ValueError
Index(
[
pd.NaT,
Timestamp("2011-01-01 10:00"),
pd.NaT,
Timestamp("2011-01-02 10:00", tz="US/Eastern"),
],
tz="Asia/Tokyo",
name="idx",
)
with tm.assert_produces_warning(FutureWarning):
# subclass-specific kwargs to pd.Index
Index(
[
pd.NaT,
Timestamp("2011-01-01 10:00"),
pd.NaT,
Timestamp("2011-01-02 10:00", tz="US/Eastern"),
],
tz="Asia/Tokyo",
name="idx",
)

def test_construction_base_constructor(self):
arr = [Timestamp("2011-01-01"), pd.NaT, Timestamp("2011-01-03")]
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/indexes/interval/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ class ConstructorTests:
get_kwargs_from_breaks to the expected format.
"""

@pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning")
@pytest.mark.parametrize(
"breaks",
[
Expand Down Expand Up @@ -80,6 +81,7 @@ def test_constructor_dtype(self, constructor, breaks, subtype):
result = constructor(dtype=dtype, **result_kwargs)
tm.assert_index_equal(result, expected)

@pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning")
@pytest.mark.parametrize("breaks", [[np.nan] * 2, [np.nan] * 4, [np.nan] * 50])
def test_constructor_nan(self, constructor, breaks, closed):
# GH 18421
Expand All @@ -93,6 +95,7 @@ def test_constructor_nan(self, constructor, breaks, closed):
assert result.dtype.subtype == expected_subtype
tm.assert_numpy_array_equal(np.array(result), expected_values)

@pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning")
@pytest.mark.parametrize(
"breaks",
[
Expand Down Expand Up @@ -378,6 +381,7 @@ def test_constructor_errors(self, constructor):
with pytest.raises(TypeError, match=msg):
constructor([0, 1])

@pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning")
@pytest.mark.parametrize(
"data, closed",
[
Expand Down
10 changes: 8 additions & 2 deletions pandas/tests/indexes/multi/test_equivalence.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,10 +185,16 @@ def test_identical(idx):
mi2 = mi2.set_names(["new1", "new2"])
assert mi.identical(mi2)

mi3 = Index(mi.tolist(), names=mi.names)
with tm.assert_produces_warning(FutureWarning):
# subclass-specific keywords to pd.Index
mi3 = Index(mi.tolist(), names=mi.names)

msg = r"Unexpected keyword arguments {'names'}"
with pytest.raises(TypeError, match=msg):
Index(mi.tolist(), names=mi.names, tupleize_cols=False)
with tm.assert_produces_warning(FutureWarning):
# subclass-specific keywords to pd.Index
Index(mi.tolist(), names=mi.names, tupleize_cols=False)

mi4 = Index(mi.tolist(), tupleize_cols=False)
assert mi.identical(mi3)
assert not mi.identical(mi4)
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/indexes/multi/test_names.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,9 @@ def test_take_preserve_name(idx):
def test_copy_names():
# Check that adding a "names" parameter to the copy is honored
# GH14302
multi_idx = pd.Index([(1, 2), (3, 4)], names=["MyName1", "MyName2"])
with tm.assert_produces_warning(FutureWarning):
# subclass-specific kwargs to pd.Index
multi_idx = pd.Index([(1, 2), (3, 4)], names=["MyName1", "MyName2"])
multi_idx1 = multi_idx.copy()

assert multi_idx.equals(multi_idx1)
Expand Down
2 changes: 2 additions & 0 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,7 @@ def test_constructor_dtypes_to_timedelta(self, cast_index, vals):
index = Index(vals)
assert isinstance(index, TimedeltaIndex)

@pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning")
@pytest.mark.parametrize("attr", ["values", "asi8"])
@pytest.mark.parametrize("klass", [Index, DatetimeIndex])
def test_constructor_dtypes_datetime(self, tz_naive_fixture, attr, klass):
Expand Down Expand Up @@ -2255,6 +2256,7 @@ def test_index_subclass_constructor_wrong_kwargs(index_maker):
index_maker(foo="bar")


@pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning")
def test_deprecated_fastpath():
msg = "[Uu]nexpected keyword argument"
with pytest.raises(TypeError, match=msg):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def test_index_groupby(self):
idx.groupby(to_groupby), {1.0: idx[[0, 5]], 2.0: idx[[1, 4]]}
)

to_groupby = Index(
to_groupby = pd.DatetimeIndex(
[
datetime(2011, 11, 1),
datetime(2011, 12, 1),
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/indexing/test_coercion.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,9 @@ def test_insert_index_period(self, insert, coerced_val, coerced_dtype):
else:
msg = r"Unexpected keyword arguments {'freq'}"
with pytest.raises(TypeError, match=msg):
pd.Index(data, freq="M")
with tm.assert_produces_warning(FutureWarning):
# passing keywords to pd.Index
pd.Index(data, freq="M")

def test_insert_index_complex128(self):
pytest.xfail("Test not implemented")
Expand Down
20 changes: 8 additions & 12 deletions pandas/tests/io/json/test_json_table_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -705,18 +705,14 @@ def test_read_json_table_orient_raises(self, index_nm, vals, recwarn):
"idx",
[
pd.Index(range(4)),
pd.Index(
pd.date_range(
"2020-08-30",
freq="d",
periods=4,
),
freq=None,
),
pd.Index(
pd.date_range("2020-08-30", freq="d", periods=4, tz="US/Central"),
freq=None,
),
pd.date_range(
"2020-08-30",
freq="d",
periods=4,
)._with_freq(None),
pd.date_range(
"2020-08-30", freq="d", periods=4, tz="US/Central"
)._with_freq(None),
pd.MultiIndex.from_product(
[
pd.date_range("2020-08-30", freq="d", periods=2, tz="US/Central"),
Expand Down