diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index fbd2c2b5345fc9..53f254aee2e0e8 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -143,7 +143,7 @@ Other API changes Deprecations ~~~~~~~~~~~~ - +- Deprecated allowing subclass-specific keyword arguments in the :class:`Index` constructor; use the specific subclass directly instead (:issue:`14093`, :issue:`21311`, :issue:`22315`, :issue:`26974`) - - diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index f5b9d0194833a7..8b742f8353e41b 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -252,6 +252,15 @@ def __new__( cls, data=None, dtype=None, copy=False, name=None, tupleize_cols=True, **kwargs ) -> "Index": + if kwargs: + warnings.warn( + "Passing keywords other than 'data', 'dtype', 'copy', 'name', " + "'tupleize_cols' is deprecated and will raise TypeError in a " + "future version. Use the specific Index subclass directly instead", + FutureWarning, + stacklevel=2, + ) + from pandas.core.indexes.range import RangeIndex name = maybe_extract_name(name, data, cls) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 220cd5363e78f4..ba4cffe0533a0a 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -147,7 +147,7 @@ def __array_wrap__(self, result, context=None): if not is_period_dtype(self.dtype) and attrs["freq"]: # no need to infer if freq is None attrs["freq"] = "infer" - return Index(result, **attrs) + return type(self)(result, **attrs) # ------------------------------------------------------------------------ diff --git a/pandas/core/resample.py b/pandas/core/resample.py index b8b372e7666b80..e432ec6cb54a28 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1966,6 +1966,10 @@ def _asfreq_compat(index, freq): new_index: Index if isinstance(index, PeriodIndex): new_index = index.asfreq(freq=freq) - else: - 
new_index = Index([], dtype=index.dtype, freq=freq, name=index.name) + elif isinstance(index, DatetimeIndex): + new_index = DatetimeIndex([], dtype=index.dtype, freq=freq, name=index.name) + elif isinstance(index, TimedeltaIndex): + new_index = TimedeltaIndex([], dtype=index.dtype, freq=freq, name=index.name) + else: # pragma: no cover + raise TypeError(type(index)) return new_index diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index c22f6806e932ee..d5c5e8edb9efe3 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -12,6 +12,7 @@ from typing import ( TYPE_CHECKING, Any, + Callable, Dict, List, Optional, @@ -2045,15 +2046,19 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str): if self.freq is not None: kwargs["freq"] = _ensure_decoded(self.freq) + factory: Union[Type[Index], Type[DatetimeIndex]] = Index + if is_datetime64_dtype(values.dtype) or is_datetime64tz_dtype(values.dtype): + factory = DatetimeIndex + # making an Index instance could throw a number of different errors try: - new_pd_index = Index(values, **kwargs) + new_pd_index = factory(values, **kwargs) except ValueError: # if the output freq is different that what we recorded, # it should be None (see also 'doc example part 2') if "freq" in kwargs: kwargs["freq"] = None - new_pd_index = Index(values, **kwargs) + new_pd_index = factory(values, **kwargs) new_pd_index = _set_tz(new_pd_index, self.tz) return new_pd_index, new_pd_index @@ -2736,8 +2741,14 @@ def _alias_to_class(self, alias): return alias return self._reverse_index_map.get(alias, Index) - def _get_index_factory(self, klass): - if klass == DatetimeIndex: + def _get_index_factory(self, attrs): + index_class = self._alias_to_class( + _ensure_decoded(getattr(attrs, "index_class", "")) + ) + + factory: Callable + + if index_class == DatetimeIndex: def f(values, freq=None, tz=None): # data are already in UTC, localize and convert if tz present @@ -2747,16 +2758,34 @@ def f(values, freq=None, 
tz=None): result = result.tz_localize("UTC").tz_convert(tz) return result - return f - elif klass == PeriodIndex: + factory = f + elif index_class == PeriodIndex: def f(values, freq=None, tz=None): parr = PeriodArray._simple_new(values, freq=freq) return PeriodIndex._simple_new(parr, name=None) - return f + factory = f + else: + factory = index_class + + kwargs = {} + if "freq" in attrs: + kwargs["freq"] = attrs["freq"] + if index_class is Index: + # DTI/PI would be gotten by _alias_to_class + factory = TimedeltaIndex + + if "tz" in attrs: + if isinstance(attrs["tz"], bytes): + # created by python2 + kwargs["tz"] = attrs["tz"].decode("utf-8") + else: + # created by python3 + kwargs["tz"] = attrs["tz"] + assert index_class is DatetimeIndex # just checking - return klass + return factory, kwargs def validate_read(self, columns, where): """ @@ -2928,22 +2957,8 @@ def read_index_node( name = _ensure_str(node._v_attrs.name) name = _ensure_decoded(name) - index_class = self._alias_to_class( - _ensure_decoded(getattr(node._v_attrs, "index_class", "")) - ) - factory = self._get_index_factory(index_class) - - kwargs = {} - if "freq" in node._v_attrs: - kwargs["freq"] = node._v_attrs["freq"] - - if "tz" in node._v_attrs: - if isinstance(node._v_attrs["tz"], bytes): - # created by python2 - kwargs["tz"] = node._v_attrs["tz"].decode("utf-8") - else: - # created by python3 - kwargs["tz"] = node._v_attrs["tz"] + attrs = node._v_attrs + factory, kwargs = self._get_index_factory(attrs) if kind == "date": index = factory( diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 5205ca3777fc02..10e69ddcd5f800 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -254,7 +254,8 @@ def test_grouper_creation_bug(self): ) result = s.groupby(pd.Grouper(level="three", freq="M")).sum() expected = Series( - [28], index=Index([Timestamp("2013-01-31")], freq="M", name="three") + [28], + 
index=pd.DatetimeIndex([Timestamp("2013-01-31")], freq="M", name="three"), ) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py index 76fc82c6288eb1..c8d6d09577c2b1 100644 --- a/pandas/tests/groupby/test_quantile.py +++ b/pandas/tests/groupby/test_quantile.py @@ -271,7 +271,7 @@ def test_columns_groupby_quantile(): [9.6, 8.4, 10.6, 9.4], ], index=list("XYZ"), - columns=Index( + columns=pd.MultiIndex.from_tuples( [("A", 0.8), ("A", 0.2), ("B", 0.8), ("B", 0.2)], names=["col", None] ), ) diff --git a/pandas/tests/indexes/base_class/test_constructors.py b/pandas/tests/indexes/base_class/test_constructors.py index 02b32c46e7d6fb..b3229b4eda0308 100644 --- a/pandas/tests/indexes/base_class/test_constructors.py +++ b/pandas/tests/indexes/base_class/test_constructors.py @@ -2,6 +2,7 @@ import pytest from pandas import Index, MultiIndex +import pandas._testing as tm class TestIndexConstructor: @@ -29,7 +30,8 @@ def test_construction_list_mixed_tuples(self, index_vals): def test_constructor_wrong_kwargs(self): # GH #19348 with pytest.raises(TypeError, match="Unexpected keyword arguments {'foo'}"): - Index([], foo="bar") + with tm.assert_produces_warning(FutureWarning): + Index([], foo="bar") @pytest.mark.xfail(reason="see GH#21311: Index doesn't enforce dtype argument") def test_constructor_cast(self): diff --git a/pandas/tests/indexes/categorical/test_constructors.py b/pandas/tests/indexes/categorical/test_constructors.py index ee3f85da227815..3bd2ac20962d2c 100644 --- a/pandas/tests/indexes/categorical/test_constructors.py +++ b/pandas/tests/indexes/categorical/test_constructors.py @@ -129,10 +129,14 @@ def test_construction_with_categorical_dtype(self): CategoricalIndex(data, categories=cats, dtype=dtype) with pytest.raises(ValueError, match=msg): - Index(data, categories=cats, dtype=dtype) + with tm.assert_produces_warning(FutureWarning): + # passing subclass-specific kwargs to pd.Index + 
Index(data, categories=cats, dtype=dtype) with pytest.raises(ValueError, match=msg): CategoricalIndex(data, ordered=ordered, dtype=dtype) with pytest.raises(ValueError, match=msg): - Index(data, ordered=ordered, dtype=dtype) + with tm.assert_produces_warning(FutureWarning): + # passing subclass-specific kwargs to pd.Index + Index(data, ordered=ordered, dtype=dtype) diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 698da83d9e4ad1..3f30a1a076eda9 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -374,7 +374,9 @@ def test_construction_index_with_mixed_timezones_with_NaT(self): assert result.tz is None # all NaT with tz - result = Index([pd.NaT, pd.NaT], tz="Asia/Tokyo", name="idx") + with tm.assert_produces_warning(FutureWarning): + # subclass-specific kwargs to pd.Index + result = Index([pd.NaT, pd.NaT], tz="Asia/Tokyo", name="idx") exp = DatetimeIndex([pd.NaT, pd.NaT], tz="Asia/Tokyo", name="idx") tm.assert_index_equal(result, exp, exact=True) @@ -462,16 +464,18 @@ def test_construction_dti_with_mixed_timezones(self): with pytest.raises(ValueError, match=msg): # passing tz should results in DatetimeIndex, then mismatch raises # TypeError - Index( - [ - pd.NaT, - Timestamp("2011-01-01 10:00"), - pd.NaT, - Timestamp("2011-01-02 10:00", tz="US/Eastern"), - ], - tz="Asia/Tokyo", - name="idx", - ) + with tm.assert_produces_warning(FutureWarning): + # subclass-specific kwargs to pd.Index + Index( + [ + pd.NaT, + Timestamp("2011-01-01 10:00"), + pd.NaT, + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + tz="Asia/Tokyo", + name="idx", + ) def test_construction_base_constructor(self): arr = [Timestamp("2011-01-01"), pd.NaT, Timestamp("2011-01-03")] diff --git a/pandas/tests/indexes/interval/test_constructors.py b/pandas/tests/indexes/interval/test_constructors.py index 8b4cafc17a202d..6182df8429e8b1 100644 --- 
a/pandas/tests/indexes/interval/test_constructors.py +++ b/pandas/tests/indexes/interval/test_constructors.py @@ -36,6 +36,7 @@ class ConstructorTests: get_kwargs_from_breaks to the expected format. """ + @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning") @pytest.mark.parametrize( "breaks", [ @@ -80,6 +81,7 @@ def test_constructor_dtype(self, constructor, breaks, subtype): result = constructor(dtype=dtype, **result_kwargs) tm.assert_index_equal(result, expected) + @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning") @pytest.mark.parametrize("breaks", [[np.nan] * 2, [np.nan] * 4, [np.nan] * 50]) def test_constructor_nan(self, constructor, breaks, closed): # GH 18421 @@ -93,6 +95,7 @@ def test_constructor_nan(self, constructor, breaks, closed): assert result.dtype.subtype == expected_subtype tm.assert_numpy_array_equal(np.array(result), expected_values) + @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning") @pytest.mark.parametrize( "breaks", [ @@ -378,6 +381,7 @@ def test_constructor_errors(self, constructor): with pytest.raises(TypeError, match=msg): constructor([0, 1]) + @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning") @pytest.mark.parametrize( "data, closed", [ diff --git a/pandas/tests/indexes/multi/test_equivalence.py b/pandas/tests/indexes/multi/test_equivalence.py index bb34760e28d963..52acb3d0797c63 100644 --- a/pandas/tests/indexes/multi/test_equivalence.py +++ b/pandas/tests/indexes/multi/test_equivalence.py @@ -185,10 +185,16 @@ def test_identical(idx): mi2 = mi2.set_names(["new1", "new2"]) assert mi.identical(mi2) - mi3 = Index(mi.tolist(), names=mi.names) + with tm.assert_produces_warning(FutureWarning): + # subclass-specific keywords to pd.Index + mi3 = Index(mi.tolist(), names=mi.names) + msg = r"Unexpected keyword arguments {'names'}" with pytest.raises(TypeError, match=msg): - Index(mi.tolist(), names=mi.names, tupleize_cols=False) + with 
tm.assert_produces_warning(FutureWarning): + # subclass-specific keywords to pd.Index + Index(mi.tolist(), names=mi.names, tupleize_cols=False) + mi4 = Index(mi.tolist(), tupleize_cols=False) assert mi.identical(mi3) assert not mi.identical(mi4) diff --git a/pandas/tests/indexes/multi/test_names.py b/pandas/tests/indexes/multi/test_names.py index 891380b35a8be8..2689e24502b5b7 100644 --- a/pandas/tests/indexes/multi/test_names.py +++ b/pandas/tests/indexes/multi/test_names.py @@ -56,7 +56,9 @@ def test_take_preserve_name(idx): def test_copy_names(): # Check that adding a "names" parameter to the copy is honored # GH14302 - multi_idx = pd.Index([(1, 2), (3, 4)], names=["MyName1", "MyName2"]) + with tm.assert_produces_warning(FutureWarning): + # subclass-specific kwargs to pd.Index + multi_idx = pd.Index([(1, 2), (3, 4)], names=["MyName1", "MyName2"]) multi_idx1 = multi_idx.copy() assert multi_idx.equals(multi_idx1) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 372a1d290bca03..e5bb78604207fc 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -335,6 +335,7 @@ def test_constructor_dtypes_to_timedelta(self, cast_index, vals): index = Index(vals) assert isinstance(index, TimedeltaIndex) + @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning") @pytest.mark.parametrize("attr", ["values", "asi8"]) @pytest.mark.parametrize("klass", [Index, DatetimeIndex]) def test_constructor_dtypes_datetime(self, tz_naive_fixture, attr, klass): @@ -2255,6 +2256,7 @@ def test_index_subclass_constructor_wrong_kwargs(index_maker): index_maker(foo="bar") +@pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning") def test_deprecated_fastpath(): msg = "[Uu]nexpected keyword argument" with pytest.raises(TypeError, match=msg): diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index ff1632e33c0fb6..e391b76ddbd151 100644 --- 
a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -57,7 +57,7 @@ def test_index_groupby(self): idx.groupby(to_groupby), {1.0: idx[[0, 5]], 2.0: idx[[1, 4]]} ) - to_groupby = Index( + to_groupby = pd.DatetimeIndex( [ datetime(2011, 11, 1), datetime(2011, 12, 1), diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index bde7e9991bbedf..15f58006426f49 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -506,7 +506,9 @@ def test_insert_index_period(self, insert, coerced_val, coerced_dtype): else: msg = r"Unexpected keyword arguments {'freq'}" with pytest.raises(TypeError, match=msg): - pd.Index(data, freq="M") + with tm.assert_produces_warning(FutureWarning): + # passing keywords to pd.Index + pd.Index(data, freq="M") def test_insert_index_complex128(self): pytest.xfail("Test not implemented") diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index dba4b9214e50ca..06e7c96277fd6a 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -705,18 +705,14 @@ def test_read_json_table_orient_raises(self, index_nm, vals, recwarn): "idx", [ pd.Index(range(4)), - pd.Index( - pd.date_range( - "2020-08-30", - freq="d", - periods=4, - ), - freq=None, - ), - pd.Index( - pd.date_range("2020-08-30", freq="d", periods=4, tz="US/Central"), - freq=None, - ), + pd.date_range( + "2020-08-30", + freq="d", + periods=4, + )._with_freq(None), + pd.date_range( + "2020-08-30", freq="d", periods=4, tz="US/Central" + )._with_freq(None), pd.MultiIndex.from_product( [ pd.date_range("2020-08-30", freq="d", periods=2, tz="US/Central"),