Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: deprecate fastpath keyword in Index constructors #23110

Merged
merged 4 commits into from
Oct 18, 2018
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -664,6 +664,7 @@ Deprecations
many ``Series``, ``Index`` or 1-dimensional ``np.ndarray``, or alternatively, only scalar values. (:issue:`21950`)
- :meth:`FrozenNDArray.searchsorted` has deprecated the ``v`` parameter in favor of ``value`` (:issue:`14645`)
- :func:`DatetimeIndex.shift` and :func:`PeriodIndex.shift` now accept ``periods`` argument instead of ``n`` for consistency with :func:`Index.shift` and :func:`Series.shift`. Using ``n`` throws a deprecation warning (:issue:`22458`, :issue:`22912`)
- The ``fastpath`` keyword of the different Index constructors is deprecated.

.. _whatsnew_0240.prior_deprecations:

Expand Down
10 changes: 7 additions & 3 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,13 +252,17 @@ class Index(IndexOpsMixin, PandasObject):
str = CachedAccessor("str", StringMethods)

def __new__(cls, data=None, dtype=None, copy=False, name=None,
fastpath=False, tupleize_cols=True, **kwargs):
fastpath=None, tupleize_cols=True, **kwargs):

if name is None and hasattr(data, 'name'):
name = data.name

if fastpath:
return cls._simple_new(data, name)
if fastpath is not None:
warnings.warn("The 'fastpath' keyword is deprecated, and will be "
"removed in a future version.",
FutureWarning, stacklevel=2)
if fastpath:
return cls._simple_new(data, name)

from .range import RangeIndex

Expand Down
13 changes: 9 additions & 4 deletions pandas/core/indexes/category.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import operator
import warnings

import numpy as np
from pandas._libs import index as libindex
Expand Down Expand Up @@ -87,10 +88,14 @@ class CategoricalIndex(Index, accessor.PandasDelegate):
_attributes = ['name']

def __new__(cls, data=None, categories=None, ordered=None, dtype=None,
copy=False, name=None, fastpath=False):

if fastpath:
return cls._simple_new(data, name=name, dtype=dtype)
copy=False, name=None, fastpath=None):

if fastpath is not None:
warnings.warn("The 'fastpath' keyword is deprecated, and will be "
"removed in a future version.",
FutureWarning, stacklevel=2)
if fastpath:
return cls._simple_new(data, name=name, dtype=dtype)

if name is None and hasattr(data, 'name'):
name = data.name
Expand Down
14 changes: 10 additions & 4 deletions pandas/core/indexes/numeric.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import warnings

import numpy as np
from pandas._libs import (index as libindex,
join as libjoin)
Expand Down Expand Up @@ -35,10 +37,14 @@ class NumericIndex(Index):
_is_numeric_dtype = True

def __new__(cls, data=None, dtype=None, copy=False, name=None,
fastpath=False):

if fastpath:
return cls._simple_new(data, name=name)
fastpath=None):

if fastpath is not None:
warnings.warn("The 'fastpath' keyword is deprecated, and will be "
"removed in a future version.",
FutureWarning, stacklevel=2)
if fastpath:
return cls._simple_new(data, name=name)

# is_scalar, generators handled in coerce_to_ndarray
data = cls._coerce_to_ndarray(data)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ def __contains__(self, key):

@cache_readonly
def _int64index(self):
return Int64Index(self.asi8, name=self.name, fastpath=True)
return Int64Index._simple_new(self.asi8, name=self.name)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the performance implication of using _simple_new vs just Int64Index(...)? If its sufficiently small, we should be using the public constructors

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Relatively it is a big difference:

In [8]: values = np.arange(100000)

In [9]: %timeit pd.Index(values)
31 µs ± 140 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)

In [10]: %timeit pd.Index._simple_new(values)
1.62 µs ± 26.1 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)

but if that will be significant in actual code, I don't know.

But since we are using that in many places, I leave changing _simple_new calls to the public constructor for another PR.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jbrockmendel this leads a bit further, but given that the public constructors (__init__) of Index classes are very complex (something we will not be able to change I think), I think that we need to see _simple_new as kind of a "internal public" method within the pandas code base.


@property
def values(self):
Expand Down
33 changes: 19 additions & 14 deletions pandas/core/indexes/range.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import operator
from datetime import timedelta
from sys import getsizeof
import warnings

import numpy as np

Expand Down Expand Up @@ -68,10 +69,14 @@ class RangeIndex(Int64Index):
_engine_type = libindex.Int64Engine

def __new__(cls, start=None, stop=None, step=None,
dtype=None, copy=False, name=None, fastpath=False):
dtype=None, copy=False, name=None, fastpath=None):

if fastpath:
return cls._simple_new(start, stop, step, name=name)
if fastpath is not None:
warnings.warn("The 'fastpath' keyword is deprecated, and will be "
"removed in a future version.",
FutureWarning, stacklevel=2)
if fastpath:
return cls._simple_new(start, stop, step, name=name)

cls._validate_dtype(dtype)

Expand Down Expand Up @@ -174,7 +179,7 @@ def _data(self):

@cache_readonly
def _int64index(self):
return Int64Index(self._data, name=self.name, fastpath=True)
return Int64Index._simple_new(self._data, name=self.name)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shallow_copy? ditto below

Copy link
Member Author

@jorisvandenbossche jorisvandenbossche Oct 13, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not in this case, since it is not self that is calling it, so you need to pass the self.name anyway

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One case below is shallow_copy, but the other can indeed be changed.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And actually, _shallow_copy has not the ability to specify (override) the name (which is not really following how _shallow_copy works for other Index classes), so can also not use it there.

Might be a bit buggy in _shallow_copy (it is also not used at all anywhere in the RangeIndex implementation, although maybe in inherited methods ..).
But I want to keep it here just about deprecating the fastpath keyword, so let's leave that for another issue.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why are u changing these constructors at all?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not changing anything. fastpath=True means exactly the _simple_new what I replaced it with.
This PR does not try to do a clean-up of usage of _simple_new by replacing it with the default constructor, it only deprecates the fastpath argument.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok


def _get_data_as_items(self):
""" return a list of tuples of start, stop, step """
Expand Down Expand Up @@ -262,8 +267,8 @@ def tolist(self):
@Appender(_index_shared_docs['_shallow_copy'])
def _shallow_copy(self, values=None, **kwargs):
if values is None:
return RangeIndex(name=self.name, fastpath=True,
**dict(self._get_data_as_items()))
return RangeIndex._simple_new(
name=self.name, **dict(self._get_data_as_items()))
else:
kwargs.setdefault('name', self.name)
return self._int64index._shallow_copy(values, **kwargs)
Expand All @@ -273,8 +278,8 @@ def copy(self, name=None, deep=False, dtype=None, **kwargs):
self._validate_dtype(dtype)
if name is None:
name = self.name
return RangeIndex(name=name, fastpath=True,
**dict(self._get_data_as_items()))
return RangeIndex._simple_new(
name=name, **dict(self._get_data_as_items()))

def _minmax(self, meth):
no_steps = len(self) - 1
Expand Down Expand Up @@ -374,7 +379,7 @@ def intersection(self, other):
tmp_start = first._start + (second._start - first._start) * \
first._step // gcd * s
new_step = first._step * second._step // gcd
new_index = RangeIndex(tmp_start, int_high, new_step, fastpath=True)
new_index = RangeIndex._simple_new(tmp_start, int_high, new_step)

# adjust index to limiting interval
new_index._start = new_index._min_fitting_element(int_low)
Expand Down Expand Up @@ -552,7 +557,7 @@ def __getitem__(self, key):
stop = self._start + self._step * stop
step = self._step * step

return RangeIndex(start, stop, step, name=self.name, fastpath=True)
return RangeIndex._simple_new(start, stop, step, name=self.name)

# fall back to Int64Index
return super_getitem(key)
Expand All @@ -565,12 +570,12 @@ def __floordiv__(self, other):
start = self._start // other
step = self._step // other
stop = start + len(self) * step
return RangeIndex(start, stop, step, name=self.name,
fastpath=True)
return RangeIndex._simple_new(
start, stop, step, name=self.name)
if len(self) == 1:
start = self._start // other
return RangeIndex(start, start + 1, 1, name=self.name,
fastpath=True)
return RangeIndex._simple_new(
start, start + 1, 1, name=self.name)
return self._int64index // other

@classmethod
Expand Down
29 changes: 29 additions & 0 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2530,3 +2530,32 @@ def test_index_subclass_constructor_wrong_kwargs(index_maker):
# GH #19348
with tm.assert_raises_regex(TypeError, 'unexpected keyword argument'):
index_maker(foo='bar')


def test_deprecated_fastpath():

with tm.assert_produces_warning(FutureWarning):
idx = pd.Index(
np.array(['a', 'b'], dtype=object), name='test', fastpath=True)

expected = pd.Index(['a', 'b'], name='test')
tm.assert_index_equal(idx, expected)

with tm.assert_produces_warning(FutureWarning):
idx = pd.Int64Index(
np.array([1, 2, 3], dtype='int64'), name='test', fastpath=True)

expected = pd.Index([1, 2, 3], name='test', dtype='int64')
tm.assert_index_equal(idx, expected)

with tm.assert_produces_warning(FutureWarning):
idx = pd.RangeIndex(0, 5, 2, name='test', fastpath=True)

expected = pd.RangeIndex(0, 5, 2, name='test')
tm.assert_index_equal(idx, expected)

with tm.assert_produces_warning(FutureWarning):
idx = pd.CategoricalIndex(['a', 'b', 'c'], name='test', fastpath=True)

expected = pd.CategoricalIndex(['a', 'b', 'c'], name='test')
tm.assert_index_equal(idx, expected)