Skip to content

Commit

Permalink
implement _index_data parts of #24024 (#24379)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and jreback committed Dec 21, 2018
1 parent 5d134ec commit 04a0eac
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 3 deletions.
13 changes: 10 additions & 3 deletions pandas/_libs/reduction.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,10 @@ cdef class SeriesBinGrouper:
cached_typ = self.typ(vslider.buf, index=cached_ityp,
name=name)
else:
object.__setattr__(cached_ityp, '_data', islider.buf)
# See the comment in indexes/base.py about _index_data.
# We need this for ExtensionArray-backed indexes that hold
# a reference to a 1-d ndarray (datetime / timedelta / period).
object.__setattr__(cached_ityp, '_index_data', islider.buf)
cached_ityp._engine.clear_mapping()
object.__setattr__(
cached_typ._data._block, 'values', vslider.buf)
Expand Down Expand Up @@ -569,8 +572,11 @@ cdef class BlockSlider:
util.set_array_not_contiguous(x)

self.nblocks = len(self.blocks)
# See the comment in indexes/base.py about _index_data.
# We need this for ExtensionArray-backed indexes that hold a reference
# to a 1-d ndarray (datetime / timedelta / period).
self.idx_slider = Slider(
self.frame.index.values, self.dummy.index.values)
self.frame.index._index_data, self.dummy.index._index_data)

self.base_ptrs = <char**>malloc(sizeof(char*) * len(self.blocks))
for i, block in enumerate(self.blocks):
Expand All @@ -594,7 +600,8 @@ cdef class BlockSlider:

# move and set the index
self.idx_slider.move(start, end)
object.__setattr__(self.index, '_data', self.idx_slider.buf)

object.__setattr__(self.index, '_index_data', self.idx_slider.buf)
self.index._engine.clear_mapping()

cdef reset(self):
Expand Down
6 changes: 6 additions & 0 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,12 @@ def _simple_new(cls, values, name=None, dtype=None, **kwargs):

result = object.__new__(cls)
result._data = values
# _index_data is a (temporary?) fix to ensure that the direct data
# manipulation we do in `_libs/reduction.pyx` continues to work.
# We need access to the actual ndarray, since that code manipulates
# data buffers and strides directly. We don't re-use
# `_ndarray_values`, since `_libs/reduction.pyx` also assigns to
# this attribute rather than only reading it.
result._index_data = values
result.name = name
for k, v in compat.iteritems(kwargs):
setattr(result, k, v)
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,8 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=None):

result = super(DatetimeIndex, cls)._simple_new(values, freq, tz)
result.name = name
# For groupby perf. See note in indexes/base.py about _index_data
result._index_data = result._data
result._reset_identity()
return result

Expand Down
2 changes: 2 additions & 0 deletions pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,8 @@ def _simple_new(cls, values, name=None, freq=None, **kwargs):
raise TypeError("PeriodIndex._simple_new only accepts PeriodArray")
result = object.__new__(cls)
result._data = values
# For groupby perf. See note in indexes/base.py about _index_data
result._index_data = values._data
result.name = name
result._reset_identity()
return result
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,8 @@ def _simple_new(cls, values, name=None, freq=None, dtype=_TD_DTYPE):

result = super(TimedeltaIndex, cls)._simple_new(values, freq)
result.name = name
# For groupby perf. See note in indexes/base.py about _index_data
result._index_data = result._data
result._reset_identity()
return result

Expand Down

0 comments on commit 04a0eac

Please sign in to comment.