Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unify Index._dir_* with Series implementation #17117

Merged
merged 9 commits into from
Aug 29, 2017
30 changes: 30 additions & 0 deletions pandas/core/accessor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/usr/bin/env python
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remove the first line, we don't normally do this

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just pushed a commit that does this and adds a comment in the docstring as requested below. The docstring isn't super-useful at this point because there isn't much here. After this is merged I'll rebase #17042 and accessor.py will have a coherent theme.

# -*- coding: utf-8 -*-

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add a comment about what this module contains


class DirNamesMixin(object):
_accessors = frozenset([])

def _dir_deletions(self):
""" delete unwanted __dir__ for this object """
return self._accessors
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you put a comment why accessors are deleted? (as only the relevant ones are added back afterwards?)


def _dir_additions(self):
""" add addtional __dir__ for this object """
rv = set()
for accessor in self._accessors:
try:
getattr(self, accessor)
rv.add(accessor)
except AttributeError:
pass
return rv

def __dir__(self):
"""
Provide method name lookup and completion
Only provide 'public' methods
"""
rv = set(dir(type(self)))
rv = (rv - self._dir_deletions()) | self._dir_additions()
return sorted(rv)
22 changes: 3 additions & 19 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from pandas.util._decorators import (Appender, cache_readonly,
deprecate_kwarg, Substitution)
from pandas.core.common import AbstractMethodError
from pandas.core.accessor import DirNamesMixin

_shared_docs = dict()
_indexops_doc_kwargs = dict(klass='IndexOpsMixin', inplace='',
Expand Down Expand Up @@ -72,7 +73,7 @@ def __repr__(self):
return str(self)


class PandasObject(StringMixin):
class PandasObject(StringMixin, DirNamesMixin):

"""baseclass for various pandas objects"""

Expand All @@ -91,23 +92,6 @@ def __unicode__(self):
# Should be overwritten by base classes
return object.__repr__(self)

def _dir_additions(self):
""" add addtional __dir__ for this object """
return set()

def _dir_deletions(self):
""" delete unwanted __dir__ for this object """
return set()

def __dir__(self):
"""
Provide method name lookup and completion
Only provide 'public' methods
"""
rv = set(dir(type(self)))
rv = (rv - self._dir_deletions()) | self._dir_additions()
return sorted(rv)

def _reset_cache(self, key=None):
"""
Reset cached properties. If ``key`` is passed, only clears that key.
Expand Down Expand Up @@ -140,7 +124,7 @@ class NoNewAttributesMixin(object):

Prevents additional attributes via xxx.attribute = "something" after a
call to `self.__freeze()`. Mainly used to prevent the user from using
wrong attrirbutes on a accessor (`Series.cat/.str/.dt`).
wrong attributes on a accessor (`Series.cat/.str/.dt`).

If you really want to add a new attribute at a later time, you need to use
`object.__setattr__(self, key, value)`.
Expand Down
5 changes: 3 additions & 2 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,9 @@ def __unicode__(self):

def _dir_additions(self):
""" add the string-like attributes from the info_axis """
return set([c for c in self._info_axis
if isinstance(c, string_types) and isidentifier(c)])
additions = set([c for c in self._info_axis
if isinstance(c, string_types) and isidentifier(c)])
return PandasObject._dir_additions(self).union(additions)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason not to use super ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not really. I developed the habit working in statsmodels where inheritance chains can get pretty messy. Happy to change it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

super is the idiom. Calling specifically named methods usually indicates something odd in the inheritance chain.


@property
def _constructor_sliced(self):
Expand Down
9 changes: 7 additions & 2 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
import pandas.core.sorting as sorting
from pandas.io.formats.printing import pprint_thing
from pandas.core.ops import _comp_method_OBJECT_ARRAY
from pandas.core.strings import StringAccessorMixin
from pandas.core import strings
from pandas.core.config import get_option


Expand Down Expand Up @@ -98,7 +98,7 @@ def _new_Index(cls, d):
return cls.__new__(cls, **d)


class Index(IndexOpsMixin, StringAccessorMixin, PandasObject):
class Index(IndexOpsMixin, PandasObject):
"""
Immutable ndarray implementing an ordered, sliceable set. The basic object
storing axis labels for all pandas objects
Expand Down Expand Up @@ -151,6 +151,11 @@ class Index(IndexOpsMixin, StringAccessorMixin, PandasObject):

_engine_type = libindex.ObjectEngine

_accessors = frozenset(['dt', 'str', 'cat'])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe leave out 'dt' and 'cat' until they are actually added as accessor? (in a next PR I suppose)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah, but they are not yet really added as accessor (so no user visible change), only 'removed' from __dir__ (but not really removed since they are not there yet), correct?
In that case not that important.

I suppose we already have tests to check that 'str' is only present on the correct index types?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suppose we already have tests to check that 'str' is only present on the correct index types?

I'm not sure off the top, but I wrote some tests for #17204 that can be adapted pretty readily.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pls remove dt/cat, leave for another PR. str is covered I believe.


Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

there are no tests for the non string accessors in Index

# String Methods
str = base.AccessorProperty(strings.StringMethods)

def __new__(cls, data=None, dtype=None, copy=False, name=None,
fastpath=False, tupleize_cols=True, **kwargs):

Expand Down
17 changes: 3 additions & 14 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,7 @@ def wrapper(self):
# Series class


class Series(base.IndexOpsMixin, strings.StringAccessorMixin,
generic.NDFrame,):
class Series(base.IndexOpsMixin, generic.NDFrame):
"""
One-dimensional ndarray with axis labels (including time series).

Expand Down Expand Up @@ -2924,18 +2923,8 @@ def to_period(self, freq=None, copy=True):
# Categorical methods
cat = base.AccessorProperty(CategoricalAccessor)

def _dir_deletions(self):
return self._accessors

def _dir_additions(self):
rv = set()
for accessor in self._accessors:
try:
getattr(self, accessor)
rv.add(accessor)
except AttributeError:
pass
return rv
# String Methods
str = base.AccessorProperty(strings.StringMethods)

# ----------------------------------------------------------------------
# Add plotting methods to Series
Expand Down
20 changes: 2 additions & 18 deletions pandas/core/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

from pandas.core.algorithms import take_1d
import pandas.compat as compat
from pandas.core.base import AccessorProperty, NoNewAttributesMixin
from pandas.core.base import NoNewAttributesMixin
from pandas.util._decorators import Appender
import re
import pandas._libs.lib as lib
Expand Down Expand Up @@ -1920,20 +1920,4 @@ def _make_accessor(cls, data):
message = ("Can only use .str accessor with Index, not "
"MultiIndex")
raise AttributeError(message)
return StringMethods(data)


class StringAccessorMixin(object):
""" Mixin to add a `.str` acessor to the class."""

str = AccessorProperty(StringMethods)

def _dir_additions(self):
return set()

def _dir_deletions(self):
try:
getattr(self, 'str')
except AttributeError:
return set(['str'])
return set()
return cls(data)