From e9d1f059f11cad0e0138123944900f2247d60acc Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 29 Jul 2017 15:38:38 -0700 Subject: [PATCH 1/8] Make Index behave more like Series De-duplicate _dir_additions and _dir_deletions code Typo fix --- pandas/core/base.py | 49 ++++++++++++++++++----------- pandas/core/generic.py | 3 +- pandas/core/indexes/base.py | 11 +++++-- pandas/core/indexes/datetimelike.py | 9 ++++++ pandas/core/series.py | 17 +++------- pandas/core/strings.py | 35 ++++++--------------- 6 files changed, 64 insertions(+), 60 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index eb785b18bd02b..605037caf50f7 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -72,7 +72,35 @@ def __repr__(self): return str(self) -class PandasObject(StringMixin): +class DirNamesMixin(object): + _accessors = frozenset([]) + + def _dir_deletions(self): + """ delete unwanted __dir__ for this object """ + return self._accessors + + def _dir_additions(self): + """ add addtional __dir__ for this object """ + rv = set() + for accessor in self._accessors: + try: + getattr(self, accessor) + rv.add(accessor) + except AttributeError: + pass + return rv + + def __dir__(self): + """ + Provide method name lookup and completion + Only provide 'public' methods + """ + rv = set(dir(type(self))) + rv = (rv - self._dir_deletions()) | self._dir_additions() + return sorted(rv) + + +class PandasObject(StringMixin, DirNamesMixin): """baseclass for various pandas objects""" @@ -91,23 +119,6 @@ def __unicode__(self): # Should be overwritten by base classes return object.__repr__(self) - def _dir_additions(self): - """ add addtional __dir__ for this object """ - return set() - - def _dir_deletions(self): - """ delete unwanted __dir__ for this object """ - return set() - - def __dir__(self): - """ - Provide method name lookup and completion - Only provide 'public' methods - """ - rv = set(dir(type(self))) - rv = (rv - self._dir_deletions()) | self._dir_additions() - return sorted(rv) - def _reset_cache(self, key=None): """ Reset cached properties. If ``key`` is passed, only clears that key. @@ -140,7 +151,7 @@ class NoNewAttributesMixin(object): Prevents additional attributes via xxx.attribute = "something" after a call to `self.__freeze()`. Mainly used to prevent the user from using - wrong attrirbutes on a accessor (`Series.cat/.str/.dt`). + wrong attributes on a accessor (`Series.cat/.str/.dt`). If you really want to add a new attribute at a later time, you need to use `object.__setattr__(self, key, value)`. diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fbd26655798bd..2f794992ddeea 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -192,8 +192,9 @@ def __unicode__(self): def _dir_additions(self): """ add the string-like attributes from the info_axis """ - return set([c for c in self._info_axis + additions = set([c for c in self._info_axis if isinstance(c, string_types) and isidentifier(c)]) + return PandasObject._dir_additions(self).union(additions) @property def _constructor_sliced(self): diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 411428e001c81..78492748b107a 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -52,7 +52,7 @@ import pandas.core.sorting as sorting from pandas.io.formats.printing import pprint_thing from pandas.core.ops import _comp_method_OBJECT_ARRAY -from pandas.core.strings import StringAccessorMixin +from pandas.core import strings from pandas.core.config import get_option @@ -98,7 +98,7 @@ def _new_Index(cls, d): return cls.__new__(cls, **d) -class Index(IndexOpsMixin, StringAccessorMixin, PandasObject): +class Index(IndexOpsMixin, PandasObject): """ Immutable ndarray implementing an ordered, sliceable set. The basic object storing axis labels for all pandas objects @@ -151,6 +151,13 @@ class Index(IndexOpsMixin, StringAccessorMixin, PandasObject): _engine_type = libindex.ObjectEngine + _accessors = frozenset(['dt', 'str', 'cat']) + + # String Methods + str = base.AccessorProperty(strings.StringMethods, + strings.StringMethods._make_accessor) + + def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, tupleize_cols=True, **kwargs): diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 845c71b6c41d8..93ac2ef155d51 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -125,6 +125,15 @@ def ceil(self, freq): class DatetimeIndexOpsMixin(object): """ common ops mixin to support a unified inteface datetimelike Index """ + @property + def dt(self): + """ + For a datetime-like Index object, `self.dt` returns `self` so that + datetime-like attributes can be accessed symmetrically for Index + and Series objects. + """ + return self + def equals(self, other): """ Determines if two Index objects contain the same elements. diff --git a/pandas/core/series.py b/pandas/core/series.py index 60d268c89a9d7..d3b646a96e885 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -114,8 +114,7 @@ def wrapper(self): # Series class -class Series(base.IndexOpsMixin, strings.StringAccessorMixin, - generic.NDFrame,): +class Series(base.IndexOpsMixin, generic.NDFrame): """ One-dimensional ndarray with axis labels (including time series). @@ -2934,18 +2933,10 @@ def _make_cat_accessor(self): cat = base.AccessorProperty(CategoricalAccessor, _make_cat_accessor) - def _dir_deletions(self): - return self._accessors + # String Methods + str = base.AccessorProperty(strings.StringMethods, + strings.StringMethods._make_accessor) - def _dir_additions(self): - rv = set() - for accessor in self._accessors: - try: - getattr(self, accessor) - rv.add(accessor) - except AttributeError: - pass - return rv Series._setup_axes(['index'], info_axis=0, stat_axis=0, aliases={'rows': 0}) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 30465561a911c..9008ed080e45d 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1890,18 +1890,15 @@ def rindex(self, sub, start=0, end=None): docstring=_shared_docs['ismethods'] % _shared_docs['isdecimal']) - -class StringAccessorMixin(object): - """ Mixin to add a `.str` acessor to the class.""" - # string methods - def _make_str_accessor(self): + @classmethod + def _make_accessor(cls, data): from pandas.core.index import Index - if (isinstance(self, ABCSeries) and - not ((is_categorical_dtype(self.dtype) and - is_object_dtype(self.values.categories)) or - (is_object_dtype(self.dtype)))): + if (isinstance(data, ABCSeries) and + not ((is_categorical_dtype(data.dtype) and + is_object_dtype(data.values.categories)) or + (is_object_dtype(data.dtype)))): # it's neither a string series not a categorical series with # strings inside the categories. # this really should exclude all series with any non-string values @@ -1910,30 +1907,18 @@ def _make_str_accessor(self): raise AttributeError("Can only use .str accessor with string " "values, which use np.object_ dtype in " "pandas") - elif isinstance(self, Index): + elif isinstance(data, Index): # can't use ABCIndex to exclude non-str # see scc/inferrence.pyx which can contain string values allowed_types = ('string', 'unicode', 'mixed', 'mixed-integer') - if self.inferred_type not in allowed_types: + if data.inferred_type not in allowed_types: message = ("Can only use .str accessor with string values " "(i.e. inferred_type is 'string', 'unicode' or " "'mixed')") raise AttributeError(message) - if self.nlevels > 1: + if data.nlevels > 1: message = ("Can only use .str accessor with Index, not " "MultiIndex") raise AttributeError(message) - return StringMethods(self) - - str = AccessorProperty(StringMethods, _make_str_accessor) - - def _dir_additions(self): - return set() - - def _dir_deletions(self): - try: - getattr(self, 'str') - except AttributeError: - return set(['str']) - return set() + return cls(data) From b9809583acbcfad6429fb0a1dcc42b12e8fc8649 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 29 Jul 2017 15:51:55 -0700 Subject: [PATCH 2/8] flake8 cleanups --- pandas/core/generic.py | 2 +- pandas/core/indexes/base.py | 1 - pandas/core/series.py | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2f794992ddeea..c583ab83fe636 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -193,7 +193,7 @@ def __unicode__(self): def _dir_additions(self): """ add the string-like attributes from the info_axis """ additions = set([c for c in self._info_axis - if isinstance(c, string_types) and isidentifier(c)]) + if isinstance(c, string_types) and isidentifier(c)]) return PandasObject._dir_additions(self).union(additions) @property diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 78492748b107a..2ac07c6835db7 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -157,7 +157,6 @@ class Index(IndexOpsMixin, PandasObject): str = base.AccessorProperty(strings.StringMethods, strings.StringMethods._make_accessor) - def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, tupleize_cols=True, **kwargs): diff --git a/pandas/core/series.py b/pandas/core/series.py index d3b646a96e885..9dc8cdfc4c7ea 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2938,7 +2938,6 @@ def _make_cat_accessor(self): strings.StringMethods._make_accessor) - Series._setup_axes(['index'], info_axis=0, stat_axis=0, aliases={'rows': 0}) Series._add_numeric_operations() Series._add_series_only_operations() From f43d2478fe7e3981a0c0eedb38782e9fa80fed54 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 29 Jul 2017 17:10:00 -0700 Subject: [PATCH 3/8] Fix lint unused import --- pandas/core/strings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 9008ed080e45d..03c01ea0817b9 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -16,7 +16,7 @@ from pandas.core.algorithms import take_1d import pandas.compat as compat -from pandas.core.base import AccessorProperty, NoNewAttributesMixin +from pandas.core.base import NoNewAttributesMixin from pandas.util._decorators import Appender import re import pandas._libs.lib as lib From 2a6afcbb0cbc936347efaa16de766310f4a8c822 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 8 Aug 2017 21:33:29 -0700 Subject: [PATCH 4/8] Rebase, move DirNamesMixin to accessor.py --- pandas/core/accessor.py | 29 +++++++++++++++++++++++++++++ pandas/core/base.py | 26 +------------------------- pandas/core/indexes/base.py | 3 +-- pandas/core/series.py | 3 +-- pandas/core/strings.py | 16 ---------------- 5 files changed, 32 insertions(+), 45 deletions(-) create mode 100644 pandas/core/accessor.py diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py new file mode 100644 index 0000000000000..26f8147dca20e --- /dev/null +++ b/pandas/core/accessor.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +class DirNamesMixin(object): + _accessors = frozenset([]) + + def _dir_deletions(self): + """ delete unwanted __dir__ for this object """ + return self._accessors + + def _dir_additions(self): + """ add addtional __dir__ for this object """ + rv = set() + for accessor in self._accessors: + try: + getattr(self, accessor) + rv.add(accessor) + except AttributeError: + pass + return rv + + def __dir__(self): + """ + Provide method name lookup and completion + Only provide 'public' methods + """ + rv = set(dir(type(self))) + rv = (rv - self._dir_deletions()) | self._dir_additions() + return sorted(rv) diff --git a/pandas/core/base.py b/pandas/core/base.py index db6a5bea0dab3..f9ef644ec34e0 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -18,6 +18,7 @@ from pandas.util._decorators import (Appender, cache_readonly, deprecate_kwarg, Substitution) from pandas.core.common import AbstractMethodError +from pandas.core.accessor import DirNamesMixin _shared_docs = dict() _indexops_doc_kwargs = dict(klass='IndexOpsMixin', inplace='', @@ -72,32 +73,7 @@ def __repr__(self): return str(self) -class DirNamesMixin(object): - _accessors = frozenset([]) - def _dir_deletions(self): - """ delete unwanted __dir__ for this object """ - return self._accessors - - def _dir_additions(self): - """ add addtional __dir__ for this object """ - rv = set() - for accessor in self._accessors: - try: - getattr(self, accessor) - rv.add(accessor) - except AttributeError: - pass - return rv - - def __dir__(self): - """ - Provide method name lookup and completion - Only provide 'public' methods - """ - rv = set(dir(type(self))) - rv = (rv - self._dir_deletions()) | self._dir_additions() - return sorted(rv) class PandasObject(StringMixin, DirNamesMixin): diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index ad8cea9214b9f..2175a015b0e09 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -154,8 +154,7 @@ class Index(IndexOpsMixin, PandasObject): _accessors = frozenset(['dt', 'str', 'cat']) # String Methods - str = base.AccessorProperty(strings.StringMethods, - strings.StringMethods._make_accessor) + str = base.AccessorProperty(strings.StringMethods) def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, tupleize_cols=True, **kwargs): diff --git a/pandas/core/series.py b/pandas/core/series.py index 2beff37dcbe5c..06504a4059a2f 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2924,8 +2924,7 @@ def to_period(self, freq=None, copy=True): cat = base.AccessorProperty(CategoricalAccessor) # String Methods - str = base.AccessorProperty(strings.StringMethods, - strings.StringMethods._make_accessor) + str = base.AccessorProperty(strings.StringMethods) # ---------------------------------------------------------------------- # Add plotting methods to Series diff --git a/pandas/core/strings.py b/pandas/core/strings.py index d4f83288cda13..2f95e510bba5e 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1921,19 +1921,3 @@ def _make_accessor(cls, data): "MultiIndex") raise AttributeError(message) return cls(data) - - -class StringAccessorMixin(object): - """ Mixin to add a `.str` acessor to the class.""" - - str = AccessorProperty(StringMethods) - - def _dir_additions(self): - return set() - - def _dir_deletions(self): - try: - getattr(self, 'str') - except AttributeError: - return set(['str']) - return set() From 7aa973e0fb953b8a6ecefcde26912f1526ba2aca Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 9 Aug 2017 09:22:50 -0700 Subject: [PATCH 5/8] Flake8 whitespace fixup --- pandas/core/accessor.py | 1 + pandas/core/base.py | 3 --- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index 26f8147dca20e..7e480080182b5 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -1,6 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- + class DirNamesMixin(object): _accessors = frozenset([]) diff --git a/pandas/core/base.py b/pandas/core/base.py index f9ef644ec34e0..b15431464b166 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -73,9 +73,6 @@ def __repr__(self): return str(self) - - - class PandasObject(StringMixin, DirNamesMixin): """baseclass for various pandas objects""" From d71dbf889a8b0820f45470f63fbd429ec4eb8d2c Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 16 Aug 2017 16:54:38 -0700 Subject: [PATCH 6/8] Remove shebang, add docstring per review request --- pandas/core/accessor.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index 7e480080182b5..9101e57c5779d 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -1,6 +1,10 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- +""" +accessor.py contains base classes for implementing accessor properties +that can be mixed into or pinned onto other pandas classes. + +""" class DirNamesMixin(object): _accessors = frozenset([]) From a21392730651367cd373529881b377d52034a902 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 19 Aug 2017 11:20:23 -0700 Subject: [PATCH 7/8] flake8 whitespace fixup --- pandas/core/accessor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index 9101e57c5779d..9f8556d1e6961 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -6,6 +6,7 @@ """ + class DirNamesMixin(object): _accessors = frozenset([]) From bf66c5b1a4946bdb7ec5b9f94225ad8f8aaf8178 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 20 Aug 2017 08:38:31 -0700 Subject: [PATCH 8/8] Fixes for reviewer comments --- pandas/core/generic.py | 2 +- pandas/core/indexes/base.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 46ee8f9f71db5..04debef2fcac0 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -194,7 +194,7 @@ def _dir_additions(self): """ add the string-like attributes from the info_axis """ additions = set([c for c in self._info_axis if isinstance(c, string_types) and isidentifier(c)]) - return PandasObject._dir_additions(self).union(additions) + return super(NDFrame, self)._dir_additions().union(additions) @property def _constructor_sliced(self): diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 2175a015b0e09..d614b69c85afa 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -151,7 +151,7 @@ class Index(IndexOpsMixin, PandasObject): _engine_type = libindex.ObjectEngine - _accessors = frozenset(['dt', 'str', 'cat']) + _accessors = frozenset(['str']) # String Methods str = base.AccessorProperty(strings.StringMethods)