From be747259c41039a8e70062b03be6fa0571aa7443 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 8 Dec 2017 14:45:57 -0600 Subject: [PATCH] ENH: Added public accessor registrar Adds new methods for registering custom accessors to pandas objects. This will be helpful for implementing https://github.com/pandas-dev/pandas/issues/18767 outside of pandas. Closes https://github.com/pandas-dev/pandas/issues/14781 --- LICENSES/XARRAY_LICENSE | 191 +++++++++++++++++++++++++ doc/source/developer.rst | 1 + doc/source/internals.rst | 42 ++++++ doc/source/whatsnew/v0.22.0.txt | 44 ++++++ pandas/core/accessor.py | 111 ++++++++++++++ pandas/core/api.py | 3 + pandas/errors/__init__.py | 4 + pandas/tests/api/test_api.py | 5 +- pandas/tests/test_register_accessor.py | 75 ++++++++++ 9 files changed, 475 insertions(+), 1 deletion(-) create mode 100644 LICENSES/XARRAY_LICENSE create mode 100644 pandas/tests/test_register_accessor.py diff --git a/LICENSES/XARRAY_LICENSE b/LICENSES/XARRAY_LICENSE new file mode 100644 index 00000000000000..37ec93a14fdcd0 --- /dev/null +++ b/LICENSES/XARRAY_LICENSE @@ -0,0 +1,191 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright +owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. +For the purposes of this definition, "control" means (i) the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. 
+ +"You" (or "Your") shall mean an individual or Legal Entity exercising +permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. + +"Object" form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object code, +generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made +available under the License, as indicated by a copyright notice that is included +in or attached to the work (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative Works +thereof, that is intentionally submitted to Licensor for inclusion in the Work +by the copyright owner or by an individual or Legal Entity authorized to submit +on behalf of the copyright owner. 
For the purposes of this definition, +"submitted" means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor for +the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +2. Grant of Copyright License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the Work and such +Derivative Works in Source or Object form. + +3. Grant of Patent License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable (except as stated in this section) patent license to make, have +made, use, offer to sell, sell, import, and otherwise transfer the Work, where +such license applies only to those patent claims licensable by such Contributor +that are necessarily infringed by their Contribution(s) alone or by combination +of their Contribution(s) with the Work to which such Contribution(s) was +submitted. 
If You institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work or a +Contribution incorporated within the Work constitutes direct or contributory +patent infringement, then any patent licenses granted to You under this License +for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. + +You may reproduce and distribute copies of the Work or Derivative Works thereof +in any medium, with or without modifications, and in Source or Object form, +provided that You meet the following conditions: + +You must give any other recipients of the Work or Derivative Works a copy of +this License; and +You must cause any modified files to carry prominent notices stating that You +changed the files; and +You must retain, in the Source form of any Derivative Works that You distribute, +all copyright, patent, trademark, and attribution notices from the Source form +of the Work, excluding those notices that do not pertain to any part of the +Derivative Works; and +If the Work includes a "NOTICE" text file as part of its distribution, then any +Derivative Works that You distribute must include a readable copy of the +attribution notices contained within such NOTICE file, excluding those notices +that do not pertain to any part of the Derivative Works, in at least one of the +following places: within a NOTICE text file distributed as part of the +Derivative Works; within the Source form or documentation, if provided along +with the Derivative Works; or, within a display generated by the Derivative +Works, if and wherever such third-party notices normally appear. The contents of +the NOTICE file are for informational purposes only and do not modify the +License. 
You may add Your own attribution notices within Derivative Works that +You distribute, alongside or as an addendum to the NOTICE text from the Work, +provided that such additional attribution notices cannot be construed as +modifying the License. +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, or +distribution of Your modifications, or for any such Derivative Works as a whole, +provided Your use, reproduction, and distribution of the Work otherwise complies +with the conditions stated in this License. + +5. Submission of Contributions. + +Unless You explicitly state otherwise, any Contribution intentionally submitted +for inclusion in the Work by You to the Licensor shall be under the terms and +conditions of this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify the terms of +any separate license agreement you may have executed with Licensor regarding +such Contributions. + +6. Trademarks. + +This License does not grant permission to use the trade names, trademarks, +service marks, or product names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. + +Unless required by applicable law or agreed to in writing, Licensor provides the +Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, +including, without limitation, any warranties or conditions of TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are +solely responsible for determining the appropriateness of using or +redistributing the Work and assume any risks associated with Your exercise of +permissions under this License. + +8. Limitation of Liability. 
+ +In no event and under no legal theory, whether in tort (including negligence), +contract, or otherwise, unless required by applicable law (such as deliberate +and grossly negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, incidental, +or consequential damages of any character arising as a result of this License or +out of the use or inability to use the Work (including but not limited to +damages for loss of goodwill, work stoppage, computer failure or malfunction, or +any and all other commercial damages or losses), even if such Contributor has +been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. + +While redistributing the Work or Derivative Works thereof, You may choose to +offer, and charge a fee for, acceptance of support, warranty, indemnity, or +other liability obligations and/or rights consistent with this License. However, +in accepting such obligations, You may act only on Your own behalf and on Your +sole responsibility, not on behalf of any other Contributor, and only if You +agree to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work + +To apply the Apache License to your work, attach the following boilerplate +notice, with the fields enclosed by brackets "[]" replaced with your own +identifying information. (Don't include the brackets!) The text should be +enclosed in the appropriate comment syntax for the file format. We also +recommend that a file or class name and description of purpose be included on +the same "printed page" as the copyright notice for easier identification within +third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/doc/source/developer.rst b/doc/source/developer.rst index 9c214020ab43dd..2385d3670c159f 100644 --- a/doc/source/developer.rst +++ b/doc/source/developer.rst @@ -140,3 +140,4 @@ As an example of fully-formed metadata: 'metadata': None} ], 'pandas_version': '0.20.0'} + diff --git a/doc/source/internals.rst b/doc/source/internals.rst index 3d96b93de4cc94..1be3d166810344 100644 --- a/doc/source/internals.rst +++ b/doc/source/internals.rst @@ -89,6 +89,46 @@ not check (or care) whether the levels themselves are sorted. Fortunately, the constructors ``from_tuples`` and ``from_arrays`` ensure that this is true, but if you compute the levels and labels yourself, please be careful. +.. _register-accessors: + +Registering Custom Accessors +---------------------------- + +Libraries can use the decorators :func:`register_dataframe_accessor`, +:func:`register_series_accessor`, and :func:`register_index_accessor`, to add +additional "namespaces" to pandas objects. All of these follow a similar +convention: you decorate a class, providing the name of the attribute to add. The +class's ``__init__`` method gets the object being decorated. For example: + +.. 
ipython:: python + + @pd.register_dataframe_accessor("geo") + class GeoAccessor(object): + def __init__(self, pandas_obj): + self._obj = pandas_obj + + @property + def center(self): + # return the geographic center point of this DataFrame + lon = self._obj.longitude + lat = self._obj.latitude + return (float(lon.mean()), float(lat.mean())) + + def plot(self): + # plot this array's data on a map, e.g., using Cartopy + pass + +Now users can access your methods using the ``geo`` namespace: + + >>> ds = pd.DataFrame({'longitude': np.linspace(0, 10), + ... 'latitude': np.linspace(0, 20)}) + >>> ds.geo.center + (5.0, 10.0) + >>> ds.geo.plot() + # plots data on a map + +This can be a convenient way to extend pandas objects without subclassing them. + .. _ref-subclassing-pandas: Subclassing pandas Data Structures @@ -100,6 +140,8 @@ Subclassing pandas Data Structures 2. Use *composition*. See `here `_. + 3. Extending by :ref:`registering an accessor <register-accessors>` + This section describes how to subclass ``pandas`` data structures to meet more specific needs. There are 2 points which need attention: 1. Override constructor properties. diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index ae6d0816abc415..f1224714634f55 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -119,6 +119,50 @@ Current Behavior s.rank(na_option='top') +Extending Pandas Objects with New Accessors +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Pandas uses accessors to group together many related methods into a namespace on +``Series`` or ``Index`` objects. For example, ``Series.str`` for string methods, +or ``Series.dt`` for datetime methods. Inspired by xarray, pandas now officially +supports registering custom accessors in library code. + + +.. 
code-block:: python + + import pandas as pd + + @pd.register_dataframe_accessor("geo") + class GeoAccessor(object): + def __init__(self, pandas_obj): + self._obj = pandas_obj + + @property + def center(self): + # return the geographic center point of this DataFrame + lon = self._obj.longitude + lat = self._obj.latitude + return (float(lon.mean()), float(lat.mean())) + + def plot(self): + # plot this array's data on a map, e.g., using Cartopy + pass + +Back in an interactive IPython session: + +.. code-block:: python + + >>> ds = pd.DataFrame({'longitude': np.linspace(0, 10), + ... 'latitude': np.linspace(0, 20)}) + >>> ds.geo.center + (5.0, 10.0) + >>> ds.geo.plot() + # plots data on a map + +This provides a convenient alternative to subclassing or composition. +If you write a custom accessor, make a pull request adding it to our +:ref:`ecosystem` page. + .. _whatsnew_0220.enhancements.other: Other Enhancements diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index 53ead5e8f74a33..526825fdedc3b7 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -5,7 +5,12 @@ that can be mixed into or pinned onto other pandas classes. 
""" +import traceback +import warnings + from pandas.core.common import AbstractMethodError +from pandas.compat import PY2 +from pandas.errors import AccessorRegistrationWarning class DirNamesMixin(object): @@ -129,3 +134,109 @@ def f(self, *args, **kwargs): # don't overwrite existing methods/properties if overwrite or not hasattr(cls, name): setattr(cls, name, f) + + +# Ported with modifications from xarray +# https://github.com/pydata/xarray/blob/master/xarray/core/extensions.py + + +class _CachedAccessor(object): + """Custom property-like object (descriptor) for caching accessors.""" + def __init__(self, name, accessor): + self._name = name + self._accessor = accessor + + def __get__(self, obj, cls): + if obj is None: + # we're accessing the attribute of the class, i.e., Dataset.geo + return self._accessor + try: + accessor_obj = self._accessor(obj) + except AttributeError: + # TODO + # __getattr__ on data object will swallow any AttributeErrors + # raised when initializing the accessor, so we need to raise + # as something else (GH933): + msg = 'error initializing %r accessor.' % self._name + if PY2: + msg += ' Full traceback:\n' + traceback.format_exc() + raise RuntimeError(msg) + # Replace the property with the accessor object. Inspired by: + # http://www.pydanny.com/cached-property.html + # We need to use object.__setattr__ because we overwrite __setattr__ on + # AttrAccessMixin. + object.__setattr__(obj, self._name, accessor_obj) + return accessor_obj + + +def _register_accessor(name, cls): + def decorator(accessor): + if hasattr(cls, name): + warnings.warn( + 'registration of accessor {!r} under name {!r} for type ' + '{!r} is overriding a preexisting attribute with the same ' + 'name.'.format(accessor, name, cls), + AccessorRegistrationWarning, + stacklevel=2) + setattr(cls, name, _CachedAccessor(name, accessor)) + return accessor + return decorator + + +def register_dataframe_accessor(name): + """Register a custom accessor on pandas.DataFrame objects. 
+ + Parameters + ---------- + name : str + Name under which the accessor should be registered. A warning is issued + if this name conflicts with a preexisting attribute. + + Examples + -------- + + In your library code:: + + import pandas as pd + + @pd.register_dataframe_accessor("geo") + class GeoAccessor(object): + def __init__(self, pandas_obj): + self._obj = pandas_obj + + @property + def center(self): + # return the geographic center point of this DataFrame + lon = self._obj.longitude + lat = self._obj.latitude + return (float(lon.mean()), float(lat.mean())) + + def plot(self): + # plot this array's data on a map, e.g., using Cartopy + pass + + Back in an interactive IPython session: + >>> ds = pd.DataFrame({'longitude': np.linspace(0, 10), + ... 'latitude': np.linspace(0, 20)}) + >>> ds.geo.center + (5.0, 10.0) + >>> ds.geo.plot() + # plots data on a map + + See also + -------- + register_series_accessor + register_index_accessor + """ + from pandas import DataFrame + return _register_accessor(name, DataFrame) + + +def register_series_accessor(name): + from pandas import Series + return _register_accessor(name, Series) + + +def register_index_accessor(name): + from pandas import Index + return _register_accessor(name, Index) diff --git a/pandas/core/api.py b/pandas/core/api.py index 8a624da3629764..597ed29709d18e 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -5,6 +5,9 @@ import numpy as np from pandas.core.algorithms import factorize, unique, value_counts +from pandas.core.accessor import (register_dataframe_accessor, + register_index_accessor, + register_series_accessor) from pandas.core.dtypes.missing import isna, isnull, notna, notnull from pandas.core.categorical import Categorical from pandas.core.groupby import Grouper diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 42b3bdd4991a9a..b4026336c1ded4 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -65,3 +65,7 @@ class MergeError(ValueError): 
Error raised when problems arise during merging due to problems with input data. Subclass of `ValueError`. """ + + +class AccessorRegistrationWarning(Warning): + """Warning for attribute conflicts in accessor registration.""" \ No newline at end of file diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index e47f1919faaf57..a353f6df7cec1a 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -70,7 +70,10 @@ class TestPDApi(Base): 'period_range', 'pivot', 'pivot_table', 'qcut', 'show_versions', 'timedelta_range', 'unique', - 'value_counts', 'wide_to_long'] + 'value_counts', 'wide_to_long', + 'register_series_accessor', + 'register_dataframe_accessor', + 'register_index_accessor'] # top-level option funcs funcs_option = ['reset_option', 'describe_option', 'get_option', diff --git a/pandas/tests/test_register_accessor.py b/pandas/tests/test_register_accessor.py new file mode 100644 index 00000000000000..f7564aaa8daa91 --- /dev/null +++ b/pandas/tests/test_register_accessor.py @@ -0,0 +1,75 @@ +import contextlib + +import pytest + +import pandas as pd +import pandas.util.testing as tm +from pandas.errors import AccessorRegistrationWarning + + +@contextlib.contextmanager +def ensure_removed(obj, attr): + """Ensure that an attribute added to 'obj' during the test is + removed when we're done""" + try: + yield + finally: + try: + delattr(obj, attr) + except AttributeError: + pass + + +class MyAccessor(object): + + def __init__(self, obj): + self.obj = obj + self.item = 'item' + + @property + def prop(self): + return self.item + + def method(self): + return self.item + + +@pytest.mark.parametrize('obj, registrar', [ + (pd.Series, pd.register_series_accessor), + (pd.DataFrame, pd.register_dataframe_accessor), + (pd.Index, pd.register_index_accessor) +]) +def test_series_register(obj, registrar): + with ensure_removed(obj, 'mine'): + before = set(dir(obj)) + registrar('mine')(MyAccessor) + assert obj([]).mine.prop == 'item' + 
after = set(dir(obj)) + assert (before ^ after) == {'mine'} + + +def test_accessor_works(): + with ensure_removed(pd.Series, 'mine'): + pd.register_series_accessor('mine')(MyAccessor) + + s = pd.Series([1, 2]) + assert s.mine.obj is s + + assert s.mine.prop == 'item' + assert s.mine.method() == 'item' + + +def test_overwrite_warns(): + # Need to restore mean + mean = pd.Series.mean + try: + with tm.assert_produces_warning(AccessorRegistrationWarning) as w: + pd.register_series_accessor('mean')(MyAccessor) + s = pd.Series([1, 2]) + assert s.mean.prop == 'item' + msg = str(w[0].message) + assert 'mean' in msg + assert 'MyAccessor' in msg + assert 'Series' in msg + finally: + pd.Series.mean = mean