diff --git a/doc/source/basics.rst b/doc/source/basics.rst index 5789f39266927..7a056203ed447 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -93,7 +93,7 @@ Accelerated operations ---------------------- pandas has support for accelerating certain types of binary numerical and boolean operations using -the ``numexpr`` library (starting in 0.11.0) and the ``bottleneck`` libraries. +the ``numexpr`` and ``bottleneck`` libraries. These libraries are especially useful when dealing with large data sets, and provide large speedups. ``numexpr`` uses smart chunking, caching, and multiple cores. ``bottleneck`` is @@ -114,6 +114,15 @@ Here is a sample (using 100 column x 100,000 row ``DataFrames``): You are highly encouraged to install both libraries. See the section :ref:`Recommended Dependencies <install.recommended_dependencies>` for more installation info. +Both libraries are enabled by default; you can control this by setting the following options: + +.. versionadded:: 0.20.0 + +.. code-block:: python + + pd.set_option('compute.use_bottleneck', False) + pd.set_option('compute.use_numexpr', False) + .. _basics.binop: Flexible binary operations diff --git a/doc/source/options.rst b/doc/source/options.rst index 1b219f640cc87..5f6bf2fbb9662 100644 --- a/doc/source/options.rst +++ b/doc/source/options.rst @@ -425,6 +425,10 @@ mode.use_inf_as_null False True means treat None, NaN, -IN INF as null (old way), False means None and NaN are null, but INF, -INF are not null (new way). +compute.use_bottleneck True Use the bottleneck library to accelerate + computation if it is installed +compute.use_numexpr True Use the numexpr library to accelerate + computation if it is installed =================================== ============ ================================== @@ -538,4 +542,4 @@ Only ``'display.max_rows'`` are serialized and published. ..
ipython:: python :suppress: - pd.reset_option('display.html.table_schema') \ No newline at end of file + pd.reset_option('display.html.table_schema') diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index b6feb5cf8cedd..86a598183517c 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -521,6 +521,7 @@ Other Enhancements - The ``display.show_dimensions`` option can now also be used to specify whether the length of a ``Series`` should be shown in its repr (:issue:`7117`). - ``parallel_coordinates()`` has gained a ``sort_labels`` keyword arg that sorts class labels and the colours assigned to them (:issue:`15908`) +- Options added to allow one to turn on/off using ``bottleneck`` and ``numexpr``, see :ref:`here <basics.accelerate>` (:issue:`16157`) .. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations @@ -1217,7 +1218,7 @@ If indicated, a deprecation warning will be issued if you reference theses modul "pandas.lib", "pandas._libs.lib", "X" "pandas.tslib", "pandas._libs.tslib", "X" - "pandas.computation", "pandas.core.computation", "" + "pandas.computation", "pandas.core.computation", "X" "pandas.msgpack", "pandas.io.msgpack", "" "pandas.index", "pandas._libs.index", "" "pandas.algos", "pandas._libs.algos", "" diff --git a/pandas/computation/__init__.py b/pandas/computation/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/computation/expressions.py b/pandas/computation/expressions.py new file mode 100644 index 0000000000000..f46487cfa1b79 --- /dev/null +++ b/pandas/computation/expressions.py @@ -0,0 +1,11 @@ +import warnings + + +def set_use_numexpr(v=True): + warnings.warn("pandas.computation.expressions.set_use_numexpr is " + "deprecated and will be removed in a future version.\n" + "you can toggle usage of numexpr via " + "pandas.set_option('compute.use_numexpr', value)", + FutureWarning, stacklevel=2) + from pandas import set_option + set_option('compute.use_numexpr', v)
diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py index 4eeefb183001e..83d02af65cc85 100644 --- a/pandas/core/computation/expressions.py +++ b/pandas/core/computation/expressions.py @@ -10,6 +10,7 @@ import numpy as np from pandas.core.common import _values_from_object from pandas.core.computation import _NUMEXPR_INSTALLED +from pandas.core.config import get_option if _NUMEXPR_INSTALLED: import numexpr as ne @@ -156,7 +157,7 @@ def _where_numexpr(cond, a, b, raise_on_error=False): # turn myself on -set_use_numexpr(True) +set_use_numexpr(get_option('compute.use_numexpr')) def _has_bool_dtype(x): diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index f8cbdffa27bb4..70ebb170cb763 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -15,8 +15,41 @@ from pandas.core.config import (is_int, is_bool, is_text, is_instance_factory, is_one_of_factory, get_default_val, is_callable) -from pandas.io.formats.format import detect_console_encoding +from pandas.io.formats.console import detect_console_encoding +# compute + +use_bottleneck_doc = """ +: bool + Use the bottleneck library to accelerate if it is installed, + the default is True + Valid values: False,True +""" + + +def use_bottleneck_cb(key): + from pandas.core import nanops + nanops.set_use_bottleneck(cf.get_option(key)) + + +use_numexpr_doc = """ +: bool + Use the numexpr library to accelerate computation if it is installed, + the default is True + Valid values: False,True +""" + + +def use_numexpr_cb(key): + from pandas.core.computation import expressions + expressions.set_use_numexpr(cf.get_option(key)) + + +with cf.config_prefix('compute'): + cf.register_option('use_bottleneck', True, use_bottleneck_doc, + validator=is_bool, cb=use_bottleneck_cb) + cf.register_option('use_numexpr', True, use_numexpr_doc, + validator=is_bool, cb=use_numexpr_cb) # # options from the "display" namespace diff --git a/pandas/core/frame.py 
b/pandas/core/frame.py index 983a6ef3e045a..06bd8f8fc51bc 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -91,6 +91,7 @@ import pandas.core.nanops as nanops import pandas.core.ops as ops import pandas.io.formats.format as fmt +import pandas.io.formats.console as console from pandas.io.formats.printing import pprint_thing import pandas.plotting._core as gfx @@ -513,7 +514,7 @@ def _repr_fits_horizontal_(self, ignore_width=False): GH3541, GH3573 """ - width, height = fmt.get_console_size() + width, height = console.get_console_size() max_columns = get_option("display.max_columns") nb_columns = len(self.columns) @@ -577,7 +578,7 @@ def __unicode__(self): max_cols = get_option("display.max_columns") show_dimensions = get_option("display.show_dimensions") if get_option("display.expand_frame_repr"): - width, _ = fmt.get_console_size() + width, _ = console.get_console_size() else: width = None self.to_string(buf=buf, max_rows=max_rows, max_cols=max_cols, diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 04458d684d795..4345c74664bf5 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -837,7 +837,8 @@ def _format_data(self): """ Return the formatted data as a unicode string """ - from pandas.io.formats.format import get_console_size, _get_adjustment + from pandas.io.formats.console import get_console_size + from pandas.io.formats.format import _get_adjustment display_width, _ = get_console_size() if display_width is None: display_width = get_option('display.width') or 80 diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index e9be43b184537..1d64f87b15761 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1,14 +1,8 @@ import itertools import functools -import numpy as np import operator -try: - import bottleneck as bn - _USE_BOTTLENECK = True -except ImportError: # pragma: no cover - _USE_BOTTLENECK = False - +import numpy as np from pandas import compat from pandas._libs import 
tslib, algos, lib from pandas.core.dtypes.common import ( @@ -23,9 +17,27 @@ is_int_or_datetime_dtype, is_any_int_dtype) from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask from pandas.core.dtypes.missing import isnull, notnull - +from pandas.core.config import get_option from pandas.core.common import _values_from_object +try: + import bottleneck as bn + _BOTTLENECK_INSTALLED = True +except ImportError: # pragma: no cover + _BOTTLENECK_INSTALLED = False + +_USE_BOTTLENECK = False + + +def set_use_bottleneck(v=True): + # set/unset to use bottleneck + global _USE_BOTTLENECK + if _BOTTLENECK_INSTALLED: + _USE_BOTTLENECK = v + + +set_use_bottleneck(get_option('compute.use_bottleneck')) + class disallow(object): diff --git a/pandas/io/formats/console.py b/pandas/io/formats/console.py new file mode 100644 index 0000000000000..0e46b0073a53d --- /dev/null +++ b/pandas/io/formats/console.py @@ -0,0 +1,84 @@ +""" +Internal module for console introspection +""" + +import sys +import locale +from pandas.util.terminal import get_terminal_size + +# ----------------------------------------------------------------------------- +# Global formatting options +_initial_defencoding = None + + +def detect_console_encoding(): + """ + Try to find the most capable encoding supported by the console. + slighly modified from the way IPython handles the same issue. + """ + global _initial_defencoding + + encoding = None + try: + encoding = sys.stdout.encoding or sys.stdin.encoding + except AttributeError: + pass + + # try again for something better + if not encoding or 'ascii' in encoding.lower(): + try: + encoding = locale.getpreferredencoding() + except Exception: + pass + + # when all else fails. this will usually be "ascii" + if not encoding or 'ascii' in encoding.lower(): + encoding = sys.getdefaultencoding() + + # GH3360, save the reported defencoding at import time + # MPL backends may change it. Make available for debugging. 
+ if not _initial_defencoding: + _initial_defencoding = sys.getdefaultencoding() + + return encoding + + +def get_console_size(): + """Return console size as tuple = (width, height). + + Returns (None,None) in non-interactive session. + """ + from pandas import get_option + from pandas.core import common as com + + display_width = get_option('display.width') + # deprecated. + display_height = get_option('display.height', silent=True) + + # Consider + # interactive shell terminal, can detect term size + # interactive non-shell terminal (ipnb/ipqtconsole), cannot detect term + # size non-interactive script, should disregard term size + + # in addition + # width,height have default values, but setting to 'None' signals + # should use Auto-Detection, But only in interactive shell-terminal. + # Simple. yeah. + + if com.in_interactive_session(): + if com.in_ipython_frontend(): + # sane defaults for interactive non-shell terminal + # match default for width,height in config_init + from pandas.core.config import get_default_val + terminal_width = get_default_val('display.width') + terminal_height = get_default_val('display.height') + else: + # pure terminal + terminal_width, terminal_height = get_terminal_size() + else: + terminal_width, terminal_height = None, None + + # Note if the User sets width/Height to None (auto-detection) + # and we're in a script (non-inter), this will return (None,None) + # caller needs to deal. 
+ return (display_width or terminal_width, display_height or terminal_height) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 1a9b3526a7503..43b0b5fbeee90 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -8,7 +8,6 @@ from distutils.version import LooseVersion # pylint: disable=W0141 -import sys from textwrap import dedent from pandas.core.dtypes.missing import isnull, notnull @@ -2290,82 +2289,6 @@ def _has_names(index): return index.name is not None -# ----------------------------------------------------------------------------- -# Global formatting options -_initial_defencoding = None - - -def detect_console_encoding(): - """ - Try to find the most capable encoding supported by the console. - slighly modified from the way IPython handles the same issue. - """ - import locale - global _initial_defencoding - - encoding = None - try: - encoding = sys.stdout.encoding or sys.stdin.encoding - except AttributeError: - pass - - # try again for something better - if not encoding or 'ascii' in encoding.lower(): - try: - encoding = locale.getpreferredencoding() - except Exception: - pass - - # when all else fails. this will usually be "ascii" - if not encoding or 'ascii' in encoding.lower(): - encoding = sys.getdefaultencoding() - - # GH3360, save the reported defencoding at import time - # MPL backends may change it. Make available for debugging. - if not _initial_defencoding: - _initial_defencoding = sys.getdefaultencoding() - - return encoding - - -def get_console_size(): - """Return console size as tuple = (width, height). - - Returns (None,None) in non-interactive session. - """ - display_width = get_option('display.width') - # deprecated. 
- display_height = get_option('display.height', silent=True) - - # Consider - # interactive shell terminal, can detect term size - # interactive non-shell terminal (ipnb/ipqtconsole), cannot detect term - # size non-interactive script, should disregard term size - - # in addition - # width,height have default values, but setting to 'None' signals - # should use Auto-Detection, But only in interactive shell-terminal. - # Simple. yeah. - - if com.in_interactive_session(): - if com.in_ipython_frontend(): - # sane defaults for interactive non-shell terminal - # match default for width,height in config_init - from pandas.core.config import get_default_val - terminal_width = get_default_val('display.width') - terminal_height = get_default_val('display.height') - else: - # pure terminal - terminal_width, terminal_height = get_terminal_size() - else: - terminal_width, terminal_height = None, None - - # Note if the User sets width/Height to None (auto-detection) - # and we're in a script (non-inter), this will return (None,None) - # caller needs to deal. - return (display_width or terminal_width, display_height or terminal_height) - - class EngFormatter(object): """ Formats float values according to engineering format. 
diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 026a36fd9f4f9..4678db4a52c5a 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -217,3 +217,17 @@ class TestTSLib(tm.TestCase): def test_deprecation_access_func(self): with catch_warnings(record=True): pd.tslib.Timestamp('20160101') + + +class TestTypes(tm.TestCase): + + def test_deprecation_access_func(self): + with tm.assert_produces_warning( + FutureWarning, check_stacklevel=False): + from pandas.types.concat import union_categoricals + c1 = pd.Categorical(list('aabc')) + c2 = pd.Categorical(list('abcd')) + union_categoricals( + [c1, c2], + sort_categories=True, + ignore_order=True) diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index dda466a6937dd..92d7f29366c69 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -4,9 +4,10 @@ from functools import partial import pytest - import warnings import numpy as np + +import pandas as pd from pandas import Series, isnull, _np_version_under1p9 from pandas.core.dtypes.common import is_integer_dtype import pandas.core.nanops as nanops @@ -991,3 +992,16 @@ def test_nans_skipna(self): @property def prng(self): return np.random.RandomState(1234) + + +def test_use_bottleneck(): + + if nanops._BOTTLENECK_INSTALLED: + + pd.set_option('use_bottleneck', True) + assert pd.get_option('use_bottleneck') + + pd.set_option('use_bottleneck', False) + assert not pd.get_option('use_bottleneck') + + pd.set_option('use_bottleneck', use_bn) diff --git a/pandas/types/__init__.py b/pandas/types/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/types/concat.py b/pandas/types/concat.py new file mode 100644 index 0000000000000..477156b38d56d --- /dev/null +++ b/pandas/types/concat.py @@ -0,0 +1,11 @@ +import warnings + + +def union_categoricals(to_union, sort_categories=False, ignore_order=False): + warnings.warn("pandas.types.concat.union_categoricals is 
" + "deprecated and will be removed in a future version.\n" + "use pandas.api.types.union_categoricals", + FutureWarning, stacklevel=2) + from pandas.api.types import union_categoricals + return union_categoricals( + to_union, sort_categories=sort_categories, ignore_order=ignore_order) diff --git a/setup.py b/setup.py index 5647e18aa227c..6f3ddbe2ad9d0 100755 --- a/setup.py +++ b/setup.py @@ -645,6 +645,7 @@ def pxd(name): 'pandas.core.reshape', 'pandas.core.sparse', 'pandas.core.tools', + 'pandas.computation', 'pandas.errors', 'pandas.io', 'pandas.io.json', @@ -654,6 +655,7 @@ def pxd(name): 'pandas._libs', 'pandas.plotting', 'pandas.stats', + 'pandas.types', 'pandas.util', 'pandas.tests', 'pandas.tests.api',